[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 4/5] x86: Port the basic alternative mechanism from Linux to Xen



This patch ports the basic alternative mechanism from Linux to Xen.
With this mechanism, we can patch code based on the CPU features.

Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
 xen/arch/x86/Makefile             |   1 +
 xen/arch/x86/alternative.c        | 213 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/setup.c              |   3 +
 xen/arch/x86/xen.lds.S            |  15 +++
 xen/include/asm-x86/alternative.h |  78 ++++++++++++++
 5 files changed, 310 insertions(+)
 create mode 100644 xen/arch/x86/alternative.c
 create mode 100644 xen/include/asm-x86/alternative.h

diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index d502bdf..3734884 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -58,6 +58,7 @@ obj-y += crash.o
 obj-y += tboot.o
 obj-y += hpet.o
 obj-y += xstate.o
+obj-y += alternative.o
 
 obj-$(crash_debug) += gdbstub.o
 
diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
new file mode 100644
index 0000000..3dbc811
--- /dev/null
+++ b/xen/arch/x86/alternative.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * alternative.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <xen/types.h>
+#include <asm/processor.h>
+#include <asm/alternative.h>
+#include <xen/init.h>
+#include <asm/system.h>
+#include <asm/traps.h>
+#include <asm/nmi.h>
+
+#define MAX_PATCH_LEN (255-1)
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+#ifdef K8_NOP1
+static const unsigned char k8nops[] __initconst = {
+    K8_NOP1,
+    K8_NOP2,
+    K8_NOP3,
+    K8_NOP4,
+    K8_NOP5,
+    K8_NOP6,
+    K8_NOP7,
+    K8_NOP8
+};
+static const unsigned char * const k8_nops[ASM_NOP_MAX+1] __initconst = {
+    NULL,
+    k8nops,
+    k8nops + 1,
+    k8nops + 1 + 2,
+    k8nops + 1 + 2 + 3,
+    k8nops + 1 + 2 + 3 + 4,
+    k8nops + 1 + 2 + 3 + 4 + 5,
+    k8nops + 1 + 2 + 3 + 4 + 5 + 6,
+    k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7
+};
+#endif
+
+#ifdef P6_NOP1
+static const unsigned char p6nops[] __initconst = {
+    P6_NOP1,
+    P6_NOP2,
+    P6_NOP3,
+    P6_NOP4,
+    P6_NOP5,
+    P6_NOP6,
+    P6_NOP7,
+    P6_NOP8
+};
+static const unsigned char * const p6_nops[ASM_NOP_MAX+1] __initconst = {
+    NULL,
+    p6nops,
+    p6nops + 1,
+    p6nops + 1 + 2,
+    p6nops + 1 + 2 + 3,
+    p6nops + 1 + 2 + 3 + 4,
+    p6nops + 1 + 2 + 3 + 4 + 5,
+    p6nops + 1 + 2 + 3 + 4 + 5 + 6,
+    p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7
+};
+#endif
+
+static const unsigned char * const *ideal_nops __initdata = k8_nops;
+
+static int __init mask_nmi_callback(struct cpu_user_regs *regs, int cpu)
+{
+    return 1;
+}
+
+static void __init arch_init_ideal_nops(void)
+{
+    /*
+     * Due to a decoder implementation quirk, some
+     * specific Intel CPUs actually perform better with
+     * the "k8_nops" than with the SDM-recommended NOPs.
+     */
+    if ( (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+         !(boot_cpu_data.x86 == 6 &&
+           boot_cpu_data.x86_model >= 0x0f &&
+           boot_cpu_data.x86_model != 0x1c &&
+           boot_cpu_data.x86_model != 0x26 &&
+           boot_cpu_data.x86_model != 0x27 &&
+           boot_cpu_data.x86_model < 0x30) )
+        ideal_nops = p6_nops;
+}
+
+/* Use this to add nops to a buffer, then text_poke the whole buffer. */
+static void __init add_nops(void *insns, unsigned int len)
+{
+    while ( len > 0 )
+    {
+        unsigned int noplen = len;
+        if ( noplen > ASM_NOP_MAX )
+            noplen = ASM_NOP_MAX;
+        memcpy(insns, ideal_nops[noplen], noplen);
+        insns += noplen;
+        len -= noplen;
+    }
+}
+
+/*
+ * text_poke_early - Update instructions on a live kernel at boot time
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions. And on the local CPU you need to be protected again NMI or MCE
+ * handlers seeing an inconsistent instruction while you patch.
+ */
+static void *__init text_poke_early(void *addr, const void *opcode, size_t len)
+{
+    unsigned long flags;
+
+    local_irq_save(flags);
+    memcpy(addr, opcode, len);
+    sync_core();
+    local_irq_restore(flags);
+
+    return addr;
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type.
+ * This runs before SMP is initialized to avoid SMP problems with
+ * self modifying code. This implies that asymmetric systems where
+ * APs have less capabilities than the boot processor are not handled.
+ * Tough. Make sure you disable such features by hand.
+ */
+
+static void __init apply_alternatives(struct alt_instr *start, struct 
alt_instr *end)
+{
+    struct alt_instr *a;
+    u8 *instr, *replacement;
+    u8 insnbuf[MAX_PATCH_LEN];
+
+    printk(KERN_INFO "alt table %p -> %p\n", start, end);
+
+    /*
+     * The scan order should be from start to end. A later scanned
+     * alternative code can overwrite a previous scanned alternative code.
+     * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+     * patch code.
+     *
+     * So be careful if you want to change the scan order to any other
+     * order.
+     */
+    for ( a = start; a < end; a++ )
+    {
+        instr = (u8 *)&a->instr_offset + a->instr_offset;
+        replacement = (u8 *)&a->repl_offset + a->repl_offset;
+        BUG_ON(a->replacementlen > a->instrlen);
+        BUG_ON(a->instrlen > sizeof(insnbuf));
+        BUG_ON(a->cpuid >= NCAPINTS * 32);
+        if ( !boot_cpu_has(a->cpuid) )
+            continue;
+
+        memcpy(insnbuf, replacement, a->replacementlen);
+
+        /* 0xe8/0xe9 is a relative jump; fix the offset. */
+        if ( (*insnbuf & 0xfe) == 0xe8 && a->replacementlen == 5 )
+            *(s32 *)(insnbuf + 1) += replacement - instr;
+
+        add_nops(insnbuf + a->replacementlen,
+                 a->instrlen - a->replacementlen);
+        text_poke_early(instr, insnbuf, a->instrlen);
+    }
+}
+
+void __init alternative_instructions(void)
+{
+    nmi_callback_t saved_nmi_callback;
+
+    arch_init_ideal_nops();
+
+    /*
+     * The patching is not fully atomic, so try to avoid local interruptions
+     * that might execute the to be patched code.
+     * Other CPUs are not running.
+     */
+    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
+    /*
+     * Don't stop machine check exceptions while patching.
+     * MCEs only happen when something got corrupted and in this
+     * case we must do something about the corruption.
+     * Ignoring it is worse than a unlikely patching race.
+     * Also machine checks tend to be broadcast and if one CPU
+     * goes into machine check the others follow quickly, so we don't
+     * expect a machine check to cause undue problems during to code
+     * patching.
+     */
+
+    apply_alternatives(__alt_instructions, __alt_instructions_end);
+    set_nmi_callback(saved_nmi_callback);
+}
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 508649d..d16453a 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -48,6 +48,7 @@
 #include <asm/setup.h>
 #include <xen/cpu.h>
 #include <asm/nmi.h>
+#include <asm/alternative.h>
 
 /* opt_nosmp: If true, secondary processors are ignored. */
 static bool_t __initdata opt_nosmp;
@@ -1288,6 +1289,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
 
+    alternative_instructions();
+
     local_irq_enable();
 
     pt_pci_init();
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 17db361..d4b1f1a 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -105,6 +105,12 @@ SECTIONS
   .init.text : {
        _sinittext = .;
        *(.init.text)
+       /*
+        * Here are the replacement instructions. The linker sticks them
+        * as binary blobs. The .altinstructions has enough data to get
+        * the address and the length of them to patch the kernel safely.
+        */
+       *(.altinstr_replacement)
        _einittext = .;
   } :text
   .init.data : {
@@ -120,6 +126,15 @@ SECTIONS
        __trampoline_seg_start = .;
        *(.trampoline_seg)
        __trampoline_seg_stop = .;
+       /*
+        * struct alt_inst entries. From the header (alternative.h):
+        * "Alternative instructions for different CPU types or capabilities"
+        * Think locking instructions on spinlocks.
+        */
+       . = ALIGN(8);
+        __alt_instructions = .;
+        *(.altinstructions)
+        __alt_instructions_end = .;
 
        . = ALIGN(8);
        __ctors_start = .;
diff --git a/xen/include/asm-x86/alternative.h 
b/xen/include/asm-x86/alternative.h
new file mode 100644
index 0000000..55a6604
--- /dev/null
+++ b/xen/include/asm-x86/alternative.h
@@ -0,0 +1,78 @@
+#ifndef __X86_ALTERNATIVE_H__
+#define __X86_ALTERNATIVE_H__
+
+#include <asm/nops.h>
+
+#ifdef __ASSEMBLY__
+.macro altinstruction_entry orig alt feature orig_len alt_len
+        .long \orig - .
+        .long \alt - .
+        .word \feature
+        .byte \orig_len
+        .byte \alt_len
+.endm
+#else
+#include <xen/types.h>
+
+struct alt_instr {
+    s32 instr_offset;       /* original instruction */
+    s32 repl_offset;        /* offset to replacement instruction */
+    u16 cpuid;              /* cpuid bit set for replacement */
+    u8  instrlen;           /* length of original instruction */
+    u8  replacementlen;     /* length of new instruction, <= instrlen */
+};
+
+extern void alternative_instructions(void);
+
+#define OLDINSTR(oldinstr)      "661:\n\t" oldinstr "\n662:\n"
+
+#define b_replacement(number)   "663"#number
+#define e_replacement(number)   "664"#number
+
+#define alt_slen "662b-661b"
+#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+
+#define ALTINSTR_ENTRY(feature, number)                                       \
+        " .long 661b - .\n"                             /* label           */ \
+        " .long " b_replacement(number)"f - .\n"        /* new instruction */ \
+        " .word " STR(feature) "\n"                     /* feature bit     */ \
+        " .byte " alt_slen "\n"                         /* source len      */ \
+        " .byte " alt_rlen(number) "\n"                 /* replacement len */
+
+#define DISCARD_ENTRY(number)                           /* rlen <= slen */    \
+        " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+
+#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */     \
+        b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) 
":\n\t"
+
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)                        \
+        OLDINSTR(oldinstr)                                              \
+        ".pushsection .altinstructions,\"a\"\n"                         \
+        ALTINSTR_ENTRY(feature, 1)                                      \
+        ".popsection\n"                                                 \
+        ".pushsection .discard,\"aw\",@progbits\n"                      \
+        DISCARD_ENTRY(1)                                                \
+        ".popsection\n"                                                 \
+        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
+        ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
+        ".popsection"
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows to use optimized instructions even on generic binary
+ * kernels.
+ *
+ * length of oldinstr must be longer or equal the length of newinstr
+ * It can be padded with nops as needed.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature)                        \
+        asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+
+#endif  /*  __ASSEMBLY__  */
+
+#endif /* __X86_ALTERNATIVE_H__ */
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.