[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 5/7] x86/alt: Support for automatic padding calculations



The correct amount of padding in an origin patch site can be calculated
automatically, based on the relative lengths of the replacements.

This requires a bit of trickery to calculate correctly, especially in the
ALTERNATIVE_2 case where a branchless max() calculation is needed.  The
calculation is further complicated because GAS's idea of true is -1 rather
than 1, which is why the extra negations are required.

Additionally, have apply_alternatives() attempt to optimise the padding nops.
This is complicated by the fact that we must not attempt to optimise nops over
an origin site which has already been modified.

To keep track of this, add a priv field to struct alt_instr, which gets
modified by apply_alternatives().  One extra requirement is that alt_instr's
referring to the same origin site must now be consecutive, but we already have
this property.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>

v2:
 * Fix build with Clang
v3:
 * s/GCC/GAS/ for the NEGATIVE_TRUE comments
 * Factor out OLDINSTR() generation on the C side
 * Introduce alt_instr->priv and use it to avoid repeatedly optimising the
   alternatives.
---
 xen/arch/x86/Rules.mk                 |  4 +++
 xen/arch/x86/alternative.c            | 45 +++++++++++++++++++++++-----
 xen/include/asm-x86/alternative-asm.h | 56 +++++++++++++++++++++++++++--------
 xen/include/asm-x86/alternative.h     | 50 ++++++++++++++++++++++++-------
 4 files changed, 124 insertions(+), 31 deletions(-)

diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk
index a29ab22..ac585a3 100644
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -25,6 +25,10 @@ $(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \
                      '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
 $(call as-option-add,CFLAGS,CC,"invpcid (%rax)$$(comma)%rax",-DHAVE_AS_INVPCID)
 
+# GAS's idea of true is -1.  Clang's idea is 1
+$(call as-option-add,CFLAGS,CC,\
+    ".if ((1 > 0) < 0); .error \"\";.endif",,-DHAVE_AS_NEGATIVE_TRUE)
+
 CFLAGS += -mno-red-zone -fpic -fno-asynchronous-unwind-tables
 
 # Xen doesn't use SSE interally.  If the compiler supports it, also skip the
diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
index 51ca53e..93adf56 100644
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -159,10 +159,10 @@ text_poke(void *addr, const void *opcode, size_t len)
  * APs have less capabilities than the boot processor are not handled.
  * Tough. Make sure you disable such features by hand.
  */
-void init_or_livepatch apply_alternatives(const struct alt_instr *start,
-                                          const struct alt_instr *end)
+void init_or_livepatch apply_alternatives(struct alt_instr *start,
+                                          struct alt_instr *end)
 {
-    const struct alt_instr *a;
+    struct alt_instr *a, *base;
 
     printk(KERN_INFO "alt table %p -> %p\n", start, end);
 
@@ -175,18 +175,47 @@ void init_or_livepatch apply_alternatives(const struct alt_instr *start,
      * So be careful if you want to change the scan order to any other
      * order.
      */
-    for ( a = start; a < end; a++ )
+    for ( a = base = start; a < end; a++ )
     {
         uint8_t *orig = ALT_ORIG_PTR(a);
         uint8_t *repl = ALT_REPL_PTR(a);
         uint8_t buf[MAX_PATCH_LEN];
+        unsigned int total_len = a->orig_len + a->pad_len;
 
-        BUG_ON(a->repl_len > a->orig_len);
-        BUG_ON(a->orig_len > sizeof(buf));
+        BUG_ON(a->repl_len > total_len);
+        BUG_ON(total_len > sizeof(buf));
         BUG_ON(a->cpuid >= NCAPINTS * 32);
 
+        /*
+         * Detect sequences of alt_instr's patching the same origin site, and
+         * keep base pointing at the first alt_instr entry.  This is so we can
+         * refer to a single ->priv field for patching decisions.
+         *
+         * ->priv being nonzero means that the origin site has already been
+         * modified, and we shouldn't try to optimise the nops again.
+         */
+        if ( ALT_ORIG_PTR(base) != orig )
+            base = a;
+
+        /* If there is no replacement to make, see about optimising the nops. */
         if ( !boot_cpu_has(a->cpuid) )
+        {
+            /* Origin site already touched?  Don't nop anything. */
+            if ( base->priv )
+                continue;
+
+            base->priv = 1;
+
+            /* Nothing useful to do? */
+            if ( a->pad_len <= 1 )
+                continue;
+
+            add_nops(buf, a->pad_len);
+            text_poke(orig + a->orig_len, buf, a->pad_len);
             continue;
+        }
+
+        base->priv = 1;
 
         memcpy(buf, repl, a->repl_len);
 
@@ -194,8 +223,8 @@ void init_or_livepatch apply_alternatives(const struct alt_instr *start,
         if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
             *(int32_t *)(buf + 1) += repl - orig;
 
-        add_nops(buf + a->repl_len, a->orig_len - a->repl_len);
-        text_poke(orig, buf, a->orig_len);
+        add_nops(buf + a->repl_len, total_len - a->repl_len);
+        text_poke(orig, buf, total_len);
     }
 }
 
diff --git a/xen/include/asm-x86/alternative-asm.h b/xen/include/asm-x86/alternative-asm.h
index 2af4f6b..0b61516 100644
--- a/xen/include/asm-x86/alternative-asm.h
+++ b/xen/include/asm-x86/alternative-asm.h
@@ -9,30 +9,53 @@
  * enough information for the alternatives patching code to patch an
  * instruction. See apply_alternatives().
  */
-.macro altinstruction_entry orig repl feature orig_len repl_len
+.macro altinstruction_entry orig repl feature orig_len repl_len pad_len
     .long \orig - .
     .long \repl - .
     .word \feature
     .byte \orig_len
     .byte \repl_len
+    .byte \pad_len
+    .byte 0 /* priv */
 .endm
 
-#define decl_orig(insn)         .L\@_orig_s:      insn; .L\@_orig_e:
+/* GAS's idea of true is -1, while Clang's idea is 1. */
+#ifdef HAVE_AS_NEGATIVE_TRUE
+# define as_true(x) (-(x))
+#else
+# define as_true(x) (x)
+#endif
+
+#define decl_orig(insn, padding)                  \
+ .L\@_orig_s: insn; .L\@_orig_e:                  \
+ .L\@_diff = padding;                             \
+ .skip as_true(.L\@_diff > 0) * .L\@_diff, 0x90;  \
+ .L\@_orig_p:
+
 #define orig_len               (.L\@_orig_e       -     .L\@_orig_s)
+#define pad_len                (.L\@_orig_p       -     .L\@_orig_e)
+#define total_len              (.L\@_orig_p       -     .L\@_orig_s)
 
 #define decl_repl(insn, nr)     .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr:
 #define repl_len(nr)           (.L\@_repl_e\()nr  -     .L\@_repl_s\()nr)
 
+#define as_max(a, b)           ((a) ^ (((a) ^ (b)) & -as_true((a) < (b))))
+
 .macro ALTERNATIVE oldinstr, newinstr, feature
-    decl_orig(\oldinstr)
+    decl_orig(\oldinstr, repl_len(1) - orig_len)
 
     .pushsection .altinstructions, "a", @progbits
     altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \
-        orig_len, repl_len(1)
+        orig_len, repl_len(1), pad_len
 
     .section .discard, "a", @progbits
-    /* Assembler-time check that \newinstr isn't longer than \oldinstr. */
-    .byte 0xff + repl_len(1) - orig_len
+    /*
+     * Assembler-time checks:
+     *   - total_len <= 255
+     *   - \newinstr <= total_len
+     */
+    .byte total_len
+    .byte 0xff + repl_len(1) - total_len
 
     .section .altinstr_replacement, "ax", @progbits
 
@@ -42,19 +65,24 @@
 .endm
 
 .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-    decl_orig(\oldinstr)
+    decl_orig(\oldinstr, as_max(repl_len(1), repl_len(2)) - orig_len)
 
     .pushsection .altinstructions, "a", @progbits
 
     altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature1, \
-        orig_len, repl_len(1)
+        orig_len, repl_len(1), pad_len
     altinstruction_entry .L\@_orig_s, .L\@_repl_s2, \feature2, \
-        orig_len, repl_len(2)
+        orig_len, repl_len(2), pad_len
 
     .section .discard, "a", @progbits
-    /* Assembler-time check that \newinstr{1,2} aren't longer than \oldinstr. */
-    .byte 0xff + repl_len(1) - orig_len
-    .byte 0xff + repl_len(2) - orig_len
+    /*
+     * Assembler-time checks:
+     *   - total_len <= 255
+     *   - \newinstr* <= total_len
+     */
+    .byte total_len
+    .byte 0xff + repl_len(1) - total_len
+    .byte 0xff + repl_len(2) - total_len
 
     .section .altinstr_replacement, "ax", @progbits
 
@@ -64,10 +92,14 @@
     .popsection
 .endm
 
+#undef as_max
 #undef repl_len
 #undef decl_repl
+#undef total_len
+#undef pad_len
 #undef orig_len
 #undef decl_orig
+#undef as_true
 
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */
diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h
index bcad3ee..4803368 100644
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -8,12 +8,14 @@
 #include <xen/stringify.h>
 #include <xen/types.h>
 
-struct alt_instr {
+struct __packed alt_instr {
     int32_t  orig_offset;   /* original instruction */
     int32_t  repl_offset;   /* offset to replacement instruction */
     uint16_t cpuid;         /* cpuid bit set for replacement */
     uint8_t  orig_len;      /* length of original instruction */
-    uint8_t  repl_len;      /* length of new instruction, <= instrlen */
+    uint8_t  repl_len;      /* length of new instruction */
+    uint8_t  pad_len;       /* length of build-time padding */
+    uint8_t  priv;          /* Private, for use by apply_alternatives() */
 };
 
 #define __ALT_PTR(a,f)      ((uint8_t *)((void *)&(a)->f + (a)->f))
@@ -22,47 +24,73 @@ struct alt_instr {
 
 extern void add_nops(void *insns, unsigned int len);
 /* Similar to alternative_instructions except it can be run with IRQs enabled. */
-extern void apply_alternatives(const struct alt_instr *start,
-                               const struct alt_instr *end);
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 extern void alternative_instructions(void);
 
-#define OLDINSTR(oldinstr) ".LXEN%=_orig_s:\n\t" oldinstr "\n.LXEN%=_orig_e:\n"
-
 #define alt_orig_len       "(.LXEN%=_orig_e - .LXEN%=_orig_s)"
+#define alt_pad_len        "(.LXEN%=_orig_p - .LXEN%=_orig_e)"
+#define alt_total_len      "(.LXEN%=_orig_p - .LXEN%=_orig_s)"
 #define alt_repl_s(num)    ".LXEN%=_repl_s"#num
 #define alt_repl_e(num)    ".LXEN%=_repl_e"#num
 #define alt_repl_len(num)  "(" alt_repl_e(num) " - " alt_repl_s(num) ")"
 
+/* GAS's idea of true is -1, while Clang's idea is 1. */
+#ifdef HAVE_AS_NEGATIVE_TRUE
+# define AS_TRUE "-"
+#else
+# define AS_TRUE ""
+#endif
+
+#define as_max(a, b) "(("a") ^ ((("a") ^ ("b")) & -("AS_TRUE"(("a") < ("b")))))"
+
+#define OLDINSTR(oldinstr, padding)                              \
+    ".LXEN%=_orig_s:\n\t" oldinstr "\n .LXEN%=_orig_e:\n\t"      \
+    ".LXEN%=_diff = " padding "\n\t"                             \
+    ".skip "AS_TRUE"(.LXEN%=_diff > 0) * .LXEN%=_diff, 0x90\n\t" \
+    ".LXEN%=_orig_p:\n\t"
+
+#define OLDINSTR_1(oldinstr, n1)                                 \
+    OLDINSTR(oldinstr, alt_repl_len(n1) "-" alt_orig_len)
+
+#define OLDINSTR_2(oldinstr, n1, n2) /* pad: max(repl) - orig */ \
+    OLDINSTR(oldinstr,               /* as_max() takes two args  */ \
+             as_max(alt_repl_len(n1),                            \
+                    alt_repl_len(n2)) "-" alt_orig_len)
+
 #define ALTINSTR_ENTRY(feature, num)                                    \
         " .long .LXEN%=_orig_s - .\n"             /* label           */ \
         " .long " alt_repl_s(num)" - .\n"         /* new instruction */ \
         " .word " __stringify(feature) "\n"       /* feature bit     */ \
         " .byte " alt_orig_len "\n"               /* source len      */ \
-        " .byte " alt_repl_len(num) "\n"          /* replacement len */
+        " .byte " alt_repl_len(num) "\n"          /* replacement len */ \
+        " .byte " alt_pad_len "\n"                /* padding len     */ \
+        " .byte 0\n"                              /* priv            */
 
-#define DISCARD_ENTRY(num)                        /* repl <= orig */    \
-        " .byte 0xff + (" alt_repl_len(num) ") - (" alt_orig_len ")\n"
+#define DISCARD_ENTRY(num)                        /* repl <= total */   \
+        " .byte 0xff + (" alt_repl_len(num) ") - (" alt_total_len ")\n"
 
 #define ALTINSTR_REPLACEMENT(newinstr, num)       /* replacement */     \
         alt_repl_s(num)":\n\t" newinstr "\n" alt_repl_e(num) ":\n\t"
 
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)                        \
-        OLDINSTR(oldinstr)                                              \
+        OLDINSTR_1(oldinstr, 1)                                         \
         ".pushsection .altinstructions, \"a\", @progbits\n"             \
         ALTINSTR_ENTRY(feature, 1)                                      \
         ".section .discard, \"a\", @progbits\n"                         \
+        ".byte " alt_total_len "\n" /* total_len <= 255 */              \
         DISCARD_ENTRY(1)                                                \
         ".section .altinstr_replacement, \"ax\", @progbits\n"           \
         ALTINSTR_REPLACEMENT(newinstr, 1)                               \
         ".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
-        OLDINSTR(oldinstr)                                              \
+        OLDINSTR_2(oldinstr, 1, 2)                                      \
         ".pushsection .altinstructions, \"a\", @progbits\n"             \
         ALTINSTR_ENTRY(feature1, 1)                                     \
         ALTINSTR_ENTRY(feature2, 2)                                     \
         ".section .discard, \"a\", @progbits\n"                         \
+        ".byte " alt_total_len "\n" /* total_len <= 255 */              \
         DISCARD_ENTRY(1)                                                \
         DISCARD_ENTRY(2)                                                \
         ".section .altinstr_replacement, \"ax\", @progbits\n"           \
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.