[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v7 14/17] x86/entry: Clobber the Return Stack Buffer/Return Address Stack on entry to Xen



ret instructions are speculated directly to values recorded in the RSB/RAS, as
there is no uncertainty in well-formed code.  Guests can take advantage of
this in two ways:

  1) If they can find a path in Xen which executes more ret instructions than
     call instructions.  (At least one in the waitqueue infrastructure,
     probably others.)
  2) Use the fact that the RSB/RAS in hardware is actually a circular stack
     without a concept of empty.  (When it logically empties, stale values
     will start being used.)

To mitigate, unconditionally overwrite the RSB on entry to Xen with gadgets
which will capture and contain rogue speculation.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
v7:
 * Rewritten almost from scratch.  See code comments for details.
---
 docs/misc/xen-command-line.markdown |  6 +++++-
 xen/arch/x86/spec_ctrl.c            | 34 +++++++++++++++++++++++++++++--
 xen/include/asm-x86/cpufeatures.h   |  2 ++
 xen/include/asm-x86/nops.h          |  1 +
 xen/include/asm-x86/spec_ctrl_asm.h | 40 +++++++++++++++++++++++++++++++++++++
 5 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 1aa1225..8510e47 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -246,7 +246,7 @@ enough. Setting this to a high value may cause boot 
failure, particularly if
 the NMI watchdog is also enabled.
 
 ### bti (x86)
-> `= List of [ thunk=retpoline|lfence|jmp, ibrs=<bool> ]`
+> `= List of [ thunk=retpoline|lfence|jmp, ibrs=<bool>, 
rsb_{vmexit,native}=<bool> ]`
 
 Branch Target Injection controls.  By default, Xen will pick the most
 appropriate BTI mitigations based on compiled in support, loaded microcode,
@@ -265,6 +265,10 @@ On hardware supporting IBRS, the `ibrs=` option can be 
used to force or
 prevent Xen using the feature itself.  If Xen is not using IBRS itself,
 functionality is still set up so IBRS can be virtualised for guests.
 
+The `rsb_vmexit=` and `rsb_native=` options can be used to fine tune when the
+RSB gets overwritten.  There are individual controls for an entry from HVM
+context, and an entry from a native (PV or Xen) context.
+
 ### xenheap\_megabytes (arm32)
 > `= <size>`
 
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index 7b0daaf..680fabe 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -33,6 +33,7 @@ static enum ind_thunk {
     THUNK_JMP,
 } opt_thunk __initdata = THUNK_DEFAULT;
 static int opt_ibrs __initdata = -1;
+static bool opt_rsb_native __initdata = true, opt_rsb_vmexit __initdata = true;
 
 static int __init parse_bti(const char *s)
 {
@@ -59,6 +60,10 @@ static int __init parse_bti(const char *s)
         }
         else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 )
             opt_ibrs = val;
+        else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 )
+            opt_rsb_native = val;
+        else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 )
+            opt_rsb_vmexit = val;
         else
             rc = -EINVAL;
 
@@ -95,13 +100,15 @@ static void __init print_details(enum ind_thunk thunk)
         printk(XENLOG_DEBUG "  Compiled-in support: INDIRECT_THUNK\n");
 
     printk(XENLOG_INFO
-           "BTI mitigations: Thunk %s, Others:%s\n",
+           "BTI mitigations: Thunk %s, Others:%s%s%s\n",
            thunk == THUNK_NONE      ? "N/A" :
            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
            thunk == THUNK_LFENCE    ? "LFENCE" :
            thunk == THUNK_JMP       ? "JMP" : "?",
            boot_cpu_has(X86_FEATURE_XEN_IBRS_SET)    ? " IBRS+" :
-           boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR)  ? " IBRS-"      : "");
+           boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR)  ? " IBRS-"      : "",
+           boot_cpu_has(X86_FEATURE_RSB_NATIVE)      ? " RSB_NATIVE" : "",
+           boot_cpu_has(X86_FEATURE_RSB_VMEXIT)      ? " RSB_VMEXIT" : "");
 }
 
 /* Calculate whether Retpoline is known-safe on this CPU. */
@@ -243,6 +250,29 @@ void __init init_speculation_mitigations(void)
             setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR);
     }
 
+    /*
+     * PV guests can poison the RSB to any virtual address from which
+     * they can execute a call instruction.  This is necessarily outside
+     * of the Xen supervisor mappings.
+     *
+     * With SMEP enabled, the processor won't speculate into user
+     * mappings.  Therefore, we don't need to worry about poisoned
+     * entries from 64bit PV guests.
+     *
+     * 32bit PV guest kernels run in ring 1, so use supervisor mappings.
+     * If a processors speculates to 32bit PV guest kernel mappings, it is
+     * speculating in 64bit supervisor mode, and can leak data.
+     */
+    if ( opt_rsb_native )
+        setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE);
+
+    /*
+     * HVM guests can always poison the RSB to point at Xen supervisor
+     * mappings.
+     */
+    if ( opt_rsb_vmexit )
+        setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+
     print_details(thunk);
 }
 
diff --git a/xen/include/asm-x86/cpufeatures.h 
b/xen/include/asm-x86/cpufeatures.h
index dd2388f..0ee4a1f 100644
--- a/xen/include/asm-x86/cpufeatures.h
+++ b/xen/include/asm-x86/cpufeatures.h
@@ -28,3 +28,5 @@ XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPINTS+0)*32+14) /* Use 
IND_THUNK_JMP */
 XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */
 XEN_CPUFEATURE(XEN_IBRS_SET,    (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in 
Xen */
 XEN_CPUFEATURE(XEN_IBRS_CLEAR,  (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in 
Xen */
+XEN_CPUFEATURE(RSB_NATIVE,      (FSCAPINTS+0)*32+18) /* RSB overwrite needed 
for native */
+XEN_CPUFEATURE(RSB_VMEXIT,      (FSCAPINTS+0)*32+20) /* RSB overwrite needed 
for vmexit */
diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h
index 1738ba6..4f637d9 100644
--- a/xen/include/asm-x86/nops.h
+++ b/xen/include/asm-x86/nops.h
@@ -70,6 +70,7 @@
 #define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
 #define ASM_NOP33 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP7; ASM_NOP2
 #define ASM_NOP38 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP6
+#define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
 
 #define ASM_NOP_MAX 9
 
diff --git a/xen/include/asm-x86/spec_ctrl_asm.h 
b/xen/include/asm-x86/spec_ctrl_asm.h
index c380a3d..d36783e 100644
--- a/xen/include/asm-x86/spec_ctrl_asm.h
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -73,6 +73,40 @@
  *  - SPEC_CTRL_EXIT_TO_GUEST
  */
 
+.macro DO_OVERWRITE_RSB
+/*
+ * Req: %rsp=regs
+ * Clobbers %rax, %rcx
+ *
+ * Requires 256 bytes of stack space, but %rsp has no net change. Based on
+ * Google's performance numbers, the loop is unrolled to 16 iterations and two
+ * calls per iteration.
+ *
+ * The call filling the RSB needs a nonzero displacement, but we use "1:
+ * pause, call 1b" to safely contains any ret-based speculation, even if the
+ * loop is speculatively executed prematurely.
+ *
+ * %rsp is preserved by using an extra GPR because a) we've got plenty spare,
+ * b) the two movs are shorter to encode than `add $32*8, %rsp`, and c) can be
+ * optimised with mov-elimination in modern cores.
+ */
+    mov $16, %ecx   /* 16 iterations, two calls per loop */
+    mov %rsp, %rax  /* Store the current %rsp */
+
+.Lloop_\@:
+
+    .rept 2         /* Unrolled twice. */
+    call 2f         /* Create an RSB entry. */
+1:  pause
+    call 1b         /* Capture rogue speculation. */
+2:
+    .endr
+
+    sub $1, %ecx
+    jnz .Lloop_\@
+    mov %rax, %rsp  /* Retore old %rsp */
+.endm
+
 .macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req
 /*
  * Requires %rbx=current, %rsp=regs/cpuinfo
@@ -178,6 +212,8 @@
 
 /* Use after a VMEXIT from an HVM guest. */
 #define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
+    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+        DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP32),                               \
         __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
@@ -188,6 +224,8 @@
 
 /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
 #define SPEC_CTRL_ENTRY_FROM_PV                                         \
+    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+        DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP22),                               \
         __stringify(DO_SPEC_CTRL_ENTRY maybexen=0                       \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
@@ -197,6 +235,8 @@
 
 /* Use in interrupt/exception context.  May interrupt Xen or PV context. */
 #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
+    ALTERNATIVE __stringify(ASM_NOP40),                                 \
+        DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
     ALTERNATIVE_2 __stringify(ASM_NOP38),                               \
         __stringify(DO_SPEC_CTRL_ENTRY maybexen=1                       \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.