
[Xen-devel] [PATCH v6.5 23/26] x86/entry: Clobber the Return Stack Buffer on entry to Xen



ret instructions are unconditionally speculated based on values in the Return
Stack Buffer (RSB).  If any path in Xen executes more ret than call
instructions, speculation can start following a guest-controlled RSB entry.
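
To illustrate the hazard (a hand-written sketch, not code from this series):
call pushes a return address onto both the stack and the RSB, while ret pops
both, so a stack switch leaves later rets unmatched.  The saved_rsp symbol
is purely illustrative:

      call  wake                # pushes a return address (stack) and a
                                # prediction (RSB)
  wake:
      mov   saved_rsp, %rsp     # resume a previously-saved context
      ret                       # consumes the RSB entry pushed above
      ...                       # the resumed context unwinds its own
                                # call chain
      ret                       # unmatched: predicted from a stale,
                                # possibly guest-poisoned, RSB entry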

There is at least one path (wake from waitqueue) which can end up executing
more ret than call instructions.  There may be other paths as well.

To mitigate, overwrite the RSB (when appropriate; see code for details) when
entering Xen from guest context.
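
For clarity, the clobber loop added below (DO_OVERWRITE_RSB) boils down to
the following sketch, assuming a 32-entry RSB:

      mov   $32, %ecx
  1:  call  2f                  # push a return address (stack) and a
                                # prediction (RSB), both pointing at the
                                # pause below
      pause                     # a speculated ret lands here harmlessly
  2:  sub   $1, %ecx
      jnz   1b                  # 32 iterations fill the whole RSB
      add   $32*8, %rsp         # discard the 32 return addresses; the
                                # benign RSB entries remain
      lfence                    # ss=1 case only: stop a later ret from
                                # speculatively executing before the
                                # loop has architecturally completed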

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
v2:
 * Rename pv/hvm to native/vmexit to be clearer
 * Reorder before SPEC_CTRL_ENTRY
 * Calculate when to clobber the RSB
 * Skip clobbering the RSB when interrupting Xen
v3:
 * Rework to be consistent with the rewritten SPEC_CTRL_* patch
v4:
 * Merge OVERWRITE_RSB_* into SPEC_CTRL_ENTRY_* to enforce the ordering
   dependency.
 * Rebase over AMD changes
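
For testing, the new options combine with the existing bti= sub-options in
the obvious way; an illustrative (not prescriptive) example:

    bti=thunk=retpoline,rsb_native=1,rsb_vmexit=0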
---
 docs/misc/xen-command-line.markdown |  6 ++-
 xen/arch/x86/spec_ctrl.c            | 81 +++++++++++++++++++++++++++++++++----
 xen/include/asm-x86/cpufeature.h    |  1 +
 xen/include/asm-x86/cpufeatures.h   |  4 ++
 xen/include/asm-x86/nops.h          |  2 +
 xen/include/asm-x86/spec_ctrl_asm.h | 46 +++++++++++++++++++++
 6 files changed, 131 insertions(+), 9 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 3429484..8bffe44 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -246,7 +246,7 @@ enough. Setting this to a high value may cause boot failure, particularly if
 the NMI watchdog is also enabled.
 
 ### bti (x86)
-> `= List of [ thunk=retpoline|lfence|plain, ibrs=<bool> ]`
+> `= List of [ thunk=retpoline|lfence|plain, ibrs=<bool>, rsb_{vmexit,native}=<bool> ]`
 
 Branch Target Injection controls.  By default, Xen will pick the most
 appropriate BTI mitigations based on compiled in support, loaded microcode,
@@ -263,6 +263,10 @@ On hardware supporting IBRS, the `ibrs=` option can be used to force or
 prevent Xen using the feature itself.  If Xen is not using IBRS itself,
 functionality is still set up so IBRS can be virtualised for guests.
 
+The `rsb_vmexit=` and `rsb_native=` options can be used to fine-tune when the
+RSB gets overwritten.  There are individual controls for an entry from HVM
+context, and an entry from a native (PV or Xen) context.
+
 ### xenheap\_megabytes (arm32)
 > `= <size>`
 
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index 1cccb8a..bbf8f96 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -33,6 +33,7 @@ enum ind_thunk {
     THUNK_JMP,
 } opt_thunk __initdata = THUNK_DEFAULT;
 int opt_ibrs __initdata = -1;
+int opt_rsb_native __initdata = -1, opt_rsb_vmexit __initdata = -1;
 
 static int __init parse_bti(const char *s)
 {
@@ -59,6 +60,10 @@ static int __init parse_bti(const char *s)
         }
         else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 )
             opt_ibrs = val;
+        else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 )
+            opt_rsb_native = val;
+        else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 )
+            opt_rsb_vmexit = val;
         else
             rc = -EINVAL;
 
@@ -71,21 +76,23 @@ custom_param("bti", parse_bti);
 
 static void __init print_details(enum ind_thunk thunk)
 {
-    unsigned int _7d0 = 0, e8b = 0, tmp;
+    unsigned int _7b0 = 0, _7d0 = 0, e8b = 0, tmp;
 
     /* Collect diagnostics about available mitigations. */
     if ( boot_cpu_data.cpuid_level >= 7 )
-        cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0);
+        cpuid_count(7, 0, &tmp, &_7b0, &tmp, &_7d0);
     if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 )
         cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp);
 
     printk(XENLOG_DEBUG "Speculative mitigation facilities:\n");
 
     /* Hardware features which pertain to speculative mitigations. */
-    if ( (_7d0 & (cpufeat_mask(X86_FEATURE_IBRSB) |
+    if ( (_7b0 & cpufeat_mask(X86_FEATURE_SMEP)) ||
+         (_7d0 & (cpufeat_mask(X86_FEATURE_IBRSB) |
                   cpufeat_mask(X86_FEATURE_STIBP))) ||
          (e8b & cpufeat_mask(X86_FEATURE_IBPB)) )
-        printk(XENLOG_DEBUG "  Hardware features:%s%s%s\n",
+        printk(XENLOG_DEBUG "  Hardware features:%s%s%s%s\n",
+               (_7b0 & cpufeat_mask(X86_FEATURE_SMEP))  ? " SMEP"      : "",
                (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
                (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
                (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "");
@@ -95,13 +102,18 @@ static void __init print_details(enum ind_thunk thunk)
         printk(XENLOG_DEBUG "  Compiled-in support: INDIRECT_THUNK\n");
 
     printk(XENLOG_INFO
-           "BTI mitigations: Thunk %s, Others:%s\n",
+           "BTI mitigations: Thunk %s, Others:%s%s%s%s\n",
            thunk == THUNK_NONE      ? "N/A" :
            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
            thunk == THUNK_LFENCE    ? "LFENCE" :
            thunk == THUNK_JMP       ? "JMP" : "?",
            boot_cpu_has(X86_FEATURE_XEN_IBRS_SET)    ? " IBRS+" :
-           boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR)  ? " IBRS-"      : "");
+           boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR)  ? " IBRS-"      : "",
+           cpu_has_xen_smep                          ? " SMEP"       : "",
+           (boot_cpu_has(X86_FEATURE_RSB_VMEXIT) ||
+            boot_cpu_has(X86_FEATURE_RSB_VMEXIT_SS)) ? " RSB_VMEXIT" : "",
+           (boot_cpu_has(X86_FEATURE_RSB_NATIVE) ||
+            boot_cpu_has(X86_FEATURE_RSB_NATIVE_SS)) ? " RSB_NATIVE" : "");
 }
 
 /* Calculate whether Retpoline is known-safe on this CPU. */
@@ -161,13 +173,14 @@ static bool __init retpoline_safe(void)
 void __init init_speculation_mitigations(void)
 {
     enum ind_thunk thunk = THUNK_DEFAULT;
-    bool ibrs = false;
+    bool ibrs = false, have_mitigation = true;
 
     /*
      * Has the user specified any custom BTI mitigations?  If so, follow their
      * instructions exactly and disable all heuristics.
      */
-    if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 )
+    if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 ||
+         opt_rsb_native != -1 || opt_rsb_vmexit != -1 )
     {
         thunk = opt_thunk;
         ibrs  = !!opt_ibrs;
@@ -201,6 +214,9 @@ void __init init_speculation_mitigations(void)
         /* Without compiler thunk support, use IBRS if available. */
         else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
             ibrs = true;
+        /* Or give up completely. */
+        else
+            have_mitigation = false;
     }
 
     /*
@@ -242,6 +258,55 @@ void __init init_speculation_mitigations(void)
             setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR);
     }
 
+    /*
+     * Only bother overwriting the RSBs if we have a BTI mitigation available.
+     * Otherwise, we're already wide open to easier attacks than RSB-poisoning.
+     */
+    if ( have_mitigation )
+    {
+        /*
+         * If we are writing to MSR_SPEC_CTRL, the WRMSR is sufficiently
+         * serialising to protect against speculative exits of the RSB loop.
+         * If not, the RSB loop needs to provide its own speculative defence.
+         */
+        bool ss = !(boot_cpu_has(X86_FEATURE_XEN_IBRS_SET) ||
+                    boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR));
+
+        /*
+         * HVM guests can always poison the RSB to point at Xen supervisor
+         * mappings.
+         */
+        if ( opt_rsb_vmexit )
+        {
+            BUILD_BUG_ON(X86_FEATURE_RSB_VMEXIT_SS !=
+                         X86_FEATURE_RSB_VMEXIT + 1);
+
+            setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT + ss);
+        }
+
+        /*
+         * PV guests can poison the RSB to any virtual address from which
+         * they can execute a call instruction.  This is necessarily outside
+         * of the Xen supervisor mappings.
+         *
+         * With SMEP enabled, the processor won't speculate into user
+         * mappings, and we therefore don't need to worry about poisoned
+         * entries.
+         *
+         * 32bit PV guest kernels run in ring 1, so they use supervisor mappings.
+         * However, nothing secret lives below the 4G boundary, so a 32bit PV
+         * guest can't do anything useful by hijacking execution.
+         */
+        if ( opt_rsb_native == 1 ||
+             (opt_rsb_native == -1 && !cpu_has_xen_smep) )
+        {
+            BUILD_BUG_ON(X86_FEATURE_RSB_NATIVE_SS !=
+                         X86_FEATURE_RSB_NATIVE + 1);
+
+            setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE + ss);
+        }
+    }
+
     print_details(thunk);
 }
 
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 988a834..b7667b4 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -107,6 +107,7 @@
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_cpuid_faulting  boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
 #define cpu_has_aperfmperf      boot_cpu_has(X86_FEATURE_APERFMPERF)
+#define cpu_has_xen_smep        boot_cpu_has(X86_FEATURE_XEN_SMEP)
 #define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH)
 
 enum _cache_type {
diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
index dd2388f..56dd8f4 100644
--- a/xen/include/asm-x86/cpufeatures.h
+++ b/xen/include/asm-x86/cpufeatures.h
@@ -28,3 +28,7 @@ XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */
 XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */
 XEN_CPUFEATURE(XEN_IBRS_SET,    (FSCAPINTS+0)*32+16) /* IBRSB && IBRS set in Xen */
 XEN_CPUFEATURE(XEN_IBRS_CLEAR,  (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */
+XEN_CPUFEATURE(RSB_NATIVE,      (FSCAPINTS+0)*32+18) /* RSB overwrite needed for native */
+XEN_CPUFEATURE(RSB_NATIVE_SS,   (FSCAPINTS+0)*32+19) /* RSB_NATIVE must self-serialise */
+XEN_CPUFEATURE(RSB_VMEXIT,      (FSCAPINTS+0)*32+20) /* RSB overwrite needed for vmexit */
+XEN_CPUFEATURE(RSB_VMEXIT_SS,   (FSCAPINTS+0)*32+21) /* RSB_VMEXIT must self-serialise */
diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h
index 9e8f530..752fb0e 100644
--- a/xen/include/asm-x86/nops.h
+++ b/xen/include/asm-x86/nops.h
@@ -67,9 +67,11 @@
 
 #define ASM_NOP22 ASM_NOP8 ASM_NOP8 ASM_NOP6
 #define ASM_NOP26 ASM_NOP8 ASM_NOP8 ASM_NOP8 ASM_NOP2
+#define ASM_NOP27 ASM_NOP8 ASM_NOP8 ASM_NOP8 ASM_NOP3
 #define ASM_NOP32 ASM_NOP8 ASM_NOP8 ASM_NOP8 ASM_NOP8
 #define ASM_NOP33 ASM_NOP8 ASM_NOP8 ASM_NOP8 ASM_NOP7 ASM_NOP2
 #define ASM_NOP39 ASM_NOP8 ASM_NOP8 ASM_NOP8 ASM_NOP8 ASM_NOP7
+#define ASM_NOP40 ASM_NOP8 ASM_NOP8 ASM_NOP8 ASM_NOP8 ASM_NOP8
 
 #define ASM_NOP_MAX 9
 
diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
index 13e058c..430b440 100644
--- a/xen/include/asm-x86/spec_ctrl_asm.h
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -73,6 +73,37 @@
  *  - SPEC_CTRL_EXIT_TO_GUEST
  */
 
+.macro DO_OVERWRITE_RSB maybexen:req ss:req
+/*
+ * Req: %rsp=regs
+ * Clobbers %ecx
+ *
+ * Requires 256 bytes of stack space, but %rsp has no net change.  Optionally
+ * checks for interrupting Xen context, in which case the clobber is skipped.
+ *
+ * For safety, there must be an instruction stream serialising event between
+ * this loop and the next unmatched ret, to prevent an early speculative exit.
+ * If IBRS is in use, its WRMSR is sufficiently serialising.  If IBRS is not
+ * available, place an lfence after the loop to serialise.
+ */
+    .if \maybexen
+        cmpl $__HYPERVISOR_CS, UREGS_cs(%rsp)
+        je .Lend_\@
+    .endif
+
+    mov $32, %ecx
+.Lloop_\@: call .Lcall_\@
+    pause
+.Lcall_\@: sub $1, %ecx
+    jnz .Lloop_\@
+    add $32*8, %rsp
+.Lend_\@:
+
+    .if \ss /* Need to self-serialise? */
+        lfence
+    .endif
+.endm
+
 .macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req
 /*
  * Requires %rbx=current, %rsp=regs/cpuinfo
@@ -178,6 +209,11 @@
 
 /* Use after a VMEXIT from an HVM guest. */
 #define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
+    ALTERNATIVE_2 __stringify(ASM_NOP27),                               \
+        "DO_OVERWRITE_RSB maybexen=0 ss=1",                             \
+        X86_FEATURE_RSB_VMEXIT_SS,                                      \
+        "DO_OVERWRITE_RSB maybexen=0 ss=0",                             \
+        X86_FEATURE_RSB_VMEXIT;                                         \
     ALTERNATIVE_2 __stringify(ASM_NOP32),                               \
         __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
@@ -188,6 +224,11 @@
 
 /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
 #define SPEC_CTRL_ENTRY_FROM_PV                                         \
+    ALTERNATIVE_2 __stringify(ASM_NOP27),                               \
+        "DO_OVERWRITE_RSB maybexen=0 ss=1",                             \
+        X86_FEATURE_RSB_NATIVE_SS,                                      \
+        "DO_OVERWRITE_RSB maybexen=0 ss=0",                             \
+        X86_FEATURE_RSB_NATIVE;                                         \
     ALTERNATIVE_2 __stringify(ASM_NOP22),                               \
         __stringify(DO_SPEC_CTRL_ENTRY maybexen=0                       \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
@@ -197,6 +238,11 @@
 
 /* Use in interrupt/exception context.  May interrupt Xen or PV context. */
 #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
+    ALTERNATIVE_2 __stringify(ASM_NOP40),                               \
+        "DO_OVERWRITE_RSB maybexen=1 ss=1",                             \
+        X86_FEATURE_RSB_NATIVE_SS,                                      \
+        "DO_OVERWRITE_RSB maybexen=1 ss=0",                             \
+        X86_FEATURE_RSB_NATIVE;                                         \
     ALTERNATIVE_2 __stringify(ASM_NOP39),                               \
         __stringify(DO_SPEC_CTRL_ENTRY maybexen=1                       \
                     ibrs_val=SPEC_CTRL_IBRS),                           \
-- 
2.1.4

