[Xen-devel] [PATCH RFC] x86emul: conditionally clear BNDn for branches



Considering that we surface MPX to HVM guests, instructions we emulate
should also deal correctly with MPX state. While BND* instructions
themselves don't get emulated for now, the effect of the branches we do
emulate needs taking care of: executed without a BND (F2) prefix, they
architecturally initialize BND0-BND3 whenever MPX is enabled and
BNDPRESERVE is clear.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
RFC reasons:
- I haven't been able to find a layout specification for the BNDCSR
  area in the SDM; struct xstate_bndcsr's layout is therefore guessed.
- The SDM volumes contradict one another regarding short conditional
  branch behavior: Vol 1 says they init BNDn (except for J{,E,R}CXZ),
  while Vol 2 says BND handling affects near branches only. The code
  for now assumes only near ones are affected.
- The SDM leaves open whether only taken Jcc will cause BNDn init, or
  whether not-taken ones do so as well. The code for now assumes only
  taken ones do.
- It is also left unclear whether addressing forms without a base
  address register are valid for BNDMK (the instruction page
  references SRCMEM.base without indicating what this means in that
  specific case). The code uses an explicit base register for that
  reason (which also yields slightly smaller code).
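
For reference, the condition clear_bnd() below acts on can be written
as a minimal standalone sketch (purely illustrative, not part of the
patch; legacy_branch_inits_bnd() is a made-up name, while the constants
mirror the BNDCFG_* ones the patch introduces):

#include <stdbool.h>
#include <stdint.h>

#define BNDCFG_ENABLE    (1 << 0) /* EN bit of BNDCFGU / IA32_BNDCFGS */
#define BNDCFG_PRESERVE  (1 << 1) /* BNDPRESERVE bit */

/*
 * A taken near branch initializes BND0-BND3 iff it carries no BND (F2)
 * prefix, MPX is enabled at the current privilege level, and
 * BNDPRESERVE is clear.
 */
static bool legacy_branch_inits_bnd(uint64_t bndcfg, bool bnd_prefixed)
{
    return !bnd_prefixed &&
           (bndcfg & BNDCFG_ENABLE) &&
           !(bndcfg & BNDCFG_PRESERVE);
}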

--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -10,11 +10,15 @@ typedef bool bool_t;
 
 #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63))
 
+#define EFER_SCE       (1 << 0)
+#define EFER_LMA       (1 << 10)
+
 #define BUG() abort()
 #define ASSERT assert
 
 #define cpu_has_amd_erratum(nr) 0
 #define mark_regs_dirty(r) ((void)(r))
+#define read_bndcfgu() 0
 
 #define __packed __attribute__((packed))
 
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -14,6 +14,7 @@ $(call as-insn-check,CFLAGS,CC,"vmcall",
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
+$(call as-insn-check,CFLAGS,CC,"bndmov %bnd0$$(comma)%bnd0",-DHAVE_GAS_MPX)
 $(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \
                      -U__OBJECT_LABEL__ -DHAVE_GAS_QUOTED_SYM \
                      '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -421,6 +421,8 @@ int vcpu_initialise(struct vcpu *v)
 
         vmce_init_vcpu(v);
     }
+    else if ( (rc = xstate_alloc_save_area(v)) != 0 )
+        return rc;
 
     spin_lock_init(&v->arch.vpmu.vpmu_lock);
 
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2623,6 +2623,11 @@ static int vmx_msr_read_intercept(unsign
     case MSR_IA32_DEBUGCTLMSR:
         __vmread(GUEST_IA32_DEBUGCTL, msr_content);
         break;
+    case MSR_IA32_BNDCFGS:
+        if ( !cpu_has_mpx )
+            goto gp_fault;
+        __vmread(GUEST_BNDCFGS, msr_content);
+        break;
     case IA32_FEATURE_CONTROL_MSR:
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_TRUE_ENTRY_CTLS:
         if ( !nvmx_msr_read_intercept(msr, msr_content) )
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -13,6 +13,7 @@
 #include <asm/x86_emulate.h>
 #include <asm/asm_defns.h> /* mark_regs_dirty() */
 #include <asm/processor.h> /* current_cpu_info */
+#include <asm/xstate.h> /* read_bndcfgu() */
 #include <asm/amd.h> /* cpu_has_amd_erratum() */
 
 /* Avoid namespace pollution. */
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -355,9 +355,10 @@ typedef union {
 #define MSR_SYSENTER_CS  0x00000174
 #define MSR_SYSENTER_ESP 0x00000175
 #define MSR_SYSENTER_EIP 0x00000176
+#define MSR_BNDCFGS      0x00000d90
+#define BNDCFG_ENABLE    (1 << 0)
+#define BNDCFG_PRESERVE  (1 << 1)
 #define MSR_EFER         0xc0000080
-#define EFER_SCE         (1u<<0)
-#define EFER_LMA         (1u<<10)
 #define MSR_STAR         0xc0000081
 #define MSR_LSTAR        0xc0000082
 #define MSR_CSTAR        0xc0000083
@@ -1090,6 +1091,7 @@ static bool_t vcpu_has(
 #define vcpu_has_clflush() vcpu_has(       1, EDX, 19, ctxt, ops)
 #define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_bmi1()  vcpu_has(0x00000007, EBX,  3, ctxt, ops)
+#define vcpu_has_mpx()   vcpu_has(0x00000007, EBX, 14, ctxt, ops)
 
 #define vcpu_must_have(leaf, reg, bit) \
     generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
@@ -1491,6 +1493,41 @@ static int inject_swint(enum x86_swint_t
     return ops->inject_hw_exception(fault_type, error_code, ctxt);
 }
 
+static void clear_bnd(struct x86_emulate_ctxt *ctxt,
+                      const struct x86_emulate_ops *ops, enum vex_pfx pfx)
+{
+    uint64_t bndcfg;
+    int rc;
+
+    if ( pfx == vex_f2 || !vcpu_has_mpx() )
+        return;
+
+#ifdef __XEN__
+    ASSERT(cpu_has_mpx);
+#endif
+
+    if ( !mode_ring0() )
+        bndcfg = read_bndcfgu();
+    else if ( !ops->read_msr ||
+              ops->read_msr(MSR_BNDCFGS, &bndcfg, ctxt) )
+        return;
+    if ( (bndcfg & BNDCFG_ENABLE) && !(bndcfg & BNDCFG_PRESERVE) )
+    {
+#ifdef HAVE_GAS_MPX
+        asm volatile ( "bndmk -1(%0), %%bnd0" :: "r" (0L) );
+        asm volatile ( "bndmk -1(%0), %%bnd1" :: "r" (0L) );
+        asm volatile ( "bndmk -1(%0), %%bnd2" :: "r" (0L) );
+        asm volatile ( "bndmk -1(%0), %%bnd3" :: "r" (0L) );
+#else
+        asm volatile ( ".byte 0xf3, 0x0f, 0x1b, 0x41, -1" :: "c" (0L) ); /* bndmk -1(%rcx),%bnd0 */
+        asm volatile ( ".byte 0xf3, 0x0f, 0x1b, 0x49, -1" :: "c" (0L) ); /* bndmk -1(%rcx),%bnd1 */
+        asm volatile ( ".byte 0xf3, 0x0f, 0x1b, 0x51, -1" :: "c" (0L) ); /* bndmk -1(%rcx),%bnd2 */
+        asm volatile ( ".byte 0xf3, 0x0f, 0x1b, 0x59, -1" :: "c" (0L) ); /* bndmk -1(%rcx),%bnd3 */
+#endif
+    }
+ done:; /* fail_if() in mode_ring0() branches here */
+}
+
 int x86emul_unhandleable_rw(
     enum x86_segment seg,
     unsigned long offset,
@@ -2720,6 +2757,7 @@ x86_emulate(
              (rc = ops->insn_fetch(x86_seg_cs, dst.val, NULL, 0, ctxt)) )
             goto done;
         _regs.eip = dst.val;
+        clear_bnd(ctxt, ops, vex.pfx);
         break;
     }
 
@@ -3465,6 +3503,7 @@ x86_emulate(
         op_bytes = ((op_bytes == 4) && mode_64bit()) ? 8 : op_bytes;
         src.val = _regs.eip;
         jmp_rel(rel);
+        clear_bnd(ctxt, ops, vex.pfx);
         goto push;
     }
 
@@ -3473,6 +3512,7 @@ x86_emulate(
                    ? (int32_t)insn_fetch_type(int16_t)
                    : insn_fetch_type(int32_t));
         jmp_rel(rel);
+        clear_bnd(ctxt, ops, vex.pfx);
         break;
     }
 
@@ -4443,7 +4483,10 @@ x86_emulate(
                    ? (int32_t)insn_fetch_type(int16_t)
                    : insn_fetch_type(int32_t));
         if ( test_cc(b, _regs.eflags) )
+        {
             jmp_rel(rel);
+            clear_bnd(ctxt, ops, vex.pfx);
+        }
         break;
     }
 
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -470,15 +470,33 @@ bool_t xsave_enabled(const struct vcpu *
 int xstate_alloc_save_area(struct vcpu *v)
 {
     struct xsave_struct *save_area;
+    unsigned int size;
 
-    if ( !cpu_has_xsave || is_idle_vcpu(v) )
+    if ( !cpu_has_xsave )
         return 0;
 
-    BUG_ON(xsave_cntxt_size < XSTATE_AREA_MIN_SIZE);
+    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
+    {
+        size = xsave_cntxt_size;
+        BUG_ON(size < XSTATE_AREA_MIN_SIZE);
+    }
+    else
+    {
+        /*
+         * For idle vcpus on XSAVEC-capable CPUs allocate an area large
+         * enough to save any individual extended state.
+         */
+        unsigned int i;
+
+        for ( size = 0, i = 2; i < xstate_features; ++i )
+            if ( size < xstate_sizes[i] )
+                size = xstate_sizes[i];
+        size += XSTATE_AREA_MIN_SIZE;
+    }
 
     /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
     BUILD_BUG_ON(__alignof(*save_area) < 64);
-    save_area = _xzalloc(xsave_cntxt_size, __alignof(*save_area));
+    save_area = _xzalloc(size, __alignof(*save_area));
     if ( save_area == NULL )
         return -ENOMEM;
 
@@ -697,6 +715,40 @@ int handle_xsetbv(u32 index, u64 new_bv)
     return 0;
 }
 
+uint64_t read_bndcfgu(void)
+{
+    unsigned long cr0 = read_cr0();
+    struct xsave_struct *xstate
+        = idle_vcpu[smp_processor_id()]->arch.xsave_area;
+    const struct xstate_bndcsr *bndcsr;
+
+    clts();
+
+    if ( cpu_has_xsavec )
+    {
+        asm ( ".byte 0x0f,0xc7,0x27\n" /* xsavec */
+              : "=m" (*xstate)
+              : "a" (XSTATE(BNDCSR)), "d" (0), "D" (xstate) );
+
+        bndcsr = (void *)(xstate + 1); /* compacted: BNDCSR directly follows the header */
+    }
+    else
+    {
+        alternative_io(".byte 0x0f,0xae,0x27\n", /* xsave */
+                       ".byte 0x0f,0xae,0x37\n", /* xsaveopt */
+                       X86_FEATURE_XSAVEOPT,
+                       "=m" (*xstate),
+                       "a" (XSTATE(BNDCSR)), "d" (0), "D" (xstate));
+
+        bndcsr = (void *)xstate + xstate_offsets[_XSTATE_BNDCSR];
+    }
+
+    if ( cr0 & X86_CR0_TS )
+        write_cr0(cr0);
+
+    return xstate->xsave_hdr.xstate_bv & XSTATE(BNDCSR) ? bndcsr->bndcfgu : 0;
+}
+
 /*
  * Local variables:
  * mode: C
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -26,16 +26,30 @@
 #define XSAVE_HDR_OFFSET          FXSAVE_SIZE
 #define XSTATE_AREA_MIN_SIZE      (FXSAVE_SIZE + XSAVE_HDR_SIZE)
 
-#define XSTATE_FP      (1ULL << 0)
-#define XSTATE_SSE     (1ULL << 1)
-#define XSTATE_YMM     (1ULL << 2)
-#define XSTATE_BNDREGS (1ULL << 3)
-#define XSTATE_BNDCSR  (1ULL << 4)
-#define XSTATE_OPMASK  (1ULL << 5)
-#define XSTATE_ZMM     (1ULL << 6)
-#define XSTATE_HI_ZMM  (1ULL << 7)
-#define XSTATE_PKRU    (1ULL << 9)
-#define XSTATE_LWP     (1ULL << 62) /* AMD lightweight profiling */
+#define _XSTATE_FP      0
+#define _XSTATE_SSE     1
+#define _XSTATE_YMM     2
+#define _XSTATE_BNDREGS 3
+#define _XSTATE_BNDCSR  4
+#define _XSTATE_OPMASK  5
+#define _XSTATE_ZMM     6
+#define _XSTATE_HI_ZMM  7
+#define _XSTATE_PKRU    9
+#define _XSTATE_LWP     62 /* AMD lightweight profiling */
+
+#define XSTATE(n)      (1ULL << _XSTATE_##n)
+
+#define XSTATE_FP      XSTATE(FP)
+#define XSTATE_SSE     XSTATE(SSE)
+#define XSTATE_YMM     XSTATE(YMM)
+#define XSTATE_BNDREGS XSTATE(BNDREGS)
+#define XSTATE_BNDCSR  XSTATE(BNDCSR)
+#define XSTATE_OPMASK  XSTATE(OPMASK)
+#define XSTATE_ZMM     XSTATE(ZMM)
+#define XSTATE_HI_ZMM  XSTATE(HI_ZMM)
+#define XSTATE_PKRU    XSTATE(PKRU)
+#define XSTATE_LWP     XSTATE(LWP)
+
 #define XSTATE_FP_SSE  (XSTATE_FP | XSTATE_SSE)
 #define XCNTXT_MASK    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | \
                         XSTATE_ZMM | XSTATE_HI_ZMM | XSTATE_NONLAZY)
@@ -87,11 +101,17 @@ struct __attribute__((aligned (64))) xsa
     char data[];                             /* Variable layout states */
 };
 
+struct xstate_bndcsr {
+    uint64_t bndcfgu;
+    uint64_t bndstatus;
+};
+
 /* extended state operations */
 bool_t __must_check set_xcr0(u64 xfeatures);
 uint64_t get_xcr0(void);
 void set_msr_xss(u64 xss);
 uint64_t get_msr_xss(void);
+uint64_t read_bndcfgu(void);
 void xsave(struct vcpu *v, uint64_t mask);
 void xrstor(struct vcpu *v, uint64_t mask);
 bool_t xsave_enabled(const struct vcpu *v);
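
A remark on the XSAVEC path in read_bndcfgu() above: in the compacted
format a component's offset is the end of the 64-byte XSAVE header plus
the sizes of all lower-numbered components present in XCOMP_BV. Since
the requested-feature bitmap here contains only BNDCSR, nothing
precedes it and it lands directly behind the header, which is what the
"(void *)(xstate + 1)" expression relies on. This is also why the idle
vcpu save areas get sized as XSTATE_AREA_MIN_SIZE plus the largest
individual component: a single-component compacted save never needs
more. As a standalone sketch (illustrative only; compacted_offset() is
a made-up helper, and the optional 64-byte alignment of individual
components is ignored):

#include <stdint.h>

#define XSAVE_LEGACY_SIZE 512 /* FXSAVE-compatible legacy region */
#define XSAVE_HDR_SIZE     64 /* XSTATE_BV / XCOMP_BV header */

/* Offset of component 'comp' in a compacted (XSAVEC) save area. */
static unsigned int compacted_offset(uint64_t xcomp_bv, unsigned int comp,
                                     const unsigned int sizes[])
{
    unsigned int i, off = XSAVE_LEGACY_SIZE + XSAVE_HDR_SIZE;

    /* Components 0 and 1 (x87, SSE) live in the legacy region. */
    for ( i = 2; i < comp; ++i )
        if ( xcomp_bv & (1ULL << i) )
            off += sizes[i];
    return off;
}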

