[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 3/3] x86/vmx: fix vmentry failure with TSX bits in LBR



During VM entry, H/W will automatically load guest's MSRs from MSR-load
area in the same way as they would be written by WRMSR.

However, under the following conditions:

    1. LBR (Last Branch Record) MSRs were placed in the MSR-load area
    2. Address format of LBR includes TSX bits 61:62
    3. CPU has TSX support disabled

VM entry will fail with a message in the log similar to:

    (XEN) [   97.239514] d1v0 vmentry failure (reason 0x80000022): MSR loading 
(entry 3)
    (XEN) [   97.239516]   msr 00000680 val 1fff800000102e60 (mbz 0)

This happens because of the following behaviour:

    - When capturing branches, LBR H/W will always clear bits 61:62
      regardless of the sign extension
    - For WRMSR, bits 61:62 are considered the part of the sign extension

This bug affects only certain pCPUs (e.g. Haswell) with vCPUs that
use LBR.  Fix it by sign-extending TSX bits in all LBR entries during
VM entry in affected cases.

LBR MSRs are currently not Live Migrated. In order to implement such
functionality, the MSR levelling work has to be done first because
hosts can have different LBR formats.

Signed-off-by: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>
---
v2 --> v3:
- "vmx_" prefixes are removed from static symbols
- blank lines around lbr_tsx_fixup_enabled are added
- Renamed "LBR_FORMAT_MSR_IA32_PERF_CAP" --> "MSR_IA32_PERF_CAP_LBR_FORMAT"
- Removed "if" in lbr_tsx_fixup_enabled assignment

 xen/arch/x86/hvm/vmx/vmx.c         | 90 ++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/cpufeature.h   |  3 ++
 xen/include/asm-x86/hvm/vmx/vmcs.h |  3 ++
 xen/include/asm-x86/msr-index.h    |  2 +
 4 files changed, 98 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 597d7ac..4bee90c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2298,6 +2298,8 @@ static void pi_notification_interrupt(struct 
cpu_user_regs *regs)
     raise_softirq(VCPU_KICK_SOFTIRQ);
 }
 
+static void __init lbr_tsx_fixup_check(void);
+
 const struct hvm_function_table * __init start_vmx(void)
 {
     set_in_cr4(X86_CR4_VMXE);
@@ -2367,6 +2369,8 @@ const struct hvm_function_table * __init start_vmx(void)
 
     setup_vmcs_dump();
 
+    lbr_tsx_fixup_check();
+
     return &vmx_function_table;
 }
 
@@ -2527,6 +2531,14 @@ static int vmx_cr_access(unsigned long 
exit_qualification)
     return X86EMUL_OKAY;
 }
 
+/* This defines the layout of struct lbr_info[] */
+#define LBR_LASTINT_FROM_IDX    0
+#define LBR_LASTINT_TO_IDX      1
+#define LBR_LASTBRANCH_TOS_IDX  2
+#define LBR_LASTBRANCH_FROM_IDX 3
+#define LBR_LASTBRANCH_TO_IDX   4
+#define LBR_LASTBRANCH_INFO     5
+
 static const struct lbr_info {
     u32 base, count;
 } p4_lbr[] = {
@@ -2632,6 +2644,56 @@ static const struct lbr_info *last_branch_msr_get(void)
     return NULL;
 }
 
+enum
+{
+    LBR_FORMAT_32                 = 0x0, /* 32-bit record format */
+    LBR_FORMAT_LIP                = 0x1, /* 64-bit LIP record format */
+    LBR_FORMAT_EIP                = 0x2, /* 64-bit EIP record format */
+    LBR_FORMAT_EIP_FLAGS          = 0x3, /* 64-bit EIP, Flags */
+    LBR_FORMAT_EIP_FLAGS_TSX      = 0x4, /* 64-bit EIP, Flags, TSX */
+    LBR_FORMAT_EIP_FLAGS_TSX_INFO = 0x5, /* 64-bit EIP, Flags, TSX, LBR_INFO */
+    LBR_FORMAT_EIP_FLAGS_CYCLES   = 0x6, /* 64-bit EIP, Flags, Cycles */
+};
+
+#define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
+
+static bool __read_mostly lbr_tsx_fixup_needed;
+static uint32_t __read_mostly lbr_from_start;
+static uint32_t __read_mostly lbr_from_end;
+static uint32_t __read_mostly lbr_lastint_from;
+
+static void __init lbr_tsx_fixup_check(void)
+{
+    bool tsx_support = cpu_has_hle || cpu_has_rtm;
+    uint64_t caps;
+    uint32_t lbr_format;
+
+    /* Fixup is needed only when TSX support is disabled ... */
+    if ( tsx_support )
+        return;
+
+    if ( !cpu_has_pdcm )
+        return;
+
+    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
+    lbr_format = caps & MSR_IA32_PERF_CAP_LBR_FORMAT;
+
+    /* ... and the address format of LBR includes TSX bits 61:62 */
+    if ( lbr_format == LBR_FORMAT_EIP_FLAGS_TSX )
+    {
+        const struct lbr_info *lbr = last_branch_msr_get();
+
+        if ( lbr == NULL )
+            return;
+
+        lbr_lastint_from = lbr[LBR_LASTINT_FROM_IDX].base;
+        lbr_from_start = lbr[LBR_LASTBRANCH_FROM_IDX].base;
+        lbr_from_end = lbr_from_start + lbr[LBR_LASTBRANCH_FROM_IDX].count;
+
+        lbr_tsx_fixup_needed = true;
+    }
+}
+
 static int is_last_branch_msr(u32 ecx)
 {
     const struct lbr_info *lbr = last_branch_msr_get();
@@ -2890,7 +2952,11 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content)
             for ( ; (rc == 0) && lbr->count; lbr++ )
                 for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
                     if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
+                    {
                         vmx_disable_intercept_for_msr(v, lbr->base + i, 
MSR_TYPE_R | MSR_TYPE_W);
+                        v->arch.hvm_vmx.lbr_tsx_fixup_enabled =
+                            lbr_tsx_fixup_needed;
+                    }
         }
 
         if ( (rc < 0) ||
@@ -3911,6 +3977,27 @@ out:
     }
 }
 
+static void lbr_tsx_fixup(void)
+{
+    struct vcpu *curr = current;
+    unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
+    struct vmx_msr_entry *msr;
+
+    if ( (msr = vmx_find_msr(lbr_from_start, VMX_GUEST_MSR)) != NULL )
+    {
+        /*
+         * Sign extend into bits 61:62 while preserving bit 63
+         * The loop relies on the fact that MSR array is sorted.
+         */
+        for ( ; msr < msr_area + msr_count && msr->index < lbr_from_end; msr++ 
)
+            msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+    }
+
+    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_GUEST_MSR)) != NULL )
+        msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+}
+
 void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -3967,6 +4054,9 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     }
 
  out:
+    if ( unlikely(curr->arch.hvm_vmx.lbr_tsx_fixup_enabled) )
+        lbr_tsx_fixup();
+
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 
     __vmwrite(GUEST_RIP,    regs->rip);
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 39ad582..3b187ac 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -78,6 +78,9 @@
 #define cpu_has_cmp_legacy     boot_cpu_has(X86_FEATURE_CMP_LEGACY)
 #define cpu_has_tbm            boot_cpu_has(X86_FEATURE_TBM)
 #define cpu_has_itsc           boot_cpu_has(X86_FEATURE_ITSC)
+#define cpu_has_hle            boot_cpu_has(X86_FEATURE_HLE)
+#define cpu_has_rtm            boot_cpu_has(X86_FEATURE_RTM)
+#define cpu_has_pdcm           boot_cpu_has(X86_FEATURE_PDCM)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 4945b78..733e221 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -147,6 +147,9 @@ struct arch_vmx_struct {
     uint8_t              vmx_realmode;
     /* Are we emulating rather than VMENTERing? */
     uint8_t              vmx_emulate;
+
+    bool                 lbr_tsx_fixup_enabled;
+
     /* Bitmask of segments that we can't safely use in virtual 8086 mode */
     uint16_t             vm86_segment_mask;
     /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 98dbff1..771e750 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -55,6 +55,8 @@
 #define MSR_IA32_PEBS_ENABLE           0x000003f1
 #define MSR_IA32_DS_AREA               0x00000600
 #define MSR_IA32_PERF_CAPABILITIES     0x00000345
+/* Lower 6 bits define the format of the address in the LBR stack */
+#define MSR_IA32_PERF_CAP_LBR_FORMAT   0x3f
 
 #define MSR_IA32_BNDCFGS               0x00000d90
 #define IA32_BNDCFGS_ENABLE            0x00000001
-- 
2.9.3


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.