
[Xen-devel] [PATCH 4/9] x86/vmx: Support remote access to the MSR lists

At the moment, all modifications of the MSR lists are made in current
context.  However, future changes may need to insert MSR_EFER into the lists
from domctl hypercall context.

Plumb a struct vcpu parameter down through the infrastructure, and use
vmx_vmcs_{enter,exit}() for safe access to the VMCS in vmx_add_msr().  Use
assertions to ensure that access is either in current context, or while the
vcpu is paused.

For now it is safe to require that remote accesses are under the domctl lock.
This will remain safe if/when the global domctl lock becomes per-domain.
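
As an illustration (not part of this patch), a remote caller in domctl
context would be expected to look something like the sketch below; the
vcpu_pause() is what satisfies the new ASSERT(v == current ||
!vcpu_runnable(v)) check in vmx_find_msr()/vmx_add_msr().  The helper name
is hypothetical.

    /* Hypothetical domctl-context caller, with the domctl lock held. */
    static int example_add_efer(struct domain *d)
    {
        struct vcpu *v;
        int rc = 0;

        for_each_vcpu ( d, v )
        {
            vcpu_pause(v);           /* !vcpu_runnable(v) now holds */
            rc = vmx_add_guest_msr(v, MSR_EFER);
            vcpu_unpause(v);

            if ( rc )
                break;
        }

        return rc;
    }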

Note these expectations beside the fields in arch_vmx_struct, and reorder the
fields to avoid unnecessary padding.
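
For reference (illustration only): on x86_64, with 8-byte, 8-byte-aligned
pointers and 4-byte unsigned ints, the old interleaved layout

    unsigned int         msr_count;       /* 4 bytes + 4 bytes padding */
    struct vmx_msr_entry *msr_area;       /* 8 bytes */
    unsigned int         host_msr_count;  /* 4 bytes + 4 bytes padding */
    struct vmx_msr_entry *host_msr_area;  /* 8 bytes */

occupies 32 bytes, whereas placing the two pointers ahead of the two
counters packs the same fields into 24 bytes.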

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
CC: Kevin Tian <kevin.tian@xxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>

To preempt any questions about spinlocks: the use of the MSR lists on the
return-to-guest path causes checklock failures for plain spinlocks (despite
that use technically being safe), and the call to alloc_xenheap_page() makes
the irqsave/restore variants unusable, due to the nested acquisition of the
heap lock.
---
 xen/arch/x86/cpu/vpmu_intel.c      | 14 ++++++-------
 xen/arch/x86/hvm/vmx/vmcs.c        | 40 ++++++++++++++++++++++++++++----------
 xen/arch/x86/hvm/vmx/vmx.c         | 24 ++++++++++++-----------
 xen/include/asm-x86/hvm/vmx/vmcs.h | 34 ++++++++++++++++++++------------
 4 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 207e2e7..c499e69 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -455,12 +455,12 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
     if ( is_hvm_vcpu(v) )
     {
         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+        if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
             goto out_err;
 
-        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+        if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
             goto out_err;
-        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+        vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0);
     }
 
     core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
@@ -613,7 +613,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
             return -EINVAL;
 
         if ( is_hvm_vcpu(v) )
-            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
                                &core2_vpmu_cxt->global_ctrl);
         else
             rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
@@ -682,7 +682,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
                 return -EINVAL;
 
             if ( is_hvm_vcpu(v) )
-                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
                                    &core2_vpmu_cxt->global_ctrl);
             else
                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
@@ -701,7 +701,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
     else
     {
         if ( is_hvm_vcpu(v) )
-            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+            vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
         else
             wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
     }
@@ -735,7 +735,7 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
             break;
         case MSR_CORE_PERF_GLOBAL_CTRL:
             if ( is_hvm_vcpu(v) )
-                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
             else
                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
             break;
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index e4acdc1..8bf54c4 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1301,13 +1301,15 @@ static struct vmx_msr_entry *locate_msr_entry(
     return start;
 }
 
-struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum vmx_msr_list_type type)
+struct vmx_msr_entry *vmx_find_msr(struct vcpu *v, uint32_t msr,
+                                   enum vmx_msr_list_type type)
 {
-    struct vcpu *curr = current;
-    struct arch_vmx_struct *arch_vmx = &curr->arch.hvm_vmx;
+    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
     struct vmx_msr_entry *start = NULL, *ent, *end;
     unsigned int total;
 
+    ASSERT(v == current || !vcpu_runnable(v));
+
     switch ( type )
     {
     case VMX_MSR_HOST:
@@ -1333,12 +1335,14 @@ struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum vmx_msr_list_type type)
     return ((ent < end) && (ent->index == msr)) ? ent : NULL;
 }
 
-int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
+int vmx_add_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type)
 {
-    struct vcpu *curr = current;
-    struct arch_vmx_struct *arch_vmx = &curr->arch.hvm_vmx;
+    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
     struct vmx_msr_entry **ptr, *start = NULL, *ent, *end;
     unsigned int total;
+    int rc;
+
+    ASSERT(v == current || !vcpu_runnable(v));
 
     switch ( type )
     {
@@ -1357,13 +1361,18 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
         return -EINVAL;
     }
 
+    vmx_vmcs_enter(v);
+
     /* Allocate memory on first use. */
     if ( unlikely(!*ptr) )
     {
         paddr_t addr;
 
         if ( (*ptr = alloc_xenheap_page()) == NULL )
-            return -ENOMEM;
+        {
+            rc = -ENOMEM;
+            goto out;
+        }
 
         addr = virt_to_maddr(*ptr);
 
@@ -1385,10 +1394,16 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
     ent   = locate_msr_entry(start, end, msr);
 
     if ( (ent < end) && (ent->index == msr) )
-        return 0;
+    {
+        rc = 0;
+        goto out;
+    }
 
     if ( total == (PAGE_SIZE / sizeof(*ent)) )
-        return -ENOSPC;
+    {
+        rc = -ENOSPC;
+        goto out;
+    }
 
     memmove(ent + 1, ent, sizeof(*ent) * (end - ent));
 
@@ -1409,7 +1424,12 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
         break;
     }
 
-    return 0;
+    rc = 0;
+
+ out:
+    vmx_vmcs_exit(v);
+
+    return rc;
 }
 
 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 123dccb..3950b12 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2818,7 +2818,7 @@ static int is_last_branch_msr(u32 ecx)
 
 static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
 {
-    const struct vcpu *curr = current;
+    struct vcpu *curr = current;
 
     HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x", msr);
 
@@ -2897,7 +2897,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
         if ( passive_domain_do_rdmsr(msr, msr_content) )
             goto done;
 
-        if ( vmx_read_guest_msr(msr, msr_content) == 0 )
+        if ( vmx_read_guest_msr(curr, msr, msr_content) == 0 )
             break;
 
         if ( is_last_branch_msr(msr) )
@@ -3109,7 +3109,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
 
             for ( ; (rc == 0) && lbr->count; lbr++ )
                 for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
-                    if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
+                    if ( (rc = vmx_add_guest_msr(v, lbr->base + i)) == 0 )
                     {
                         vmx_clear_msr_intercept(v, lbr->base + i, VMX_MSR_RW);
                         if ( lbr_tsx_fixup_needed )
@@ -3121,7 +3121,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
         }
 
         if ( (rc < 0) ||
-             (msr_content && (vmx_add_host_load_msr(msr) < 0)) )
+             (msr_content && (vmx_add_host_load_msr(v, msr) < 0)) )
             hvm_inject_hw_exception(TRAP_machine_check, X86_EVENT_NO_EC);
         else
             __vmwrite(GUEST_IA32_DEBUGCTL, msr_content);
@@ -3150,7 +3150,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
         if ( wrmsr_viridian_regs(msr, msr_content) ) 
             break;
 
-        if ( vmx_write_guest_msr(msr, msr_content) == 0 ||
+        if ( vmx_write_guest_msr(v, msr, msr_content) == 0 ||
              is_last_branch_msr(msr) )
             break;
 
@@ -4165,7 +4165,7 @@ static void lbr_tsx_fixup(void)
     struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
     struct vmx_msr_entry *msr;
 
-    if ( (msr = vmx_find_msr(lbr_from_start, VMX_MSR_GUEST)) != NULL )
+    if ( (msr = vmx_find_msr(curr, lbr_from_start, VMX_MSR_GUEST)) != NULL )
     {
         /*
          * Sign extend into bits 61:62 while preserving bit 63
@@ -4175,15 +4175,15 @@ static void lbr_tsx_fixup(void)
             msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
     }
 
-    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_MSR_GUEST)) != NULL )
+    if ( (msr = vmx_find_msr(curr, lbr_lastint_from, VMX_MSR_GUEST)) != NULL )
         msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
 }
 
-static void sign_extend_msr(u32 msr, int type)
+static void sign_extend_msr(struct vcpu *v, u32 msr, int type)
 {
     struct vmx_msr_entry *entry;
 
-    if ( (entry = vmx_find_msr(msr, type)) != NULL )
+    if ( (entry = vmx_find_msr(v, msr, type)) != NULL )
     {
         if ( entry->data & VADDR_TOP_BIT )
             entry->data |= CANONICAL_MASK;
@@ -4194,6 +4194,8 @@ static void sign_extend_msr(u32 msr, int type)
 
 static void bdw_erratum_bdf14_fixup(void)
 {
+    struct vcpu *curr = current;
+
     /*
      * Occasionally, on certain Broadwell CPUs MSR_IA32_LASTINTTOIP has
      * been observed to have the top three bits corrupted as though the
@@ -4203,8 +4205,8 @@ static void bdw_erratum_bdf14_fixup(void)
      * erratum BDF14. Fix up MSR_IA32_LASTINT{FROM,TO}IP by
      * sign-extending into bits 48:63.
      */
-    sign_extend_msr(MSR_IA32_LASTINTFROMIP, VMX_MSR_GUEST);
-    sign_extend_msr(MSR_IA32_LASTINTTOIP, VMX_MSR_GUEST);
+    sign_extend_msr(curr, MSR_IA32_LASTINTFROMIP, VMX_MSR_GUEST);
+    sign_extend_msr(curr, MSR_IA32_LASTINTTOIP, VMX_MSR_GUEST);
 }
 
 static void lbr_fixup(void)
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index c8a1f89..f66f121 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -130,10 +130,17 @@ struct arch_vmx_struct {
     uint64_t             sfmask;
 
     struct vmx_msr_bitmap *msr_bitmap;
-    unsigned int         msr_count;
+
+    /*
+     * Most accesses to the MSR host/guest load/save lists are in current
+     * context.  However, the data can be modified by toolstack/migration
+     * actions.  Remote access is only permitted for paused vcpus, and is
+     * protected under the domctl lock.
+     */
     struct vmx_msr_entry *msr_area;
-    unsigned int         host_msr_count;
     struct vmx_msr_entry *host_msr_area;
+    unsigned int         msr_count;
+    unsigned int         host_msr_count;
 
     unsigned long        eoi_exitmap_changed;
     DECLARE_BITMAP(eoi_exit_bitmap, NR_VECTORS);
@@ -537,25 +544,27 @@ enum vmx_msr_list_type {
     VMX_MSR_GUEST,
 };
 
-int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type);
+int vmx_add_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type);
 
-static inline int vmx_add_host_load_msr(uint32_t msr)
+static inline int vmx_add_guest_msr(struct vcpu *v, uint32_t msr)
 {
-    return vmx_add_msr(msr, VMX_MSR_HOST);
+    return vmx_add_msr(v, msr, VMX_MSR_GUEST);
 }
 
-static inline int vmx_add_guest_msr(uint32_t msr)
+static inline int vmx_add_host_load_msr(struct vcpu *v, uint32_t msr)
 {
-    return vmx_add_msr(msr, VMX_MSR_GUEST);
+    return vmx_add_msr(v, msr, VMX_MSR_HOST);
 }
 
-struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum vmx_msr_list_type type);
+struct vmx_msr_entry *vmx_find_msr(struct vcpu *v, uint32_t msr,
+                                   enum vmx_msr_list_type type);
 
-static inline int vmx_read_guest_msr(uint32_t msr, uint64_t *val)
+static inline int vmx_read_guest_msr(struct vcpu *v, uint32_t msr,
+                                     uint64_t *val)
 {
     struct vmx_msr_entry *ent;
 
-    if ( (ent = vmx_find_msr(msr, VMX_MSR_GUEST)) )
+    if ( (ent = vmx_find_msr(v, msr, VMX_MSR_GUEST)) )
     {
         *val = ent->data;
         return 0;
@@ -564,11 +573,12 @@ static inline int vmx_read_guest_msr(uint32_t msr, uint64_t *val)
     return -ESRCH;
 }
 
-static inline int vmx_write_guest_msr(uint32_t msr, uint64_t val)
+static inline int vmx_write_guest_msr(struct vcpu *v, uint32_t msr,
+                                      uint64_t val)
 {
     struct vmx_msr_entry *ent;
 
-    if ( (ent = vmx_find_msr(msr, VMX_MSR_GUEST)) )
+    if ( (ent = vmx_find_msr(v, msr, VMX_MSR_GUEST)) )
     {
         ent->data = val;
         return 0;
-- 
2.1.4

