
Re: [Xen-devel] [PATCH 4/9] x86/vmx: Support remote access to the MSR lists



> From: Andrew Cooper [mailto:andrew.cooper3@xxxxxxxxxx]
> Sent: Tuesday, May 22, 2018 7:21 PM
> 
> At the moment, all modifications of the MSR lists are in current context.
> However, future changes may need to put MSR_EFER into the lists from
> domctl hypercall context.
> 
> Plumb a struct vcpu parameter down through the infrastructure, and use
> vmx_vmcs_{enter,exit}() for safe access to the VMCS in vmx_add_msr().
> Use assertions to ensure that access is either in current context, or
> while the vcpu is paused.
> 
> For now it is safe to require that remote accesses are under the domctl lock.
> This will remain safe if/when the global domctl lock becomes per-domain.
> 
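Just to confirm my understanding of the intended remote usage: would a
future caller (e.g. for MSR_EFER) from domctl context look roughly like
the hypothetical sketch below?  Names and structure are illustrative
only, not part of this series.

/* Hypothetical domctl-context caller; target vcpu is not current. */
static int example_load_efer(struct vcpu *v, uint64_t efer)
{
    int rc;

    vcpu_pause(v);              /* satisfies the !vcpu_runnable() ASSERT */

    rc = vmx_add_guest_msr(v, MSR_EFER);
    if ( rc == 0 )
        rc = vmx_write_guest_msr(v, MSR_EFER, efer);

    vcpu_unpause(v);

    return rc;
}
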
> Note these expectations beside the fields in arch_vmx_struct, and
> reorder the fields to avoid unnecessary padding.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> ---
> CC: Jan Beulich <JBeulich@xxxxxxxx>
> CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> CC: Kevin Tian <kevin.tian@xxxxxxxxx>
> CC: Wei Liu <wei.liu2@xxxxxxxxxx>
> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> 
> To preempt any questions about spinlocks, the use of the MSR lists in the
> return-to-guest path causes check_lock() failures for plain spinlocks (despite it
> technically being safe to live here), and the call to alloc_xenheap_page()
> makes it impossible to use irqsave/restore variants, due to the nested
> acquisition of the heap lock.

I don't understand the above explanation. How does it relate to this patch?
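To make the question concrete: is the point that a locked version would have
to look something like the hypothetical sketch below, where a plain
spin_lock() trips the check_lock() IRQ-safety checks on the return-to-guest
path, while the irqsave variant then nests the heap lock (taken inside
alloc_xenheap_page()) with interrupts disabled?

/* Hypothetical sketch only - the lock and its placement are made up. */
static DEFINE_SPINLOCK(msr_list_lock);

int locked_add_msr_sketch(struct vcpu *v, uint32_t msr)
{
    unsigned long flags;

    spin_lock_irqsave(&msr_list_lock, flags);   /* plain spin_lock() unsafe here */

    /* With IRQs off, this nested heap-lock acquisition is the problem. */
    if ( !v->arch.hvm_vmx.msr_area )
        v->arch.hvm_vmx.msr_area = alloc_xenheap_page();

    spin_unlock_irqrestore(&msr_list_lock, flags);

    return v->arch.hvm_vmx.msr_area ? 0 : -ENOMEM;
}
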

> ---
>  xen/arch/x86/cpu/vpmu_intel.c      | 14 ++++++-------
>  xen/arch/x86/hvm/vmx/vmcs.c        | 40 ++++++++++++++++++++++++++++----------
>  xen/arch/x86/hvm/vmx/vmx.c         | 24 ++++++++++++-----------
>  xen/include/asm-x86/hvm/vmx/vmcs.h | 34 ++++++++++++++++++++------------
>  4 files changed, 72 insertions(+), 40 deletions(-)
> 
> diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
> index 207e2e7..c499e69 100644
> --- a/xen/arch/x86/cpu/vpmu_intel.c
> +++ b/xen/arch/x86/cpu/vpmu_intel.c
> @@ -455,12 +455,12 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
>      if ( is_hvm_vcpu(v) )
>      {
>          wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> -        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
> +        if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
>              goto out_err;
> 
> -        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
> +        if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
>              goto out_err;
> -        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> +        vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0);
>      }
> 
>      core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
> @@ -613,7 +613,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
>              return -EINVAL;
> 
>          if ( is_hvm_vcpu(v) )
> -            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
> +            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
>                                 &core2_vpmu_cxt->global_ctrl);
>          else
>              rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
> @@ -682,7 +682,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
>                  return -EINVAL;
> 
>              if ( is_hvm_vcpu(v) )
> -                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
> +                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
>                                     &core2_vpmu_cxt->global_ctrl);
>              else
>                  rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
> @@ -701,7 +701,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
>      else
>      {
>          if ( is_hvm_vcpu(v) )
> -            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
> +            vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
>          else
>              wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
>      }
> @@ -735,7 +735,7 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>              break;
>          case MSR_CORE_PERF_GLOBAL_CTRL:
>              if ( is_hvm_vcpu(v) )
> -                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
> +                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
>              else
>                  rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
>              break;
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index e4acdc1..8bf54c4 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -1301,13 +1301,15 @@ static struct vmx_msr_entry *locate_msr_entry(
>      return start;
>  }
> 
> -struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum vmx_msr_list_type type)
> +struct vmx_msr_entry *vmx_find_msr(struct vcpu *v, uint32_t msr,
> +                                   enum vmx_msr_list_type type)
>  {
> -    struct vcpu *curr = current;
> -    struct arch_vmx_struct *arch_vmx = &curr->arch.hvm_vmx;
> +    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
>      struct vmx_msr_entry *start = NULL, *ent, *end;
>      unsigned int total;
> 
> +    ASSERT(v == current || !vcpu_runnable(v));
> +
>      switch ( type )
>      {
>      case VMX_MSR_HOST:
> @@ -1333,12 +1335,14 @@ struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum vmx_msr_list_type type)
>      return ((ent < end) && (ent->index == msr)) ? ent : NULL;
>  }
> 
> -int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
> +int vmx_add_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type)
>  {
> -    struct vcpu *curr = current;
> -    struct arch_vmx_struct *arch_vmx = &curr->arch.hvm_vmx;
> +    struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
>      struct vmx_msr_entry **ptr, *start = NULL, *ent, *end;
>      unsigned int total;
> +    int rc;
> +
> +    ASSERT(v == current || !vcpu_runnable(v));
> 
>      switch ( type )
>      {
> @@ -1357,13 +1361,18 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
>          return -EINVAL;
>      }
> 
> +    vmx_vmcs_enter(v);
> +

Why enter the VMCS this early, even before the possible page allocation?
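i.e. couldn't the allocation be done before loading the VMCS, along the
lines of the untested sketch below (with the VMWRITEs of the list address
and the actual insertion still under vmx_vmcs_enter/exit as in the patch)?

    /* Untested sketch, just to illustrate the ordering I had in mind. */
    if ( unlikely(!*ptr) && (*ptr = alloc_xenheap_page()) == NULL )
        return -ENOMEM;                 /* no VMCS to release yet */

    vmx_vmcs_enter(v);

    /* ... VMWRITE the list address on first use, insert the entry ... */

    vmx_vmcs_exit(v);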

>      /* Allocate memory on first use. */
>      if ( unlikely(!*ptr) )
>      {
>          paddr_t addr;
> 
>          if ( (*ptr = alloc_xenheap_page()) == NULL )
> -            return -ENOMEM;
> +        {
> +            rc = -ENOMEM;
> +            goto out;
> +        }
> 
>          addr = virt_to_maddr(*ptr);
> 
> @@ -1385,10 +1394,16 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
>      ent   = locate_msr_entry(start, end, msr);
> 
>      if ( (ent < end) && (ent->index == msr) )
> -        return 0;
> +    {
> +        rc = 0;
> +        goto out;
> +    }
> 
>      if ( total == (PAGE_SIZE / sizeof(*ent)) )
> -        return -ENOSPC;
> +    {
> +        rc = -ENOSPC;
> +        goto out;
> +    }
> 
>      memmove(ent + 1, ent, sizeof(*ent) * (end - ent));
> 
> @@ -1409,7 +1424,12 @@ int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type)
>          break;
>      }
> 
> -    return 0;
> +    rc = 0;
> +
> + out:
> +    vmx_vmcs_exit(v);
> +
> +    return rc;
>  }
> 
>  void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 123dccb..3950b12 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2818,7 +2818,7 @@ static int is_last_branch_msr(u32 ecx)
> 
>  static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
>  {
> -    const struct vcpu *curr = current;
> +    struct vcpu *curr = current;
> 
>      HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x", msr);
> 
> @@ -2897,7 +2897,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
>          if ( passive_domain_do_rdmsr(msr, msr_content) )
>              goto done;
> 
> -        if ( vmx_read_guest_msr(msr, msr_content) == 0 )
> +        if ( vmx_read_guest_msr(curr, msr, msr_content) == 0 )
>              break;
> 
>          if ( is_last_branch_msr(msr) )
> @@ -3109,7 +3109,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
> 
>              for ( ; (rc == 0) && lbr->count; lbr++ )
>                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
> -                    if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
> +                    if ( (rc = vmx_add_guest_msr(v, lbr->base + i)) == 0 )
>                      {
>                          vmx_clear_msr_intercept(v, lbr->base + i, VMX_MSR_RW);
>                          if ( lbr_tsx_fixup_needed )
> @@ -3121,7 +3121,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>          }
> 
>          if ( (rc < 0) ||
> -             (msr_content && (vmx_add_host_load_msr(msr) < 0)) )
> +             (msr_content && (vmx_add_host_load_msr(v, msr) < 0)) )
>              hvm_inject_hw_exception(TRAP_machine_check, X86_EVENT_NO_EC);
>          else
>              __vmwrite(GUEST_IA32_DEBUGCTL, msr_content);
> @@ -3150,7 +3150,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>          if ( wrmsr_viridian_regs(msr, msr_content) )
>              break;
> 
> -        if ( vmx_write_guest_msr(msr, msr_content) == 0 ||
> +        if ( vmx_write_guest_msr(v, msr, msr_content) == 0 ||
>               is_last_branch_msr(msr) )
>              break;
> 
> @@ -4165,7 +4165,7 @@ static void lbr_tsx_fixup(void)
>      struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
>      struct vmx_msr_entry *msr;
> 
> -    if ( (msr = vmx_find_msr(lbr_from_start, VMX_MSR_GUEST)) != NULL )
> +    if ( (msr = vmx_find_msr(curr, lbr_from_start, VMX_MSR_GUEST)) != NULL )
>      {
>          /*
>           * Sign extend into bits 61:62 while preserving bit 63
> @@ -4175,15 +4175,15 @@ static void lbr_tsx_fixup(void)
>              msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
>      }
> 
> -    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_MSR_GUEST)) != NULL )
> +    if ( (msr = vmx_find_msr(curr, lbr_lastint_from, VMX_MSR_GUEST)) != NULL )
>          msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
>  }
> 
> -static void sign_extend_msr(u32 msr, int type)
> +static void sign_extend_msr(struct vcpu *v, u32 msr, int type)
>  {
>      struct vmx_msr_entry *entry;
> 
> -    if ( (entry = vmx_find_msr(msr, type)) != NULL )
> +    if ( (entry = vmx_find_msr(v, msr, type)) != NULL )
>      {
>          if ( entry->data & VADDR_TOP_BIT )
>              entry->data |= CANONICAL_MASK;
> @@ -4194,6 +4194,8 @@ static void sign_extend_msr(u32 msr, int type)
> 
>  static void bdw_erratum_bdf14_fixup(void)
>  {
> +    struct vcpu *curr = current;
> +
>      /*
>       * Occasionally, on certain Broadwell CPUs MSR_IA32_LASTINTTOIP has
>       * been observed to have the top three bits corrupted as though the
> @@ -4203,8 +4205,8 @@ static void bdw_erratum_bdf14_fixup(void)
>       * erratum BDF14. Fix up MSR_IA32_LASTINT{FROM,TO}IP by
>       * sign-extending into bits 48:63.
>       */
> -    sign_extend_msr(MSR_IA32_LASTINTFROMIP, VMX_MSR_GUEST);
> -    sign_extend_msr(MSR_IA32_LASTINTTOIP, VMX_MSR_GUEST);
> +    sign_extend_msr(curr, MSR_IA32_LASTINTFROMIP, VMX_MSR_GUEST);
> +    sign_extend_msr(curr, MSR_IA32_LASTINTTOIP, VMX_MSR_GUEST);
>  }
> 
>  static void lbr_fixup(void)
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index c8a1f89..f66f121 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -130,10 +130,17 @@ struct arch_vmx_struct {
>      uint64_t             sfmask;
> 
>      struct vmx_msr_bitmap *msr_bitmap;
> -    unsigned int         msr_count;
> +
> +    /*
> +     * Most accesses to the MSR host/guest load/save lists are in current
> +     * context.  However, the data can be modified by toolstack/migration
> +     * actions.  Remote access is only permitted for paused vcpus, and is
> +     * protected under the domctl lock.
> +     */
>      struct vmx_msr_entry *msr_area;
> -    unsigned int         host_msr_count;
>      struct vmx_msr_entry *host_msr_area;
> +    unsigned int         msr_count;
> +    unsigned int         host_msr_count;
> 
>      unsigned long        eoi_exitmap_changed;
>      DECLARE_BITMAP(eoi_exit_bitmap, NR_VECTORS);
> @@ -537,25 +544,27 @@ enum vmx_msr_list_type {
>      VMX_MSR_GUEST,
>  };
> 
> -int vmx_add_msr(uint32_t msr, enum vmx_msr_list_type type);
> +int vmx_add_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type);
> 
> -static inline int vmx_add_host_load_msr(uint32_t msr)
> +static inline int vmx_add_guest_msr(struct vcpu *v, uint32_t msr)
>  {
> -    return vmx_add_msr(msr, VMX_MSR_HOST);
> +    return vmx_add_msr(v, msr, VMX_MSR_GUEST);
>  }
> 
> -static inline int vmx_add_guest_msr(uint32_t msr)
> +static inline int vmx_add_host_load_msr(struct vcpu *v, uint32_t msr)
>  {
> -    return vmx_add_msr(msr, VMX_MSR_GUEST);
> +    return vmx_add_msr(v, msr, VMX_MSR_HOST);
>  }
> 
> -struct vmx_msr_entry *vmx_find_msr(uint32_t msr, enum vmx_msr_list_type type);
> +struct vmx_msr_entry *vmx_find_msr(struct vcpu *v, uint32_t msr,
> +                                   enum vmx_msr_list_type type);
> 
> -static inline int vmx_read_guest_msr(uint32_t msr, uint64_t *val)
> +static inline int vmx_read_guest_msr(struct vcpu *v, uint32_t msr,
> +                                     uint64_t *val)
>  {
>      struct vmx_msr_entry *ent;
> 
> -    if ( (ent = vmx_find_msr(msr, VMX_MSR_GUEST)) )
> +    if ( (ent = vmx_find_msr(v, msr, VMX_MSR_GUEST)) )
>      {
>          *val = ent->data;
>          return 0;
> @@ -564,11 +573,12 @@ static inline int vmx_read_guest_msr(uint32_t msr, uint64_t *val)
>      return -ESRCH;
>  }
> 
> -static inline int vmx_write_guest_msr(uint32_t msr, uint64_t val)
> +static inline int vmx_write_guest_msr(struct vcpu *v, uint32_t msr,
> +                                      uint64_t val)
>  {
>      struct vmx_msr_entry *ent;
> 
> -    if ( (ent = vmx_find_msr(msr, VMX_MSR_GUEST)) )
> +    if ( (ent = vmx_find_msr(v, msr, VMX_MSR_GUEST)) )
>      {
>          ent->data = val;
>          return 0;
> --
> 2.1.4

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 

