Re: [Xen-devel] [PATCH v7 13/19] x86/VPMU: Add support for PMU register handling on PV guests



On 06/06/14 18:40, Boris Ostrovsky wrote:
> Intercept accesses to PMU MSRs and process them in VPMU module.
>
> Dump VPMU state for all domains (HVM and PV) when requested.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx>
> Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
> Tested-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
> ---
>  xen/arch/x86/domain.c             |  3 +-
>  xen/arch/x86/hvm/vmx/vpmu_core2.c | 69 ++++++++++++++++++++++++++++++---------
>  xen/arch/x86/hvm/vpmu.c           |  7 ++++
>  xen/arch/x86/traps.c              | 48 +++++++++++++++++++++++++--
>  xen/include/public/pmu.h          |  1 +
>  5 files changed, 108 insertions(+), 20 deletions(-)
>
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index a3ac1e2..bb759dd 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -2012,8 +2012,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
>  {
>      paging_dump_vcpu_info(v);
>  
> -    if ( is_hvm_vcpu(v) )
> -        vpmu_dump(v);
> +    vpmu_dump(v);
>  }
>  
>  void domain_cpuid(
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -440,9 +454,16 @@ static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
>      return 1;
>  }
>  
> +static void inject_trap(struct vcpu *v, unsigned int trapno)
> +{
> +    if ( has_hvm_container_domain(v->domain) )
> +        hvm_inject_hw_exception(trapno, 0);
> +    else
> +        send_guest_trap(v->domain, v->vcpu_id, trapno);
> +}
> +

send_guest_trap() can't be used to send anything other than an NMI or an
MCE, but you appear to be using it to try to send #GP faults?
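
For reference, a condensed paraphrase of why that can't work: the dispatch
in send_guest_trap() (xen/arch/x86/traps.c) handles exactly two vectors and
fails everything else.  This is a sketch rather than the verbatim source,
and the two helpers are placeholders for the real NMI/MCE softirq delivery
machinery:

    /* Paraphrased sketch of send_guest_trap(); not the verbatim source. */
    int send_guest_trap(struct domain *d, uint16_t vcpuid, unsigned int trap_nr)
    {
        struct vcpu *v = d->vcpu[vcpuid];

        switch ( trap_nr )
        {
        case TRAP_nmi:
            return deliver_guest_nmi(v);   /* placeholder for the softirq path */
        case TRAP_machine_check:
            return deliver_guest_vmce(v);  /* placeholder for the softirq path */
        }

        return -EIO;  /* any other vector, e.g. TRAP_gp_fault, is rejected */
    }

And since inject_trap() above never checks the return value, a PV guest's
#GP injection would be silently lost.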

>  static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>  {
> -    u64 global_ctrl;
>      int i, tmp;
>      int type = -1, index = -1;
>      struct vcpu *v = current;
> @@ -466,7 +487,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>                  if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
>                      return 1;
>                  gdprintk(XENLOG_WARNING, "Debug Store is not supported on this cpu\n");
> -                hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +                inject_trap(v, TRAP_gp_fault);
>                  return 0;
>              }
>          }
> @@ -479,11 +500,12 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>      {
>      case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>          core2_vpmu_cxt->global_status &= ~msr_content;
> +        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
>          return 1;
>      case MSR_CORE_PERF_GLOBAL_STATUS:
>          gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
>                   "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
> -        hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +        inject_trap(v, TRAP_gp_fault);
>          return 1;
>      case MSR_IA32_PEBS_ENABLE:
>          if ( msr_content & 1 )
> @@ -499,7 +521,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>                  gdprintk(XENLOG_WARNING,
>                           "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n",
>                           msr_content);
> -                hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +                inject_trap(v, TRAP_gp_fault);
>                  return 1;
>              }
>              core2_vpmu_cxt->ds_area = msr_content;
> @@ -508,10 +530,14 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>          gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
>          return 1;
>      case MSR_CORE_PERF_GLOBAL_CTRL:
> -        global_ctrl = msr_content;
> +        core2_vpmu_cxt->global_ctrl = msr_content;
>          break;
>      case MSR_CORE_PERF_FIXED_CTR_CTRL:
> -        vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
> +        if ( has_hvm_container_domain(v->domain) )
> +            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
> +                               &core2_vpmu_cxt->global_ctrl);
> +        else
> +            rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
>          *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
>          if ( msr_content != 0 )
>          {
> @@ -533,7 +559,11 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>              struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
>                  vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
>  
> -            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
> +            if ( has_hvm_container_domain(v->domain) )
> +                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
> +                                   &core2_vpmu_cxt->global_ctrl);
> +            else
> +                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
>  
>              if ( msr_content & (1ULL << 22) )
>                  *enabled_cntrs |= 1ULL << tmp;
> @@ -544,7 +574,8 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>          }
>      }
>  
> -    if ((global_ctrl & *enabled_cntrs) || (core2_vpmu_cxt->ds_area != 0)  )
> +    if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
> +         (core2_vpmu_cxt->ds_area != 0) )
>          vpmu_set(vpmu, VPMU_RUNNING);
>      else
>          vpmu_reset(vpmu, VPMU_RUNNING);
> @@ -557,7 +588,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>          {
>          case MSR_TYPE_ARCH_CTRL:      /* MSR_P6_EVNTSEL[0,...] */
>              mask = ~((1ull << 32) - 1);
> -            if (msr_content & mask)
> +            if ( msr_content & mask )

It would be helpful not to mix large functional changes with code
cleanup.  While I appreciate the cleanup, could it be moved into a
separate patch?  The same comment applies to several other patches as well.

~Andrew

>                  inject_gp = 1;
>              break;
>          case MSR_TYPE_CTRL:           /* IA32_FIXED_CTR_CTRL */
> @@ -565,22 +596,27 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
>                  break;
>              /* 4 bits per counter, currently 3 fixed counters implemented. */
>              mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
> -            if (msr_content & mask)
> +            if ( msr_content & mask )
>                  inject_gp = 1;
>              break;
>          case MSR_TYPE_COUNTER:        /* IA32_FIXED_CTR[0-2] */
>              mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
> -            if (msr_content & mask)
> +            if ( msr_content & mask )
>                  inject_gp = 1;
>              break;
>          }
> -        if (inject_gp)
> -            hvm_inject_hw_exception(TRAP_gp_fault, 0);
> +        if ( inject_gp )
> +            inject_trap(v, TRAP_gp_fault);
>          else
>              wrmsrl(msr, msr_content);
>      }
>      else
> -        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
> +    {
> +       if ( has_hvm_container_domain(v->domain) )
> +           vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
> +       else
> +           wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
> +    }
>  
>      return 1;
>  }
> @@ -604,7 +640,10 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>              *msr_content = core2_vpmu_cxt->global_status;
>              break;
>          case MSR_CORE_PERF_GLOBAL_CTRL:
> -            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
> +            if ( has_hvm_container_domain(v->domain) )
> +                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
> +            else
> +                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
>              break;
>          default:
>              rdmsrl(msr, *msr_content);
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index bceffa6..ca0534b 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -456,6 +456,13 @@ long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>              return -EFAULT;
>          pvpmu_finish(current->domain, &pmu_params);
>          break;
> +
> +    case XENPMU_lvtpc_set:
> +        if ( current->arch.vpmu.xenpmu_data == NULL )
> +            return -EINVAL;
> +        vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        ret = 0;
> +        break;
>      }
>  
>      return ret;
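
As a usage note: the XENPMU_lvtpc_set handler above takes the LVTPC value
from the shared per-VCPU xenpmu_data page registered via XENPMU_init, not
from the hypercall argument, so a guest would update the shared page first
and then issue the op.  A hypothetical PV-guest-side sketch (the wrapper
and variable names are assumed, not taken from this series):

    /* 'xenpmu_data' is this VCPU's shared PMU page, mapped at XENPMU_init time. */
    static void write_lvtpc(uint32_t val)
    {
        xenpmu_data->pmu.l.lapic_lvtpc = val;     /* hypervisor reads this field */
        HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);
    }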
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index 136821f..57d06c9 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -72,6 +72,7 @@
>  #include <asm/apic.h>
>  #include <asm/mc146818rtc.h>
>  #include <asm/hpet.h>
> +#include <asm/hvm/vpmu.h>
>  #include <public/arch-x86/cpuid.h>
>  #include <xsm/xsm.h>
>  
> @@ -877,8 +878,10 @@ void pv_cpuid(struct cpu_user_regs *regs)
>          __clear_bit(X86_FEATURE_TOPOEXT % 32, &c);
>          break;
>  
> +    case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
> +        break;
> +
>      case 0x00000005: /* MONITOR/MWAIT */
> -    case 0x0000000a: /* Architectural Performance Monitor Features */
>      case 0x0000000b: /* Extended Topology Enumeration */
>      case 0x8000000a: /* SVM revision and features */
>      case 0x8000001b: /* Instruction Based Sampling */
> @@ -894,6 +897,9 @@ void pv_cpuid(struct cpu_user_regs *regs)
>      }
>  
>   out:
> +    /* VPMU may decide to modify some of the leaves */
> +    vpmu_do_cpuid(regs->eax, &a, &b, &c, &d);
> +
>      regs->eax = a;
>      regs->ebx = b;
>      regs->ecx = c;
> @@ -1921,6 +1927,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>      char io_emul_stub[32];
>      void (*io_emul)(struct cpu_user_regs *) __attribute__((__regparm__(1)));
>      uint64_t val, msr_content;
> +    bool_t vpmu_msr;
>  
>      if ( !read_descriptor(regs->cs, v, regs,
>                            &code_base, &code_limit, &ar,
> @@ -2411,6 +2418,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>          uint32_t eax = regs->eax;
>          uint32_t edx = regs->edx;
>          msr_content = ((uint64_t)edx << 32) | eax;
> +        vpmu_msr = 0;
>          switch ( (u32)regs->ecx )
>          {
>          case MSR_FS_BASE:
> @@ -2547,7 +2555,19 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>              if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK )
>                  wrmsrl(regs->_ecx, msr_content);
>              break;
> -
> +        case MSR_P6_PERFCTR0...MSR_P6_PERFCTR1:
> +        case MSR_P6_EVNTSEL0...MSR_P6_EVNTSEL1:
> +        case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
> +        case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> +            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
> +                vpmu_msr = 1;
> +        case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
> +            if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
> +            {
> +                if ( !vpmu_do_wrmsr(regs->ecx, msr_content) )
> +                    goto invalid;
> +                break;
> +            }
>          default:
>              if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) == 1 )
>                  break;
> @@ -2579,6 +2599,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>          break;
>  
>      case 0x32: /* RDMSR */
> +        vpmu_msr = 0;
> +
>          switch ( (u32)regs->ecx )
>          {
>          case MSR_FS_BASE:
> @@ -2649,7 +2671,27 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>                              [regs->_ecx - MSR_AMD64_DR1_ADDRESS_MASK + 1];
>              regs->edx = 0;
>              break;
> -
> +        case MSR_IA32_PERF_CAPABILITIES:
> +            /* No extra capabilities are supported */
> +            regs->eax = regs->edx = 0;
> +            break;
> +        case MSR_P6_PERFCTR0...MSR_P6_PERFCTR1:
> +        case MSR_P6_EVNTSEL0...MSR_P6_EVNTSEL1:
> +        case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
> +        case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> +            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
> +                vpmu_msr = 1;
> +        case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
> +            if ( vpmu_msr || (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )
> +            {
> +                if ( vpmu_do_rdmsr(regs->ecx, &msr_content) )
> +                {
> +                    regs->eax = (uint32_t)msr_content;
> +                    regs->edx = (uint32_t)(msr_content >> 32);
> +                    break;
> +                }
> +                goto rdmsr_normal;
> +            }
>          default:
>              if ( rdmsr_hypervisor_regs(regs->ecx, &val) )
>              {
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 6da3596..7341de9 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -27,6 +27,7 @@
>  #define XENPMU_feature_set     3
>  #define XENPMU_init            4
>  #define XENPMU_finish          5
> +#define XENPMU_lvtpc_set       6
>  /* ` } */
>  
>  /* Parameters structure for HYPERVISOR_xenpmu_op call */


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel