
Re: [Xen-devel] [PATCH v21 11/14] x86/VPMU: Handle PMU interrupts for PV(H) guests



On Friday, 08 May 2015 at 17:06:11, Boris Ostrovsky wrote:
> Add support for handling PMU interrupts for PV(H) guests.

I have only some minor nits below.

Reviewed-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>

> 
> VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in
> VPMU context which is shared between hypervisor and domain, thus avoiding
> traps to hypervisor.
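
Just to confirm my understanding of the intended guest-side flow, roughly
something like the sketch below? (The guest-side names used here, the
per-vCPU shared-page pointer and the hypercall wrapper, are made up and not
part of this patch.)

    /* guest's VIRQ_XENPMU handler, while PMU_CACHED is set */
    struct xen_pmu_data *pmu = this_vcpu_xenpmu_data;  /* shared page      */
    uint64_t ip = pmu->pmu.r.regs.ip;                  /* sampled IP       */
    /* ... consume the sample; the cached MSR values live in pmu->pmu.c ... */
    pmu->pmu.l.lapic_lvtpc = this_vcpu_lvtpc;          /* desired LVTPC    */
    HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);          /* clears PMU_CACHED,
                                                          re-arms the LVTPC
                                                          and reloads MSRs */
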
> 
> Since the interrupt handler may now force VPMU context save (i.e. set
> VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
> until now expected this flag to be set only when the counters were stopped.
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Acked-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
> ---
> Changes in v21:
> * Copy hypervisor-private VPMU context to shared page during interrupt and
>   copy it back during XENPMU_flush (see also changes to patch 6). Verify
>   user-provided VPMU context before loading it into hypervisor-private one
>   (and then to HW). Specifically, the changes are:
>   - Change definitions of save/load ops to take a flag that specifies whether
>     a copy and verification is required and, for the load op, to return error
>     if verification fails.
>   - Both load ops: update VPMU_RUNNING flag based on user-provided context,
>     copy VPMU context
>   - Both save ops: copy VPMU context
>   - core2_vpmu_load(): add core2_vpmu_verify() call to do context verification
>   - precompute VPMU context size into ctxt_sz to use in memcpy
>   - Return an error in XENPMU_flush (vpmu.c) if verification fails.
> * Non-privileged domains should not be provided with the physical CPU ID in
>   vpmu_do_interrupt(); set it to vcpu_id instead.
> 
>  xen/arch/x86/hvm/svm/vpmu.c       |  63 +++++++---
>  xen/arch/x86/hvm/vmx/vpmu_core2.c |  87 ++++++++++++--
>  xen/arch/x86/hvm/vpmu.c           | 237 +++++++++++++++++++++++++++++++++++---
>  xen/include/asm-x86/hvm/vpmu.h    |   8 +-
>  xen/include/public/arch-x86/pmu.h |   3 +
>  xen/include/public/pmu.h          |   2 +
>  xen/include/xsm/dummy.h           |   4 +-
>  xen/xsm/flask/hooks.c             |   2 +
>  8 files changed, 359 insertions(+), 47 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 74d03a5..efe5573 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -45,6 +45,7 @@ static unsigned int __read_mostly num_counters;
>  static const u32 __read_mostly *counters;
>  static const u32 __read_mostly *ctrls;
>  static bool_t __read_mostly k7_counters_mirrored;
> +static unsigned long __read_mostly ctxt_sz;
>  
>  #define F10H_NUM_COUNTERS   4
>  #define F15H_NUM_COUNTERS   6
> @@ -188,27 +189,52 @@ static inline void context_load(struct vcpu *v)
>      }
>  }
>  
> -static void amd_vpmu_load(struct vcpu *v)
> +static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> -    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
> -    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> +    struct xen_pmu_amd_ctxt *ctxt;
> +    uint64_t *ctrl_regs;
> +    unsigned int i;
>  
>      vpmu_reset(vpmu, VPMU_FROZEN);
>  
> -    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> +    if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
>      {
> -        unsigned int i;
> +        ctxt = vpmu->context;
> +        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
>  
>          for ( i = 0; i < num_counters; i++ )
>              wrmsrl(ctrls[i], ctrl_regs[i]);
>  
> -        return;
> +        return 0;
> +    }
> +
> +    if ( from_guest )
> +    {
> +        ASSERT(!is_hvm_vcpu(v));
> +
> +        ctxt = &vpmu->xenpmu_data->pmu.c.amd;
> +        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
> +        for ( i = 0; i < num_counters; i++ )
> +        {
> +            if ( is_pmu_enabled(ctrl_regs[i]) )
> +            {
> +                vpmu_set(vpmu, VPMU_RUNNING);
> +                break;
> +            }
> +        }
> +
> +        if ( i == num_counters )
> +            vpmu_reset(vpmu, VPMU_RUNNING);
> +
> +        memcpy(vpmu->context, &vpmu->xenpmu_data->pmu.c.amd, ctxt_sz);
>      }
>  
>      vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
>  
>      context_load(v);
> +
> +    return 0;
>  }
>  
>  static inline void context_save(struct vcpu *v)
> @@ -223,22 +249,17 @@ static inline void context_save(struct vcpu *v)
>          rdmsrl(counters[i], counter_regs[i]);
>  }
>  
> -static int amd_vpmu_save(struct vcpu *v)
> +static int amd_vpmu_save(struct vcpu *v,  bool_t to_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      unsigned int i;
>  
> -    /*
> -     * Stop the counters. If we came here via vpmu_save_force (i.e.
> -     * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
> -     */
> +    for ( i = 0; i < num_counters; i++ )
> +        wrmsrl(ctrls[i], 0);
> +
>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
>      {
>          vpmu_set(vpmu, VPMU_FROZEN);
> -
> -        for ( i = 0; i < num_counters; i++ )
> -            wrmsrl(ctrls[i], 0);
> -
>          return 0;
>      }
>  
> @@ -251,6 +272,12 @@ static int amd_vpmu_save(struct vcpu *v)
>           has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
>          amd_vpmu_unset_msr_bitmap(v);
>  
> +    if ( to_guest )
> +    {
> +        ASSERT(!is_hvm_vcpu(v));
> +        memcpy(&vpmu->xenpmu_data->pmu.c.amd, vpmu->context, ctxt_sz);
> +    }
> +
>      return 1;
>  }
>  
> @@ -433,8 +460,7 @@ int svm_vpmu_initialise(struct vcpu *v)
>      if ( !counters )
>          return -EINVAL;
>  
> -    ctxt = xzalloc_bytes(sizeof(*ctxt) +
> -                         2 * sizeof(uint64_t) * num_counters);
> +    ctxt = xzalloc_bytes(ctxt_sz);
>      if ( !ctxt )
>      {
>          printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
> @@ -490,6 +516,9 @@ int __init amd_vpmu_init(void)
>          return -ENOSPC;
>      }
>  
> +    ctxt_sz = sizeof(struct xen_pmu_amd_ctxt) +
> +              2 * sizeof(uint64_t) * num_counters;
> +
>      return 0;
>  }
>  
> diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> index 49f6771..a8df3a0 100644
> --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
> +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
> @@ -88,6 +88,9 @@ static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
>  static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
>  static uint64_t __read_mostly global_ovf_ctrl_mask;
>  
> +/* VPMU context size */
> +static unsigned long __read_mostly ctxt_sz;
> +
>  /*
>   * QUIRK to workaround an issue on various family 6 cpus.
>   * The issue leads to endless PMC interrupt loops on the processor.
> @@ -310,7 +313,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
>          rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
>  }
>  
> -static int core2_vpmu_save(struct vcpu *v)
> +static int core2_vpmu_save(struct vcpu *v, bool_t to_user)

The parameter name differs between core2_vpmu_save(.., to_user) and
amd_vpmu_save(.., to_guest), even though both implement the same interface.
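
I.e. just a rename for consistency, e.g.:

    -static int core2_vpmu_save(struct vcpu *v, bool_t to_user)
    +static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)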

>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>  
> @@ -327,6 +330,12 @@ static int core2_vpmu_save(struct vcpu *v)
>           has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
>          core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
>  
> +    if ( to_user )
> +    {
> +        ASSERT(!is_hvm_vcpu(v));
> +        memcpy(&vpmu->xenpmu_data->pmu.c.intel, vpmu->context, ctxt_sz);
> +    }
> +
>      return 1;
>  }
>  
> @@ -363,16 +372,79 @@ static inline void __core2_vpmu_load(struct vcpu *v)
>      }
>  }
>  
> -static void core2_vpmu_load(struct vcpu *v)
> +static int core2_vpmu_verify(struct vcpu *v)
> +{
> +    unsigned int i;
> +    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +    struct xen_pmu_intel_ctxt *core2_vpmu_cxt =
> +        &v->arch.vpmu.xenpmu_data->pmu.c.intel;
> +    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
> +    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
> +        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
> +    uint64_t fixed_ctrl;
> +    uint64_t enabled_cntrs = 0;
> +
> +    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
> +            return 1;
> +
> +    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
> +    if ( fixed_ctrl & fixed_ctrl_mask )
> +        return 1;
> +
> +    for ( i = 0; i < fixed_pmc_cnt; i++ )
> +    {
> +        if ( fixed_counters[i] & fixed_counters_mask )
> +            return 1;
> +        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
> +            enabled_cntrs |= (1ULL << i);
> +    }
> +    enabled_cntrs <<= 32;
> +
> +    for ( i = 0; i < arch_pmc_cnt; i++ )
> +    {
> +        uint64_t control = xen_pmu_cntr_pair[i].control;
> +
> +        if ( control & ARCH_CTRL_MASK )
> +            return 1;
> +        if ( control & (1ULL << 22) )

Since I think this is the ENABLE bit (bit 22), a #define would be better, or
at least a comment?
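
E.g. something like this (the name is only an illustration):

    /* EN bit (bit 22) of IA32_PERFEVTSELx */
    #define ARCH_CNTR_ENABLE    (1ULL << 22)

        if ( control & ARCH_CNTR_ENABLE )
            enabled_cntrs |= (1ULL << i);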

> +            enabled_cntrs |= (1ULL << i);
> +    }
> +
> +    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
> +         !is_canonical_address(core2_vpmu_cxt->ds_area) )
> +        return 1;
> +
> +    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
> +         (core2_vpmu_cxt->ds_area != 0) )
> +        vpmu_set(vpmu, VPMU_RUNNING);
> +    else
> +        vpmu_reset(vpmu, VPMU_RUNNING);
> +
> +    *(uint64_t *)vpmu->priv_context = enabled_cntrs;
> +
> +    return 0;
> +}
> +
> +static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>  
>      if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> -        return;
> +        return 0;
> +
> +    if ( from_guest )
> +    {
> +        ASSERT(!is_hvm_vcpu(v));
> +        if ( core2_vpmu_verify(v) )
> +            return 1;
> +        memcpy(vpmu->context, &v->arch.vpmu.xenpmu_data->pmu.c.intel, ctxt_sz);
> +    }
>  
>      vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
>  
>      __core2_vpmu_load(v);
> +
> +    return 0;
>  }
>  
>  static int core2_vpmu_alloc_resource(struct vcpu *v)
> @@ -395,10 +467,7 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
>          vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
>      }
>  
> -    core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
> -                                   sizeof(uint64_t) * fixed_pmc_cnt +
> -                                   sizeof(struct xen_pmu_cntr_pair) *
> -                                   arch_pmc_cnt);
> +    core2_vpmu_cxt = xzalloc_bytes(ctxt_sz);
>      p = xzalloc(uint64_t);
>      if ( !core2_vpmu_cxt || !p )
>          goto out_err;
> @@ -921,6 +990,10 @@ int __init core2_vpmu_init(void)
>                               (((1ULL << fixed_pmc_cnt) - 1) << 32) |
>                               ((1ULL << arch_pmc_cnt) - 1));
>  
> +    ctxt_sz = sizeof(struct xen_pmu_intel_ctxt) +
> +              sizeof(uint64_t) * fixed_pmc_cnt +
> +              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
> +
>      check_pmc_quirk();
>  
>      if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index 07fa368..a30f02a 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -85,31 +85,56 @@ static void __init parse_vpmu_param(char *s)
>  void vpmu_lvtpc_update(uint32_t val)
>  {
>      struct vpmu_struct *vpmu;
> +    struct vcpu *curr = current;
>  
> -    if ( vpmu_mode == XENPMU_MODE_OFF )
> +    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
>          return;
>  
> -    vpmu = vcpu_vpmu(current);
> +    vpmu = vcpu_vpmu(curr);
>  
>      vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> -    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> +    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
> +    if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
> +         !(vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) )
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  }
>  
>  int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu;
>  
>      if ( vpmu_mode == XENPMU_MODE_OFF )
>          return 0;
>  
> +    vpmu = vcpu_vpmu(curr);
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
> -        return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +    {
> +        int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +
> +        /*
> +         * We may have received a PMU interrupt during WRMSR handling
> +         * and since do_wrmsr may load VPMU context we should save
> +         * (and unload) it again.
> +         */
> +        if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
> +             (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
> +
>      return 0;
>  }
>  
>  int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu;
>  
>      if ( vpmu_mode == XENPMU_MODE_OFF )
>      {
> @@ -117,24 +142,166 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>          return 0;
>      }
>  
> +    vpmu = vcpu_vpmu(curr);
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
> -        return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +    {
> +        int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +
> +        if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
> +             (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(curr, 0);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
>      else
>          *msr_content = 0;
>  
>      return 0;
>  }
>  
> +static inline struct vcpu *choose_hwdom_vcpu(void)
> +{
> +    unsigned idx;
> +
> +    if ( hardware_domain->max_vcpus == 0 )
> +        return NULL;
> +
> +    idx = smp_processor_id() % hardware_domain->max_vcpus;
> +
> +    return hardware_domain->vcpu[idx];
> +}
> +
>  void vpmu_do_interrupt(struct cpu_user_regs *regs)
>  {
> -    struct vcpu *v = current;
> -    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +    struct vcpu *sampled = current, *sampling;
> +    struct vpmu_struct *vpmu;
> +
> +    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
> +    if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
> +    {
> +        sampling = choose_hwdom_vcpu();
> +        if ( !sampling )
> +            return;
> +    }
> +    else
> +        sampling = sampled;
> +
> +    vpmu = vcpu_vpmu(sampling);
> +    if ( !is_hvm_vcpu(sampling) )
> +    {
> +        /* PV(H) guest */
> +        const struct cpu_user_regs *cur_regs;
> +        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
> +        domid_t domid = DOMID_SELF;
> +
> +        if ( !vpmu->xenpmu_data )
> +            return;
> +
> +        if ( is_pvh_vcpu(sampling) &&
> +             !vpmu->arch_vpmu_ops->do_interrupt(regs) )

Here you expect vpmu->arch_vpmu_ops != NULL but ...

> +            return;
> +
> +        if ( *flags & PMU_CACHED )
> +            return;
> +
> +        /* PV guest will be reading PMU MSRs from xenpmu_data */
> +        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
> +        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +
> +        if ( has_hvm_container_vcpu(sampled) )
> +            *flags = 0;
> +        else
> +            *flags = PMU_SAMPLE_PV;
> +
> +        /* Store appropriate registers in xenpmu_data */
> +        /* FIXME: 32-bit PVH should go here as well */
> +        if ( is_pv_32bit_vcpu(sampling) )
> +        {
> +            /*
> +             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
> +             * and therefore we treat it the same way as a non-privileged
> +             * PV 32-bit domain.
> +             */
> +            struct compat_pmu_regs *cmp;
> +
> +            cur_regs = guest_cpu_user_regs();
> +
> +            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
> +            cmp->ip = cur_regs->rip;
> +            cmp->sp = cur_regs->rsp;
> +            cmp->flags = cur_regs->eflags;
> +            cmp->ss = cur_regs->ss;
> +            cmp->cs = cur_regs->cs;
> +            if ( (cmp->cs & 3) > 1 )
> +                *flags |= PMU_SAMPLE_USER;
> +        }
> +        else
> +        {
> +            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
> +
> +            if ( (vpmu_mode & XENPMU_MODE_SELF) )
> +                cur_regs = guest_cpu_user_regs();
> +            else if ( !guest_mode(regs) && is_hardware_domain(sampling->domain) )
> +            {
> +                cur_regs = regs;
> +                domid = DOMID_XEN;
> +            }
> +            else
> +                cur_regs = guest_cpu_user_regs();
> +
> +            r->ip = cur_regs->rip;
> +            r->sp = cur_regs->rsp;
> +            r->flags = cur_regs->eflags;
> +
> +            if ( !has_hvm_container_vcpu(sampled) )
> +            {
> +                r->ss = cur_regs->ss;
> +                r->cs = cur_regs->cs;
> +                if ( !(sampled->arch.flags & TF_kernel_mode) )
> +                    *flags |= PMU_SAMPLE_USER;
> +            }
> +            else
> +            {
> +                struct segment_register seg;
> +
> +                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
> +                r->cs = seg.sel;
> +                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
> +                r->ss = seg.sel;
> +                r->cpl = seg.attr.fields.dpl;
> +                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
> +                    *flags |= PMU_SAMPLE_REAL;
> +            }
> +        }
> +
> +        vpmu->xenpmu_data->domain_id = domid;
> +        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
> +        if ( is_hardware_domain(sampling->domain) )
> +            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
> +        else
> +            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
> +
> +        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +        *flags |= PMU_CACHED;
> +
> +        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> +
> +        return;
> +    }
>  
>      if ( vpmu->arch_vpmu_ops )

... here there is a check.
Maybe the check here is unnecessary, because you would never get this
interrupt without arch_vpmu_ops != NULL having switched on the machinery in
the first place?

There are other places, too, where vpmu->arch_vpmu_ops->... is called with a
preceding NULL check, and some without one. Maybe it would make sense to
always require a complete set of arch_vpmu_ops functions, to avoid this?
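
For instance, a rough sketch only (helper name invented, not meant as the
actual change): the vendor init code could register its ops through a helper
that insists on a complete table, after which the per-call NULL checks could
go away:

    static bool_t vpmu_ops_complete(const struct arch_vpmu_ops *ops)
    {
        return ops && ops->do_wrmsr && ops->do_rdmsr && ops->do_interrupt &&
               ops->arch_vpmu_save && ops->arch_vpmu_load &&
               ops->arch_vpmu_destroy && ops->arch_vpmu_dump;
    }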

>      {
> -        struct vlapic *vlapic = vcpu_vlapic(v);
> +        struct vlapic *vlapic = vcpu_vlapic(sampling);
>          u32 vlapic_lvtpc;
>  
> +        /* We don't support (yet) HVM dom0 */
> +        ASSERT(sampling == sampled);
> +
>          if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
>               !is_vlapic_lvtpc_enabled(vlapic) )
>              return;
> @@ -147,7 +314,7 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
>              vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
>              break;
>          case APIC_MODE_NMI:
> -            v->nmi_pending = 1;
> +            sampling->nmi_pending = 1;
>              break;
>          }
>      }
> @@ -174,7 +341,7 @@ static void vpmu_save_force(void *arg)
>      vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
>  
>      if ( vpmu->arch_vpmu_ops )
> -        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v);
> +        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
>  
>      vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
>  
> @@ -193,20 +360,20 @@ void vpmu_save(struct vcpu *v)
>      per_cpu(last_vcpu, pcpu) = v;
>  
>      if ( vpmu->arch_vpmu_ops )
> -        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
> +        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
>              vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>  
>      apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
>  }
>  
> -void vpmu_load(struct vcpu *v)
> +int vpmu_load(struct vcpu *v, bool_t verify)

vpmu_load() uses "verify", but the arch_vpmu_load functions
(core2_vpmu_load() and amd_vpmu_load()) use "from_guest" for the same
meaning, which is a little confusing. Using "verify" everywhere would be
clearer, I think.
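
I.e. purely a naming change along the lines of:

    -    int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
    +    int (*arch_vpmu_load)(struct vcpu *v, bool_t verify);

(or keep "from_guest" everywhere instead, whichever you prefer).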

Dietmar.

>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      int pcpu = smp_processor_id();
>      struct vcpu *prev = NULL;
>  
>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
> -        return;
> +        return 0;
>  
>      /* First time this VCPU is running here */
>      if ( vpmu->last_pcpu != pcpu )
> @@ -245,15 +412,24 @@ void vpmu_load(struct vcpu *v)
>      local_irq_enable();
>  
>      /* Only when PMU is counting, we load PMU context immediately. */
> -    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
> -        return;
> +    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> +         (!is_hvm_vcpu(vpmu_vcpu(vpmu)) &&
> +          (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED)) )
> +        return 0;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
>      {
>          apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>          /* Arch code needs to set VPMU_CONTEXT_LOADED */
> -        vpmu->arch_vpmu_ops->arch_vpmu_load(v);
> +        if ( vpmu->arch_vpmu_ops->arch_vpmu_load(v, verify) )
> +        {
> +            apic_write_around(APIC_LVTPC,
> +                              vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
> +            return 1;
> +        }
>      }
> +
> +    return 0;
>  }
>  
>  void vpmu_initialise(struct vcpu *v)
> @@ -265,6 +441,8 @@ void vpmu_initialise(struct vcpu *v)
>  
>      BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
>      BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
> +    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
> +    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
>  
>      ASSERT(!vpmu->flags && !vpmu->context);
>  
> @@ -449,7 +627,9 @@ void vpmu_dump(struct vcpu *v)
>  long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>  {
>      int ret;
> +    struct vcpu *curr;
>      struct xen_pmu_params pmu_params = {.val = 0};
> +    struct xen_pmu_data *xenpmu_data;
>  
>      if ( !opt_vpmu_enabled )
>          return -EOPNOTSUPP;
> @@ -552,6 +732,27 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>          pvpmu_finish(current->domain, &pmu_params);
>          break;
>  
> +    case XENPMU_lvtpc_set:
> +        xenpmu_data = current->arch.vpmu.xenpmu_data;
> +        if ( xenpmu_data == NULL )
> +            return -EINVAL;
> +        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
> +        break;
> +
> +    case XENPMU_flush:
> +        curr = current;
> +        xenpmu_data = curr->arch.vpmu.xenpmu_data;
> +        if ( xenpmu_data == NULL )
> +            return -EINVAL;
> +        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
> +        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
> +        if ( vpmu_load(curr, 1) )
> +        {
> +            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
> +            return -EIO;
> +        }
> +        break ;
> +
>      default:
>          ret = -EINVAL;
>      }
> diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
> index 642a4b7..564d28d 100644
> --- a/xen/include/asm-x86/hvm/vpmu.h
> +++ b/xen/include/asm-x86/hvm/vpmu.h
> @@ -47,8 +47,8 @@ struct arch_vpmu_ops {
>                       unsigned int *eax, unsigned int *ebx,
>                       unsigned int *ecx, unsigned int *edx);
>      void (*arch_vpmu_destroy)(struct vcpu *v);
> -    int (*arch_vpmu_save)(struct vcpu *v);
> -    void (*arch_vpmu_load)(struct vcpu *v);
> +    int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
> +    int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
>      void (*arch_vpmu_dump)(const struct vcpu *);
>  };
>  
> @@ -107,7 +107,7 @@ void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
>  void vpmu_initialise(struct vcpu *v);
>  void vpmu_destroy(struct vcpu *v);
>  void vpmu_save(struct vcpu *v);
> -void vpmu_load(struct vcpu *v);
> +int vpmu_load(struct vcpu *v, bool_t verify);
>  void vpmu_dump(struct vcpu *v);
>  
>  extern int acquire_pmu_ownership(int pmu_ownership);
> @@ -126,7 +126,7 @@ static inline void vpmu_switch_from(struct vcpu *prev)
>  static inline void vpmu_switch_to(struct vcpu *next)
>  {
>      if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
> -        vpmu_load(next);
> +        vpmu_load(next, 0);
>  }
>  
>  #endif /* __ASM_X86_HVM_VPMU_H_*/
> diff --git a/xen/include/public/arch-x86/pmu.h b/xen/include/public/arch-x86/pmu.h
> index c0068f1..de60199 100644
> --- a/xen/include/public/arch-x86/pmu.h
> +++ b/xen/include/public/arch-x86/pmu.h
> @@ -53,6 +53,9 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_regs_t);
>  
>  /* PMU flags */
>  #define PMU_CACHED         (1<<0) /* PMU MSRs are cached in the context */
> +#define PMU_SAMPLE_USER    (1<<1) /* Sample is from user or kernel mode */
> +#define PMU_SAMPLE_REAL    (1<<2) /* Sample is from realmode */
> +#define PMU_SAMPLE_PV      (1<<3) /* Sample from a PV guest */
>  
>  /*
>   * Architecture-specific information describing state of the processor at
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 57b130b..005a2ef 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -27,6 +27,8 @@
>  #define XENPMU_feature_set     3
>  #define XENPMU_init            4
>  #define XENPMU_finish          5
> +#define XENPMU_lvtpc_set       6
> +#define XENPMU_flush           7 /* Write cached MSR values to HW     */
>  /* ` } */
>  
>  /* Parameters structure for HYPERVISOR_xenpmu_op call */
> diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
> index 6ec942c..e234349 100644
> --- a/xen/include/xsm/dummy.h
> +++ b/xen/include/xsm/dummy.h
> @@ -682,7 +682,9 @@ static XSM_INLINE int xsm_pmu_op (XSM_DEFAULT_ARG struct domain *d, int op)
>      case XENPMU_feature_get:
>          return xsm_default_action(XSM_PRIV, d, current->domain);
>      case XENPMU_init:
> -    case XENPMU_finish: 
> +    case XENPMU_finish:
> +    case XENPMU_lvtpc_set:
> +    case XENPMU_flush:
>          return xsm_default_action(XSM_HOOK, d, current->domain);
>      default:
>          return -EPERM;
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index f8faba8..fc0328f 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -1532,6 +1532,8 @@ static int flask_pmu_op (struct domain *d, unsigned int op)
>                              XEN2__PMU_CTRL, NULL);
>      case XENPMU_init:
>      case XENPMU_finish:
> +    case XENPMU_lvtpc_set:
> +    case XENPMU_flush:
>          return avc_has_perm(dsid, SECINITSID_XEN, SECCLASS_XEN2,
>                              XEN2__PMU_USE, NULL);
>      default:
> 

-- 
Company details: http://ts.fujitsu.com/imprint.html
