Xen Project Mailing List

Re: [Xen-devel] [PATCH v11 for-xen-4.5 16/20] x86/VPMU: Handle PMU interrupts for PV guests

On Mon, Sep 22, 2014 at 07:57:57PM -0400, Boris Ostrovsky wrote: > Add support for handling PMU interrupts for PV guests. > > VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush > hypercall. This allows the guest to access PMU MSR values that are stored in > VPMU context which is shared between hypervisor and domain, thus avoiding > traps to hypervisor. > > Since the the interrupt handler may now force VPMU context save (i.e. set Extra 'the' > VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which > until now expected this flag to be set only when the counters are stopped. s/are/were/ > > Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> > --- > xen/arch/x86/hvm/svm/vpmu.c | 11 +-- > xen/arch/x86/hvm/vpmu.c | 187 > ++++++++++++++++++++++++++++++++++++++++---- > xen/include/public/pmu.h | 7 ++ > 3 files changed, 183 insertions(+), 22 deletions(-) > > diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c > index 63c099c..055b21c 100644 > --- a/xen/arch/x86/hvm/svm/vpmu.c > +++ b/xen/arch/x86/hvm/svm/vpmu.c > @@ -229,17 +229,12 @@ static int amd_vpmu_save(struct vcpu *v) > struct vpmu_struct *vpmu = vcpu_vpmu(v); > unsigned int i; > > - /* > - * Stop the counters. If we came here via vpmu_save_force (i.e. > - * when VPMU_CONTEXT_SAVE is set) counters are already stopped. 
> - */ > + for ( i = 0; i < num_counters; i++ ) > + wrmsrl(ctrls[i], 0); > + > if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) > { > vpmu_set(vpmu, VPMU_FROZEN); > - > - for ( i = 0; i < num_counters; i++ ) > - wrmsrl(ctrls[i], 0); > - > return 0; > } > > diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c > index edc5f91..018221d 100644 > --- a/xen/arch/x86/hvm/vpmu.c > +++ b/xen/arch/x86/hvm/vpmu.c > @@ -79,44 +79,191 @@ static void __init parse_vpmu_param(char *s) > > void vpmu_lvtpc_update(uint32_t val) > { > - struct vpmu_struct *vpmu = vcpu_vpmu(current); > + struct vcpu *curr = current; > + struct vpmu_struct *vpmu = vcpu_vpmu(curr); > > vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED); > - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); > + > + /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */ > + if ( is_hvm_domain(curr->domain) || > + !(vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) > ) > + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); > } > > int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported) > { > - struct vpmu_struct *vpmu = vcpu_vpmu(current); > + struct vcpu *curr = current; > + struct vpmu_struct *vpmu = vcpu_vpmu(curr); > > if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) ) > return 0; > > if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr ) > - return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported); > + { > + int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported); > + > + /* > + * We may have received a PMU interrupt during WRMSR handling > + * and since do_wrmsr may load VPMU context we should save > + * (and unload) it again. 
> + */ > + if ( !is_hvm_domain(curr->domain) && > + vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & > PMU_CACHED) ) > + { > + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); > + vpmu->arch_vpmu_ops->arch_vpmu_save(curr); > + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); > + } > + return ret; > + } > return 0; > } > > int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) > { > - struct vpmu_struct *vpmu = vcpu_vpmu(current); > + struct vcpu *curr = current; > + struct vpmu_struct *vpmu = vcpu_vpmu(curr); > > if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) ) > return 0; > > if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr ) > - return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); > + { You have a nice comment in the above code. Could you replicate it here or just point the reader of the code to the reasoning? > + int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); > + > + if ( !is_hvm_domain(curr->domain) && > + vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & > PMU_CACHED) ) > + { > + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); > + vpmu->arch_vpmu_ops->arch_vpmu_save(curr); > + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); > + } > + return ret; > + } > return 0; > } > > +static struct vcpu *choose_hwdom_vcpu(void) > +{ > + struct vcpu *v; > + unsigned idx = smp_processor_id() % hardware_domain->max_vcpus; > + > + if ( hardware_domain->vcpu == NULL ) > + return NULL; > + > + v = hardware_domain->vcpu[idx]; > + > + /* > + * If index is not populated search downwards the vcpu array until > + * a valid vcpu can be found > + */ > + while ( !v && idx-- ) > + v = hardware_domain->vcpu[idx]; > + > + return v; > +} > + > int vpmu_do_interrupt(struct cpu_user_regs *regs) > { > - struct vcpu *v = current; > - struct vpmu_struct *vpmu = vcpu_vpmu(v); > + struct vcpu *sampled = current, *sampling; > + struct vpmu_struct *vpmu; > + > + /* dom0 will handle interrupt for special domains (e.g. 
idle domain) */ > + if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED ) > + { > + sampling = choose_hwdom_vcpu(); > + if ( !sampling ) > + return 0; > + } > + else > + sampling = sampled; > + > + vpmu = vcpu_vpmu(sampling); > + if ( !is_hvm_domain(sampling->domain) ) > + { > + /* PV(H) guest */ > + const struct cpu_user_regs *cur_regs; > + > + if ( !vpmu->xenpmu_data ) > + return 0; > + > + if ( vpmu->xenpmu_data->pmu_flags & PMU_CACHED ) > + return 1; > + > + if ( is_pvh_domain(sampled->domain) && > + !vpmu->arch_vpmu_ops->do_interrupt(regs) ) > + return 0; > + > + /* PV guest will be reading PMU MSRs from xenpmu_data */ > + vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); > + vpmu->arch_vpmu_ops->arch_vpmu_save(sampling); > + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); > + > + /* Store appropriate registers in xenpmu_data */ > + if ( is_pv_32bit_domain(sampling->domain) ) > + { > + /* > + * 32-bit dom0 cannot process Xen's addresses (which are 64 bit) > + * and therefore we treat it the same way as a non-privileged > + * PV 32-bit domain. 
> + */ > + struct compat_pmu_regs *cmp; > + > + cur_regs = guest_cpu_user_regs(); > + > + cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs; > + cmp->eip = cur_regs->rip; > + cmp->esp = cur_regs->rsp; > + cmp->cs = cur_regs->cs; > + if ( (cmp->cs & 3) == 1 ) > + cmp->cs &= ~3; > + } > + else > + { > + struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs; > + > + /* Non-privileged domains are always in XENPMU_MODE_SELF mode */ > + if ( (vpmu_mode & XENPMU_MODE_SELF) || > + (!is_hardware_domain(sampled->domain) && > + !is_idle_vcpu(sampled)) ) > + cur_regs = guest_cpu_user_regs(); > + else > + cur_regs = regs; > + > + r->rip = cur_regs->rip; > + r->rsp = cur_regs->rsp; > + > + if ( !is_pvh_domain(sampled->domain) ) > + { > + r->cs = cur_regs->cs; > + if ( sampled->arch.flags & TF_kernel_mode ) > + r->cs &= ~3; > + } > + else > + { > + struct segment_register seg_cs; > + > + hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs); > + r->cs = seg_cs.sel; > + } > + } > + > + vpmu->xenpmu_data->domain_id = DOMID_SELF; > + vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id; > + vpmu->xenpmu_data->pcpu_id = smp_processor_id(); > + > + vpmu->xenpmu_data->pmu_flags |= PMU_CACHED; > + vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED; > + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); > + > + send_guest_vcpu_virq(sampling, VIRQ_XENPMU); > + > + return 1; > + } > > if ( vpmu->arch_vpmu_ops ) > { > - struct vlapic *vlapic = vcpu_vlapic(v); > + struct vlapic *vlapic = vcpu_vlapic(sampling); > u32 vlapic_lvtpc; > unsigned char int_vec; > > @@ -130,9 +277,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs) > int_vec = vlapic_lvtpc & APIC_VECTOR_MASK; > > if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED ) > - vlapic_set_irq(vcpu_vlapic(v), int_vec, 0); > + vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0); > else > - v->nmi_pending = 1; > + sampling->nmi_pending = 1; > return 1; > } > > @@ -231,7 +378,9 @@ void vpmu_load(struct vcpu *v) > local_irq_enable(); > > /* Only when PMU is 
counting, we load PMU context immediately. */ > - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) > + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) || > + (!is_hvm_domain(v->domain) && > + (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) ) > return; > > if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) > @@ -456,6 +605,7 @@ static int vpmu_force_context_switch(void) > long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) > { > int ret = -EINVAL; > + struct vcpu *curr; > xen_pmu_params_t pmu_params; > > switch ( op ) > @@ -549,9 +699,18 @@ long do_xenpmu_op(int op, > XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) > break; > > case XENPMU_lvtpc_set: > - if ( current->arch.vpmu.xenpmu_data == NULL ) > + curr = current; > + if ( curr->arch.vpmu.xenpmu_data == NULL ) > return -EINVAL; > - vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); > + vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); > + ret = 0; > + break; > + > + case XENPMU_flush: > + curr = current; > + curr->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED; > + vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); > + vpmu_load(curr); > ret = 0; > break; > } > diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h > index 68a5fb8..a1886a5 100644 > --- a/xen/include/public/pmu.h > +++ b/xen/include/public/pmu.h > @@ -28,6 +28,7 @@ > #define XENPMU_init 4 > #define XENPMU_finish 5 > #define XENPMU_lvtpc_set 6 > +#define XENPMU_flush 7 /* Write cached MSR values to HW */ > /* ` } */ > > /* Parameters structure for HYPERVISOR_xenpmu_op call */ > @@ -61,6 +62,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t); > */ > #define XENPMU_FEATURE_INTEL_BTS 1 > > +/* > + * PMU MSRs are cached in the context so the PV guest doesn't need to trap to > + * the hypervisor > + */ > +#define PMU_CACHED 1 > + > /* Shared between hypervisor and PV domain */ > struct xen_pmu_data { > uint32_t domain_id; > -- > 1.8.1.4 > > > 
_______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxx > http://lists.xen.org/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.