|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v11 for-xen-4.5 16/20] x86/VPMU: Handle PMU interrupts for PV guests
On Mon, Sep 22, 2014 at 07:57:57PM -0400, Boris Ostrovsky wrote:
> Add support for handling PMU interrupts for PV guests.
>
> VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in
> VPMU context which is shared between hypervisor and domain, thus avoiding
> traps to hypervisor.
>
> Since the the interrupt handler may now force VPMU context save (i.e. set
Extra 'the'
> VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
> until now expected this flag to be set only when the counters are stopped.
s/are/were/
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> ---
> xen/arch/x86/hvm/svm/vpmu.c | 11 +--
> xen/arch/x86/hvm/vpmu.c | 187 ++++++++++++++++++++++++++++++++++++++++----
> xen/include/public/pmu.h | 7 ++
> 3 files changed, 183 insertions(+), 22 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 63c099c..055b21c 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -229,17 +229,12 @@ static int amd_vpmu_save(struct vcpu *v)
> struct vpmu_struct *vpmu = vcpu_vpmu(v);
> unsigned int i;
>
> - /*
> - * Stop the counters. If we came here via vpmu_save_force (i.e.
> - * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
> - */
> + for ( i = 0; i < num_counters; i++ )
> + wrmsrl(ctrls[i], 0);
> +
> if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
> {
> vpmu_set(vpmu, VPMU_FROZEN);
> -
> - for ( i = 0; i < num_counters; i++ )
> - wrmsrl(ctrls[i], 0);
> -
> return 0;
> }
>
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index edc5f91..018221d 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -79,44 +79,191 @@ static void __init parse_vpmu_param(char *s)
>
> void vpmu_lvtpc_update(uint32_t val)
> {
> - struct vpmu_struct *vpmu = vcpu_vpmu(current);
> + struct vcpu *curr = current;
> + struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>
> vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> + /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
> + if ( is_hvm_domain(curr->domain) ||
> + !(vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
> + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> }
>
> int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
> {
> - struct vpmu_struct *vpmu = vcpu_vpmu(current);
> + struct vcpu *curr = current;
> + struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>
> if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> return 0;
>
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
> - return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> + {
> + int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +
> + /*
> + * We may have received a PMU interrupt during WRMSR handling
> + * and since do_wrmsr may load VPMU context we should save
> + * (and unload) it again.
> + */
> + if ( !is_hvm_domain(curr->domain) &&
> + vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
> + {
> + vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> + vpmu->arch_vpmu_ops->arch_vpmu_save(curr);
> + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> + }
> + return ret;
> + }
> return 0;
> }
>
> int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
> {
> - struct vpmu_struct *vpmu = vcpu_vpmu(current);
> + struct vcpu *curr = current;
> + struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>
> if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> return 0;
>
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
> - return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> + {
You have a nice comment in the above code. Could you replicate it
here or just point the reader of the code to the reasoning?
> + int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +
> + if ( !is_hvm_domain(curr->domain) &&
> + vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
> + {
> + vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> + vpmu->arch_vpmu_ops->arch_vpmu_save(curr);
> + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> + }
> + return ret;
> + }
> return 0;
> }
>
> +static struct vcpu *choose_hwdom_vcpu(void)
> +{
> + struct vcpu *v;
> + unsigned idx = smp_processor_id() % hardware_domain->max_vcpus;
> +
> + if ( hardware_domain->vcpu == NULL )
> + return NULL;
> +
> + v = hardware_domain->vcpu[idx];
> +
> + /*
> + * If index is not populated search downwards the vcpu array until
> + * a valid vcpu can be found
> + */
> + while ( !v && idx-- )
> + v = hardware_domain->vcpu[idx];
> +
> + return v;
> +}
> +
> int vpmu_do_interrupt(struct cpu_user_regs *regs)
> {
> - struct vcpu *v = current;
> - struct vpmu_struct *vpmu = vcpu_vpmu(v);
> + struct vcpu *sampled = current, *sampling;
> + struct vpmu_struct *vpmu;
> +
> + /* dom0 will handle interrupt for special domains (e.g. idle domain) */
> + if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
> + {
> + sampling = choose_hwdom_vcpu();
> + if ( !sampling )
> + return 0;
> + }
> + else
> + sampling = sampled;
> +
> + vpmu = vcpu_vpmu(sampling);
> + if ( !is_hvm_domain(sampling->domain) )
> + {
> + /* PV(H) guest */
> + const struct cpu_user_regs *cur_regs;
> +
> + if ( !vpmu->xenpmu_data )
> + return 0;
> +
> + if ( vpmu->xenpmu_data->pmu_flags & PMU_CACHED )
> + return 1;
> +
> + if ( is_pvh_domain(sampled->domain) &&
> + !vpmu->arch_vpmu_ops->do_interrupt(regs) )
> + return 0;
> +
> + /* PV guest will be reading PMU MSRs from xenpmu_data */
> + vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> + vpmu->arch_vpmu_ops->arch_vpmu_save(sampling);
> + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +
> + /* Store appropriate registers in xenpmu_data */
> + if ( is_pv_32bit_domain(sampling->domain) )
> + {
> + /*
> + * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
> + * and therefore we treat it the same way as a non-privileged
> + * PV 32-bit domain.
> + */
> + struct compat_pmu_regs *cmp;
> +
> + cur_regs = guest_cpu_user_regs();
> +
> + cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
> + cmp->eip = cur_regs->rip;
> + cmp->esp = cur_regs->rsp;
> + cmp->cs = cur_regs->cs;
> + if ( (cmp->cs & 3) == 1 )
> + cmp->cs &= ~3;
> + }
> + else
> + {
> + struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
> +
> + /* Non-privileged domains are always in XENPMU_MODE_SELF mode */
> + if ( (vpmu_mode & XENPMU_MODE_SELF) ||
> + (!is_hardware_domain(sampled->domain) &&
> + !is_idle_vcpu(sampled)) )
> + cur_regs = guest_cpu_user_regs();
> + else
> + cur_regs = regs;
> +
> + r->rip = cur_regs->rip;
> + r->rsp = cur_regs->rsp;
> +
> + if ( !is_pvh_domain(sampled->domain) )
> + {
> + r->cs = cur_regs->cs;
> + if ( sampled->arch.flags & TF_kernel_mode )
> + r->cs &= ~3;
> + }
> + else
> + {
> + struct segment_register seg_cs;
> +
> + hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> + r->cs = seg_cs.sel;
> + }
> + }
> +
> + vpmu->xenpmu_data->domain_id = DOMID_SELF;
> + vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
> + vpmu->xenpmu_data->pcpu_id = smp_processor_id();
> +
> + vpmu->xenpmu_data->pmu_flags |= PMU_CACHED;
> + vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> + send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> +
> + return 1;
> + }
>
> if ( vpmu->arch_vpmu_ops )
> {
> - struct vlapic *vlapic = vcpu_vlapic(v);
> + struct vlapic *vlapic = vcpu_vlapic(sampling);
> u32 vlapic_lvtpc;
> unsigned char int_vec;
>
> @@ -130,9 +277,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
> int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
>
> if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> - vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
> + vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0);
> else
> - v->nmi_pending = 1;
> + sampling->nmi_pending = 1;
> return 1;
> }
>
> @@ -231,7 +378,9 @@ void vpmu_load(struct vcpu *v)
> local_irq_enable();
>
> /* Only when PMU is counting, we load PMU context immediately. */
> - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
> + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> + (!is_hvm_domain(v->domain) &&
> + (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
> return;
>
> if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
> @@ -456,6 +605,7 @@ static int vpmu_force_context_switch(void)
> long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
> {
> int ret = -EINVAL;
> + struct vcpu *curr;
> xen_pmu_params_t pmu_params;
>
> switch ( op )
> @@ -549,9 +699,18 @@ long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
> break;
>
> case XENPMU_lvtpc_set:
> - if ( current->arch.vpmu.xenpmu_data == NULL )
> + curr = current;
> + if ( curr->arch.vpmu.xenpmu_data == NULL )
> return -EINVAL;
> - vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> + vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> + ret = 0;
> + break;
> +
> + case XENPMU_flush:
> + curr = current;
> + curr->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED;
> + vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> + vpmu_load(curr);
> ret = 0;
> break;
> }
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 68a5fb8..a1886a5 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -28,6 +28,7 @@
> #define XENPMU_init 4
> #define XENPMU_finish 5
> #define XENPMU_lvtpc_set 6
> +#define XENPMU_flush 7 /* Write cached MSR values to HW */
> /* ` } */
>
> /* Parameters structure for HYPERVISOR_xenpmu_op call */
> @@ -61,6 +62,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
> */
> #define XENPMU_FEATURE_INTEL_BTS 1
>
> +/*
> + * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
> + * the hypervisor
> + */
> +#define PMU_CACHED 1
> +
> /* Shared between hypervisor and PV domain */
> struct xen_pmu_data {
> uint32_t domain_id;
> --
> 1.8.1.4
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |