
[Xen-devel] [PATCH 7/8] x86/VPMU: Save/restore VPMU only when necessary



VPMU doesn't always need to be saved during a context switch. If we are
coming back to the same processor and no other VCPU has run here in the
meantime, we can simply continue running. This is especially useful on
Intel processors, where the Global Control MSR is stored in the VMCS and
therefore doesn't require us to stop the counters during the save
operation. On AMD we need to explicitly stop the counters, but we don't
need to save them.
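
To make the scheduling-dependent behaviour easier to follow, here is a
minimal, compilable user-space sketch of the policy (illustration only,
not Xen code: the vcpu/vpmu structures are simplified stand-ins and the
sketch_* helpers are invented for this example; only the flag values
mirror asm-x86/hvm/vpmu.h):

    /*
     * Illustration only -- not part of the patch. Models the lazy
     * save/restore policy: PMU state is left in hardware when a VCPU
     * is descheduled, and is fully reloaded only if the VCPU resumes
     * on a different pcpu or another VCPU ran here in the meantime.
     */
    #include <stdio.h>

    #define VPMU_CONTEXT_LOADED  0x2
    #define VPMU_RUNNING         0x4
    #define VPMU_STOPPED         0x80  /* counters stopped, state still in HW */

    #define NR_PCPUS 2

    struct vpmu { unsigned int flags; unsigned int last_pcpu; };
    struct vcpu { const char *name; struct vpmu vpmu; };

    /* Which VCPU's PMU state currently sits in each pcpu's counters. */
    static struct vcpu *last_vcpu[NR_PCPUS];

    static int  is_set(struct vpmu *v, unsigned int f) { return !!(v->flags & f); }
    static void set(struct vpmu *v, unsigned int f)    { v->flags |= f; }
    static void reset(struct vpmu *v, unsigned int f)  { v->flags &= ~f; }

    /* Deschedule: stop the counters but leave their values in "hardware". */
    static void sketch_save(struct vcpu *v, unsigned int pcpu)
    {
        if ( !is_set(&v->vpmu, VPMU_CONTEXT_LOADED) )
            return;
        v->vpmu.last_pcpu = pcpu;
        last_vcpu[pcpu] = v;
        set(&v->vpmu, VPMU_STOPPED);
        printf("%s: counters stopped on pcpu%u, not copied out\n", v->name, pcpu);
    }

    /* Reschedule: fast path if we are back on the same pcpu and nobody
     * else has run here; otherwise a full save/reload is required. */
    static void sketch_load(struct vcpu *v, unsigned int pcpu)
    {
        if ( v->vpmu.last_pcpu == pcpu && last_vcpu[pcpu] == v )
        {
            reset(&v->vpmu, VPMU_STOPPED);
            printf("%s: fast path on pcpu%u, no MSR save/restore\n", v->name, pcpu);
            return;
        }
        /* Slow path: the real patch IPIs the old pcpu (vpmu_save_force)
         * to save the context there before reloading it here. */
        reset(&v->vpmu, VPMU_STOPPED);
        v->vpmu.last_pcpu = pcpu;
        last_vcpu[pcpu] = v;
        printf("%s: slow path, full context reload on pcpu%u\n", v->name, pcpu);
    }

    int main(void)
    {
        struct vcpu a = { "vcpu-A", { VPMU_CONTEXT_LOADED | VPMU_RUNNING, 0 } };
        sketch_save(&a, 0);
        sketch_load(&a, 0);    /* same pcpu, nothing ran in between: fast */
        sketch_save(&a, 0);
        sketch_load(&a, 1);    /* migrated to pcpu1: slow */
        return 0;
    }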

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
 xen/arch/x86/domain.c             | 14 ++++++--
 xen/arch/x86/hvm/svm/svm.c        |  2 --
 xen/arch/x86/hvm/svm/vpmu.c       | 58 +++++++++++++++++++++++++-----
 xen/arch/x86/hvm/vmx/vmx.c        |  2 --
 xen/arch/x86/hvm/vmx/vpmu_core2.c | 25 +++++++++++--
 xen/arch/x86/hvm/vpmu.c           | 74 +++++++++++++++++++++++++++++++++++----
 xen/include/asm-x86/hvm/vpmu.h    |  5 ++-
 7 files changed, 155 insertions(+), 25 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 0d7a40c..6f9cbed 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1535,8 +1535,14 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
     if (prev != next)
         update_runstate_area(prev);
 
-    if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) )
-        pt_save_timer(prev);
+    if ( is_hvm_vcpu(prev) )
+    {
+        if (prev != next)
+            vpmu_save(prev);
+
+        if ( !list_empty(&prev->arch.hvm_vcpu.tm_list) )
+            pt_save_timer(prev);
+    }
 
     local_irq_disable();
 
@@ -1574,6 +1580,10 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
                            (next->domain->domain_id != 0));
     }
 
+    if ( is_hvm_vcpu(next) && (prev != next) )
+        /* Must be done with interrupts enabled */
+        vpmu_load(next);
+
     context_saved(prev);
 
     if (prev != next)
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 89e47b3..8123f37 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -860,7 +860,6 @@ static void svm_ctxt_switch_from(struct vcpu *v)
     svm_fpu_leave(v);
 
     svm_save_dr(v);
-    vpmu_save(v);
     svm_lwp_save(v);
     svm_tsc_ratio_save(v);
 
@@ -901,7 +900,6 @@ static void svm_ctxt_switch_to(struct vcpu *v)
     svm_vmsave(per_cpu(root_vmcb, cpu));
     svm_vmload(vmcb);
     vmcb->cleanbits.bytes = 0;
-    vpmu_load(v);
     svm_lwp_load(v);
     svm_tsc_ratio_load(v);
 
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 6610806..a11a650 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -188,6 +188,21 @@ static inline void context_restore(struct vcpu *v)
 
 static void amd_vpmu_restore(struct vcpu *v)
 {
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct amd_vpmu_context *ctxt = vpmu->context;
+
+    vpmu_reset(vpmu, VPMU_STOPPED);
+
+    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) 
+    {
+        int i;
+
+        for ( i = 0; i < num_counters; i++ )
+            wrmsrl(ctrls[i], ctxt->ctrls[i]);
+
+        return;
+    }
+
     context_restore(v);
 }
 
@@ -197,23 +212,40 @@ static inline void context_save(struct vcpu *v)
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     struct amd_vpmu_context *ctxt = vpmu->context;
 
+    /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */
     for ( i = 0; i < num_counters; i++ )
-    {
-        rdmsrl(ctrls[i], ctxt->ctrls[i]);
-        wrmsrl(ctrls[i], 0);
         rdmsrl(counters[i], ctxt->counters[i]);
-    }
 }
 
-static void amd_vpmu_save(struct vcpu *v)
+static int amd_vpmu_save(struct vcpu *v)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     struct amd_vpmu_context *ctx = vpmu->context;
+    int i;
 
-    context_save(v);
+    /*
+     * Stop the counters. If we came here via vpmu_save_force (i.e.
+     * when VPMU_CONTEXT_SAVE is set) the counters are already stopped.
+     */
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) 
+    {
+        vpmu_set(vpmu, VPMU_STOPPED);
 
+        for ( i = 0; i < num_counters; i++ )
+            wrmsrl(ctrls[i], 0);
+
+        return 0;
+    }
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        return 0;
+ 
+    context_save(v);
+ 
     if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && ctx->msr_bitmap_set )
         amd_vpmu_unset_msr_bitmap(v);
+
+    return 1;
 }
 
 static void context_read(unsigned int msr, u64 *msr_content)
@@ -286,6 +318,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
             return 1;
         vpmu_set(vpmu, VPMU_RUNNING);
         apic_write(APIC_LVTPC, PMU_APIC_VECTOR);
+        vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR;
 
         if ( !((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
             amd_vpmu_set_msr_bitmap(v);
@@ -296,16 +329,19 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
         (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
     {
         apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+        vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
         vpmu_reset(vpmu, VPMU_RUNNING);
         if ( ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
             amd_vpmu_unset_msr_bitmap(v);
         release_pmu_ownship(PMU_OWNER_HVM);
     }
 
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
+        || vpmu_is_set(vpmu, VPMU_STOPPED) )
     {
         context_restore(v);
         vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+        vpmu_reset(vpmu, VPMU_STOPPED);
     }
 
     /* Update vpmu context immediately */
@@ -321,7 +357,13 @@ static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
     struct vcpu *v = current;
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
 
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+    if ( vpmu_is_set(vpmu, VPMU_STOPPED) )
+    {
+        context_restore(v);
+        vpmu_reset(vpmu, VPMU_STOPPED);
+    }
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) 
         context_read(msr, msr_content);
     else
         rdmsrl(msr, *msr_content);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index a869ed4..e36dbcb 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -615,7 +615,6 @@ static void vmx_ctxt_switch_from(struct vcpu *v)
     vmx_save_guest_msrs(v);
     vmx_restore_host_msrs();
     vmx_save_dr(v);
-    vpmu_save(v);
 }
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
@@ -640,7 +639,6 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
 
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
-    vpmu_load(v);
 }
 
 
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index 6195bfc..9f152b4 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -307,17 +307,23 @@ static inline void __core2_vpmu_save(struct vcpu *v)
         rdmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter);
 }
 
-static void core2_vpmu_save(struct vcpu *v)
+static int core2_vpmu_save(struct vcpu *v)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
 
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
+        return 0;
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) 
+        return 0;
+
     __core2_vpmu_save(v);
 
     /* Unset PMU MSR bitmap to trap lazy load. */
     if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && cpu_has_vmx_msr_bitmap )
         core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
 
-    return;
+    return 1;
 }
 
 static inline void __core2_vpmu_load(struct vcpu *v)
@@ -338,6 +344,11 @@ static inline void __core2_vpmu_load(struct vcpu *v)
 
 static void core2_vpmu_load(struct vcpu *v)
 {
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        return;
+
     __core2_vpmu_load(v);
 }
 
@@ -539,9 +550,15 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
     /* Setup LVTPC in local apic */
     if ( vpmu_is_set(vpmu, VPMU_RUNNING) &&
          is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) )
+    {
         apic_write_around(APIC_LVTPC, PMU_APIC_VECTOR);
+        vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR;
+    }
     else
+    {
         apic_write_around(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+        vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+    }
 
     core2_vpmu_save_msr_context(v, type, index, msr_content);
     if ( type != MSR_TYPE_GLOBAL )
@@ -616,6 +633,7 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
         else
             return 0;
     }
+
     return 1;
 }
 
@@ -710,7 +728,8 @@ static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
     }
 
     /* HW sets the MASK bit when performance counter interrupt occurs*/
-    apic_write_around(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+    vpmu->hw_lapic_lvtpc = apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED;
+    apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
 
     return 1;
 }
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 6b7af68..3f269d1 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -18,7 +18,6 @@
  *
  * Author: Haitao Shan <haitao.shan@xxxxxxxxx>
  */
-
 #include <xen/config.h>
 #include <xen/sched.h>
 #include <xen/xenoprof.h>
@@ -42,6 +41,8 @@ static unsigned int __read_mostly opt_vpmu_enabled;
 static void parse_vpmu_param(char *s);
 custom_param("vpmu", parse_vpmu_param);
 
+static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
+
 static void __init parse_vpmu_param(char *s)
 {
     switch ( parse_bool(s) )
@@ -121,30 +122,89 @@ void vpmu_do_cpuid(unsigned int input,
         vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx);
 }
 
+static void vpmu_save_force(void *arg)
+{
+    struct vcpu *v = (struct vcpu *)arg;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        return;
+
+    if ( vpmu->arch_vpmu_ops )
+        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v);
+
+    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
+
+    per_cpu(last_vcpu, smp_processor_id()) = NULL;
+}
+
 void vpmu_save(struct vcpu *v)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    int pcpu = smp_processor_id();
  
     if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) &&
           vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)) )
        return;
 
+    vpmu->last_pcpu = pcpu;
+    per_cpu(last_vcpu, pcpu) = v;
+
     if ( vpmu->arch_vpmu_ops )
-        vpmu->arch_vpmu_ops->arch_vpmu_save(v);
+        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) 
+            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
 
-    vpmu->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
     apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
-
-    vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
 }
 
 void vpmu_load(struct vcpu *v)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    int pcpu = smp_processor_id();
+    struct vcpu *prev = NULL;
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+        return;
+
+    /* First time this VCPU is running here */
+    if ( vpmu->last_pcpu != pcpu )
+    {
+        /*
+         * Get the context from the last pcpu that we ran on. Note that if
+         * another VCPU is running there, it must have saved this VCPU's
+         * context before starting to run (see below).
+         * There should be no race since the remote pcpu will disable
+         * interrupts before saving the context.
+         */
+        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        {
+            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
+                             vpmu_save_force, (void *)v, 1);
+            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+        }
+    } 
+
+    /* Prevent forced context save from remote CPU */
+    local_irq_disable(); 
+
+    prev = per_cpu(last_vcpu, pcpu);
+
+    if ( prev != v && prev ) {
+        vpmu = vcpu_vpmu(prev);
+
+        /* Someone ran here before us */
+        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+        vpmu_save_force(prev);
+        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+
+        vpmu = vcpu_vpmu(v);
+    }
+
+    local_irq_enable();
 
     /* Only when PMU is counting, we load PMU context immediately. */
-    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) &&
-           vpmu_is_set(vpmu, VPMU_RUNNING)) )
+    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
         return;
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 01be976..5040a49 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -52,7 +52,7 @@ struct arch_vpmu_ops {
                      unsigned int *eax, unsigned int *ebx,
                      unsigned int *ecx, unsigned int *edx);
     void (*arch_vpmu_destroy)(struct vcpu *v);
-    void (*arch_vpmu_save)(struct vcpu *v);
+    int (*arch_vpmu_save)(struct vcpu *v);
     void (*arch_vpmu_load)(struct vcpu *v);
     void (*arch_vpmu_dump)(struct vcpu *v);
 };
@@ -62,6 +62,7 @@ int svm_vpmu_initialise(struct vcpu *, unsigned int flags);
 
 struct vpmu_struct {
     u32 flags;
+    u32 last_pcpu;
     u32 hw_lapic_lvtpc;
     void *context;
     struct arch_vpmu_ops *arch_vpmu_ops;
@@ -73,6 +74,8 @@ struct vpmu_struct {
 #define VPMU_PASSIVE_DOMAIN_ALLOCATED       0x8
 #define VPMU_CPU_HAS_DS                     0x10 /* Has Debug Store */
 #define VPMU_CPU_HAS_BTS                    0x20 /* Has Branch Trace Store */
+#define VPMU_CONTEXT_SAVE                   0x40 /* Force context save */
+#define VPMU_STOPPED                        0x80 /* Stop counters while VCPU is not running */
 
 
 #define vpmu_set(_vpmu, _x)    ((_vpmu)->flags |= (_x))
-- 
1.8.1.2

