[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 9/9] xen/x86: track dirty pCPU caches for a given vCPU



When a guest is allowed access to cache control operations, such tracking
avoids having to issue a system-wide cache flush: only the pCPUs where the
vCPU has been scheduled since the last flush need to be flushed.

Note that domain-wide flushes accumulate the dirty caches from all the
vCPUs, but clearing the vCPU masks would require pausing all vCPUs, which
seems overkill.  Instead leave the vCPU dirty masks as-is; in the worst case
this will result in redundant flushes in further calls.

Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
 xen/arch/x86/domain.c             | 43 +++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c            |  2 +-
 xen/arch/x86/hvm/mtrr.c           |  2 +-
 xen/arch/x86/hvm/svm/svm.c        |  6 +++--
 xen/arch/x86/hvm/vmx/vmx.c        |  6 +++--
 xen/arch/x86/include/asm/domain.h |  9 +++++++
 xen/arch/x86/mm.c                 | 25 +++++++-----------
 xen/arch/x86/pv/emul-priv-op.c    |  8 ++----
 8 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f197dad4c0cd..3d08b829d2db 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -579,6 +579,13 @@ int arch_vcpu_create(struct vcpu *v)
 
         if ( (rc = init_vcpu_msr_policy(v)) )
             goto fail;
+
+        if ( cache_flush_permitted(d) &&
+             !cond_zalloc_cpumask_var(&v->arch.dirty_cache) )
+        {
+            rc = -ENOMEM;
+            goto fail;
+        }
     }
     else if ( (rc = xstate_alloc_save_area(v)) != 0 )
         return rc;
@@ -614,6 +621,7 @@ int arch_vcpu_create(struct vcpu *v)
     vcpu_destroy_fpu(v);
     xfree(v->arch.msrs);
     v->arch.msrs = NULL;
+    FREE_CPUMASK_VAR(v->arch.dirty_cache);
 
     return rc;
 }
@@ -628,6 +636,8 @@ void arch_vcpu_destroy(struct vcpu *v)
     xfree(v->arch.msrs);
     v->arch.msrs = NULL;
 
+    FREE_CPUMASK_VAR(v->arch.dirty_cache);
+
     if ( is_hvm_vcpu(v) )
         hvm_vcpu_destroy(v);
     else
@@ -2018,6 +2028,9 @@ static void __context_switch(void)
         cpumask_set_cpu(cpu, nd->dirty_cpumask);
     write_atomic(&n->dirty_cpu, cpu);
 
+    if ( cache_flush_permitted(nd) )
+        __cpumask_set_cpu(cpu, n->arch.dirty_cache);
+
     if ( !is_idle_domain(nd) )
     {
         memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
@@ -2606,6 +2619,36 @@ unsigned int domain_max_paddr_bits(const struct domain *d)
     return bits;
 }
 
+void vcpu_flush_cache(struct vcpu *curr)
+{
+    ASSERT(curr == current);
+    ASSERT(cache_flush_permitted(curr->domain));
+
+    flush_mask(curr->arch.dirty_cache, FLUSH_CACHE);
+    cpumask_clear(curr->arch.dirty_cache);
+    __cpumask_set_cpu(smp_processor_id(), curr->arch.dirty_cache);
+}
+
+void domain_flush_cache(const struct domain *d)
+{
+    const struct vcpu *v;
+    cpumask_t *mask = this_cpu(scratch_cpumask);
+
+    ASSERT(cache_flush_permitted(d));
+
+    cpumask_clear(mask);
+    for_each_vcpu( d, v )
+        cpumask_or(mask, mask, v->arch.dirty_cache);
+
+    flush_mask(mask, FLUSH_CACHE);
+    /*
+     * Clearing the mask of vCPUs in the domain would be racy unless all vCPUs
+     * are paused, so just leave them as-is, at the cost of possibly doing
+     * redundant flushes in later calls.  It's still better than doing a
+     * host-wide cache flush.
+     */
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 4cb2e13046d1..aed582a215a0 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2277,7 +2277,7 @@ void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
             domain_pause_nosync(v->domain);
 
             /* Flush physical caches. */
-            flush_all(FLUSH_CACHE);
+            domain_flush_cache(v->domain);
             hvm_set_uc_mode(v, 1);
 
             domain_unpause(v->domain);
diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
index 887994d2b984..cfe0d44459c2 100644
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -769,7 +769,7 @@ void memory_type_changed(struct domain *d)
     if ( cache_flush_permitted(d) &&
          d->vcpu && d->vcpu[0] && p2m_memory_type_changed(d) )
     {
-        flush_all(FLUSH_CACHE);
+        domain_flush_cache(d);
     }
 }
 
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index e33a38c1e446..5d1777ace335 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2315,8 +2315,10 @@ static void svm_vmexit_mce_intercept(
 
 static void cf_check svm_wbinvd_intercept(void)
 {
-    if ( cache_flush_permitted(current->domain) )
-        flush_all(FLUSH_CACHE);
+    struct vcpu *curr = current;
+
+    if ( cache_flush_permitted(curr->domain) )
+        vcpu_flush_cache(curr);
 }
 
 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs,
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 639882ceb216..9273607d576c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3840,11 +3840,13 @@ static void vmx_do_extint(struct cpu_user_regs *regs)
 
 static void cf_check vmx_wbinvd_intercept(void)
 {
-    if ( !cache_flush_permitted(current->domain) )
+    struct vcpu *curr = current;
+
+    if ( !cache_flush_permitted(curr->domain) )
         return;
 
     if ( cpu_has_wbinvd_exiting )
-        flush_all(FLUSH_CACHE);
+        vcpu_flush_cache(curr);
     else
         wbinvd();
 }
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index 8c0dea12a526..064b51889dc2 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -668,6 +668,12 @@ struct arch_vcpu
 
     struct vcpu_msrs *msrs;
 
+    /*
+     * When vCPU is allowed cache control track the pCPUs the vCPU has run on
+     * since the last flush.
+     */
+    cpumask_var_t dirty_cache;
+
     struct {
         bool next_interrupt_enabled;
     } monitor;
@@ -790,6 +796,9 @@ unsigned int domain_max_paddr_bits(const struct domain *d);
 #define arch_init_idle_domain arch_init_idle_domain
 void arch_init_idle_domain(struct domain *d);
 
+void vcpu_flush_cache(struct vcpu *curr);
+void domain_flush_cache(const struct domain *d);
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 59b60b1e62a7..11b59398a2c4 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3804,26 +3804,19 @@ long do_mmuext_op(
             break;
 
         case MMUEXT_FLUSH_CACHE:
-            /*
-             * Dirty pCPU caches where the current vCPU has been scheduled are
-             * not tracked, and hence we need to resort to a global cache
-             * flush for correctness.
-             */
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( likely(cache_flush_permitted(currd)) )
+                vcpu_flush_cache(curr);
+            else
+                rc = -EINVAL;
+            break;
+
         case MMUEXT_FLUSH_CACHE_GLOBAL:
             if ( unlikely(currd != pg_owner) )
                 rc = -EPERM;
             else if ( likely(cache_flush_permitted(currd)) )
-            {
-                unsigned int cpu;
-                cpumask_t *mask = this_cpu(scratch_cpumask);
-
-                cpumask_clear(mask);
-                for_each_online_cpu(cpu)
-                    if ( !cpumask_intersects(mask,
-                                             per_cpu(cpu_sibling_mask, cpu)) )
-                        __cpumask_set_cpu(cpu, mask);
-                flush_mask(mask, FLUSH_CACHE);
-            }
+                domain_flush_cache(currd);
             else
                 rc = -EINVAL;
             break;
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 089d4cb4d905..076ce8f00457 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -1199,12 +1199,8 @@ static int cf_check cache_op(
      * newer linux uses this in some start-of-day timing loops.
      */
     if ( cache_flush_permitted(current->domain) )
-        /*
-         * Handle wbnoinvd as wbinvd, at the expense of higher cost.  Broadcast
-         * the flush to all pCPUs, Xen doesn't track where the vCPU has ran
-         * previously.
-         */
-        flush_all(FLUSH_CACHE);
+        /* Handle wbnoinvd as wbinvd, at the expense of higher cost. */
+        vcpu_flush_cache(current);
 
     return X86EMUL_OKAY;
 }
-- 
2.48.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.