
[Xen-devel] [PATCH 2/2] xen: spread page scrubbing across all idle CPUs



Because of page scrubbing, destroying a domain with a large amount of memory
is very slow; it takes around 10 minutes to destroy a guest with nearly 1 TB
of memory.

Besides the CPU running 'xl destroy', this patch shortens the scrubbing time
by using all idle CPUs to do the scrubbing work in parallel.

Compared with other approaches, this one has minimal effect on the rest of
the system. The disadvantage is that if no CPUs are idle, the scrubbing time
cannot be reduced.

On a 60-CPU system, the 'xl destroy' time for a 30 GB guest dropped from 15s
to 8s.
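
In outline, each scrubbing CPU runs the loop below (a simplified sketch;
the actual list handling and locking are in scrub_free_pages() in the
patch):

    /* Run by every idle CPU, and by a tasklet on the destroying CPU. */
    do {
        /* Refill: move up to SCRUB_BATCH pages from global_scrub_list
         * to this CPU's private scrub_list (short scrub_lock hold).  */
        /* Scrub: scrub_one_page() each page on the private list,
         * without holding any lock.                                  */
        /* Free: return scrubbed pages to the heap under heap_lock.   */
    } while ( !softirq_pending(cpu) );  /* yield to pending work */

Batching keeps contention low: the global lock is taken once per
SCRUB_BATCH pages, and the actual scrubbing runs lock-free on private
per-CPU lists.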

Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
---
 xen/arch/x86/domain.c   |    1 +
 xen/common/domain.c     |    4 +++
 xen/common/page_alloc.c |   73 +++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/xen/mm.h    |    2 ++
 4 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 6fddd4c..ffa9b91 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,6 +116,7 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
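+        /* Opportunistically scrub pages queued by dying domains. */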
+        scrub_free_pages(0);
         (*pm_idle)();
         do_tasklet();
         do_softirq();
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 4291e29..7a8c6bf 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -593,6 +593,10 @@ int domain_kill(struct domain *d)
             BUG_ON(rc != -EAGAIN);
             break;
         }
+
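+        /*
+         * Kick a scrubbing tasklet on this CPU; idle CPUs also pull
+         * work off the global scrub list.
+         */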
+        tasklet_init(&global_scrub_tasklet, scrub_free_pages, 1);
+        tasklet_schedule_on_cpu(&global_scrub_tasklet, smp_processor_id());
+
         for_each_vcpu ( d, v )
             unmap_vcpu_info(v);
         d->is_dying = DOMDYING_dead;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 56826b4..d51926d 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -79,6 +79,14 @@ PAGE_LIST_HEAD(page_offlined_list);
 /* Broken page list, protected by heap_lock. */
 PAGE_LIST_HEAD(page_broken_list);
 
+/* Global scrub page list, protected by scrub_lock. */
+PAGE_LIST_HEAD(global_scrub_list);
+DEFINE_SPINLOCK(scrub_lock);
+struct tasklet global_scrub_tasklet;
+
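+/*
+ * Per-CPU work lists: pages taken from the global list for scrubbing,
+ * and scrubbed pages waiting to be freed back to the heap.
+ */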
+DEFINE_PER_CPU(struct page_list_head, scrub_list);
+DEFINE_PER_CPU(struct page_list_head, scrubbed_list);
+
 /*************************
  * BOOT-TIME ALLOCATOR
  */
@@ -1569,9 +1577,13 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
          * domain has died we assume responsibility for erasure.
          */
         if ( unlikely(d->is_dying) )
+        {
+            spin_lock(&scrub_lock);
             for ( i = 0; i < (1 << order); i++ )
-                scrub_one_page(&pg[i]);
-
+                page_list_add_tail(&pg[i], &global_scrub_list);
+            spin_unlock(&scrub_lock);
+            goto out;
+        }
         free_heap_pages(pg, order);
     }
     else if ( unlikely(d == dom_cow) )
@@ -1588,6 +1600,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
         drop_dom_ref = 0;
     }
 
+ out:
     if ( drop_dom_ref )
         put_domain(d);
 }
@@ -1680,6 +1693,62 @@ void scrub_one_page(struct page_info *pg)
     unmap_domain_page(p);
 }
 
+#define SCRUB_BATCH 1024
+void scrub_free_pages(unsigned long is_tasklet)
+{
+    struct page_info *pg;
+    unsigned int i, cpu, empty = 0;
+
+    cpu = smp_processor_id();
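+
+    /* Scrub until the global list drains or a softirq needs servicing. */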
+    do
+    {
+        if ( page_list_empty(&this_cpu(scrub_list)) )
+        {
+            /* Delist a batch of pages from global scrub list */
+            spin_lock(&scrub_lock);
+            for ( i = 0; i < SCRUB_BATCH; i++ )
+            {
+                pg = page_list_remove_head(&global_scrub_list);
+                if ( !pg )
+                {
+                    empty = 1;
+                    break;
+                }
+                page_list_add_tail(pg, &this_cpu(scrub_list));
+            }
+            spin_unlock(&scrub_lock);
+        }
+
+        /* Scrub percpu list */
+        while ( !page_list_empty(&this_cpu(scrub_list)) )
+        {
+            pg = page_list_remove_head(&this_cpu(scrub_list));
+            ASSERT(pg);
+            scrub_one_page(pg);
+            page_list_add_tail(pg, &this_cpu(scrubbed_list));
+        }
+
+        /* Free percpu scrubbed list */
+        if ( !page_list_empty(&this_cpu(scrubbed_list)) )
+        {
+            spin_lock(&heap_lock);
+            while ( !page_list_empty(&this_cpu(scrubbed_list)) )
+            {
+                pg = page_list_remove_head(&this_cpu(scrubbed_list));
+                __free_heap_pages(pg, 0);
+            }
+            spin_unlock(&heap_lock);
+        }
+
+        /* Global list is empty */
+        if ( empty )
+            return;
+    } while ( !softirq_pending(cpu) );
+
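+    /* A softirq is pending; if running as a tasklet, re-arm to resume later. */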
+    if ( is_tasklet )
+        tasklet_schedule_on_cpu(&global_scrub_tasklet, cpu);
+}
+
 static void dump_heap(unsigned char key)
 {
     s_time_t      now = NOW();
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index b183189..29821c4 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -78,6 +78,8 @@ int query_page_offline(unsigned long mfn, uint32_t *status);
 unsigned long total_free_pages(void);
 
 void scrub_heap_pages(void);
+void scrub_free_pages(unsigned long is_tasklet);
+extern struct tasklet global_scrub_tasklet;
 
 int assign_pages(
     struct domain *d,
-- 
1.7.10.4

