Hi,
I noticed that scrubbing pages in parallel is inefficient
due to spinlock contention (and possibly memory bandwidth as well).
Actually heap_lock becomes a bottleneck.
In my investigation, it often takes 1 millisecond just to acquire
the lock on ia64 with 8 CPUs, which is very wasteful.
For example, creating a domain is very slow while pages are being scrubbed.
# xm create vm memory=4000
# xm destroy vm; time xm create vm memory=4000
real 0m6.083s
user 0m0.024s
sys 0m0.008s
After the attached patch is applied,
# xm destroy vm; time xm create vm memory=4000
real 0m1.463s
user 0m0.132s
sys 0m0.028s
This workaround is an easy but ugly solution.
There must be a better way.
Thanks,
Kouya
Signed-off-by: Kouya Shimura <kouya@xxxxxxxxxxxxxx>
diff -r 4e3316ed1af5 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Fri Aug 08 15:02:19 2008 +0100
+++ b/xen/common/page_alloc.c Mon Aug 11 14:19:21 2008 +0900
@@ -70,6 +70,7 @@ integer_param("dma_bits", dma_bitsize);
#define scrub_page(p) clear_page(p)
#endif
+static DEFINE_SPINLOCK(page_scrub_mutex);
static DEFINE_SPINLOCK(page_scrub_lock);
LIST_HEAD(page_scrub_list);
static unsigned long scrub_pages;
@@ -951,6 +952,11 @@ static void page_scrub_softirq(void)
int i;
s_time_t start = NOW();
+ if (!spin_trylock(&page_scrub_mutex)) {
+ set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(1));
+ return;
+ }
+
/* Aim to do 1ms of work every 10ms. */
do {
spin_lock(&page_scrub_lock);
@@ -958,6 +964,7 @@ static void page_scrub_softirq(void)
if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
{
spin_unlock(&page_scrub_lock);
+ spin_unlock(&page_scrub_mutex);
return;
}
@@ -987,6 +994,8 @@ static void page_scrub_softirq(void)
free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
}
} while ( (NOW() - start) < MILLISECS(1) );
+
+ spin_unlock(&page_scrub_mutex);
set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|