[Xen-devel] [PATCH v2 3/3] xen: use idle vcpus to scrub pages
In case of heavy heap_lock contention, scrubbing runs through two percpu lists
(a simplified standalone sketch follows the list below):
- Delist a batch of pages from the _heap[] free page list onto a percpu scrub list.
- Scrub the pages on that percpu list, then move them to a second percpu free list.
- Free those clean pages back to _heap[], merging with other chunks where possible.
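For illustration, here is a minimal standalone C sketch of the three-step flow
above. All names in it (struct page, delist_batch, scrub_local, free_back) are
made up for the sketch; the patch itself works on struct page_info lists under
heap_lock and merges chunks via merge_free_trunks().

/* Toy model of the two-percpu-list scheme (hypothetical names throughout). */
#include <stdio.h>
#include <stdlib.h>

struct page { int dirty; struct page *next; };

static struct page *global_heap;  /* stands in for _heap[]: lock-protected */
static struct page *percpu_scrub; /* percpu list of pages awaiting scrub */
static struct page *percpu_free;  /* percpu list of already-scrubbed pages */

/* Step 1: delist a batch of dirty pages from the global heap (under lock). */
static void delist_batch(int batch)
{
    while ( batch-- > 0 && global_heap )
    {
        struct page *pg = global_heap;
        global_heap = pg->next;
        pg->next = percpu_scrub;
        percpu_scrub = pg;
    }
}

/* Step 2: scrub the percpu list with no lock held (the expensive part). */
static void scrub_local(void)
{
    while ( percpu_scrub )
    {
        struct page *pg = percpu_scrub;
        percpu_scrub = pg->next;
        pg->dirty = 0;            /* scrub_one_page() in the real patch */
        pg->next = percpu_free;
        percpu_free = pg;
    }
}

/* Step 3: return clean pages to the global heap (under lock; the real
 * patch also merges adjacent chunks via merge_free_trunks()). */
static void free_back(void)
{
    while ( percpu_free )
    {
        struct page *pg = percpu_free;
        percpu_free = pg->next;
        pg->next = global_heap;
        global_heap = pg;
    }
}

int main(void)
{
    for ( int i = 0; i < 8; i++ )
    {
        struct page *pg = malloc(sizeof(*pg));
        pg->dirty = 1;
        pg->next = global_heap;
        global_heap = pg;
    }
    delist_batch(4);   /* step 1: under the (notional) heap lock */
    scrub_local();     /* step 2: no lock held */
    free_back();       /* step 3: under the lock again */
    for ( struct page *pg = global_heap; pg; pg = pg->next )
        printf("dirty=%d\n", pg->dirty); /* four 0s then four 1s */
    return 0;
}

The point of the middle step is that the expensive scrubbing happens with no
lock held; heap_lock is only taken to delist a batch and to free it back.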
v2:
* Avoid having two hyperthreads within the same core both scrubbing (a toy
  sketch of this check follows the list)
* Move at most (1 << SCRUB_BATCH_ORDER) pages to a percpu list in one go
* Don't spin on the heap lock when there is nothing to scrub
* Partially NUMA-aware
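The first v2 item reduces to the kind of check sketched below (standalone toy
code; NR_CPUS and sibling_of are hypothetical stand-ins for Xen's
per_cpu(cpu_sibling_mask, cpu) iteration over a per-cpu is_scrubbing flag):

/* Toy sketch of the sibling-exclusion check (hypothetical topology). */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static bool is_scrubbing[NR_CPUS];
/* Assume CPUs 0/1 and 2/3 are hyperthread siblings of each other. */
static const int sibling_of[NR_CPUS] = { 1, 0, 3, 2 };

static bool may_scrub(int cpu)
{
    /* Bail out if our sibling is already scrubbing: both threads of a
     * core would compete for the same core resources and the heap lock. */
    if ( is_scrubbing[sibling_of[cpu]] )
        return false;
    is_scrubbing[cpu] = true;
    return true;
}

int main(void)
{
    printf("cpu0: %d\n", may_scrub(0)); /* 1 - core 0 idle */
    printf("cpu1: %d\n", may_scrub(1)); /* 0 - sibling cpu0 is scrubbing */
    printf("cpu2: %d\n", may_scrub(2)); /* 1 - core 1 idle */
    return 0;
}

Two hyperthreads of one core share execution resources anyway, so letting both
scrub would mostly add cache and lock traffic rather than throughput.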
Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
---
xen/arch/arm/domain.c | 1 +
xen/arch/x86/domain.c | 1 +
xen/common/page_alloc.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++
xen/include/xen/mm.h | 1 +
4 files changed, 133 insertions(+)
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 04d0cd0..b6bc3ac 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -44,6 +44,7 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             stop_cpu();
+        scrub_free_pages();
         local_irq_disable();
         if ( cpu_is_haltable(smp_processor_id()) )
         {
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e896210..e8d4fe7 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,6 +116,7 @@ static void idle_loop(void)
     {
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
+        scrub_free_pages();
         (*pm_idle)();
         do_tasklet();
         do_softirq();
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index ab293c8..6ab1d1d 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -86,6 +86,12 @@ PAGE_LIST_HEAD(page_offlined_list);
 /* Broken page list, protected by heap_lock. */
 PAGE_LIST_HEAD(page_broken_list);
 
+/* A rough flag to indicate whether a node has pages in need of scrubbing */
+static bool_t node_need_scrub[MAX_NUMNODES];
+static DEFINE_PER_CPU(bool_t, is_scrubbing);
+static DEFINE_PER_CPU(struct page_list_head, scrub_list_cpu);
+static DEFINE_PER_CPU(struct page_list_head, free_list_cpu);
+
 /*************************
  * BOOT-TIME ALLOCATOR
  */
@@ -948,6 +954,7 @@ static void free_heap_pages(
     {
         if ( !tainted )
         {
+            node_need_scrub[node] = 1;
             for ( i = 0; i < (1 << order); i++ )
                 pg[i].count_info |= PGC_need_scrub;
         }
@@ -1525,7 +1532,130 @@ void __init scrub_heap_pages(void)
     setup_low_mem_virq();
 }
+#define SCRUB_BATCH_ORDER 12
+static void __scrub_free_pages(unsigned int node, unsigned int cpu)
+{
+    struct page_info *pg, *tmp;
+    unsigned int i;
+    int order;
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+    struct page_list_head *local_free_list = &this_cpu(free_list_cpu);
+
+    /* Scrub the percpu scrub list */
+    while ( !page_list_empty(local_scrub_list) )
+    {
+        pg = page_list_remove_head(local_scrub_list);
+        ASSERT( pg );
+        order = PFN_ORDER(pg);
+        ASSERT( order <= SCRUB_BATCH_ORDER );
+        for ( i = 0; i < (1 << order); i++ )
+        {
+            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+            scrub_one_page(&pg[i]);
+        }
+        page_list_add_tail(pg, local_free_list);
+        if ( softirq_pending(cpu) )
+            return;
+    }
+
+    /* Free the percpu free list back to the heap */
+    if ( !page_list_empty(local_free_list) )
+    {
+        spin_lock(&heap_lock);
+        page_list_for_each_safe( pg, tmp, local_free_list )
+        {
+            order = PFN_ORDER(pg);
+            page_list_del(pg, local_free_list);
+            for ( i = 0; i < (1 << order); i++ )
+            {
+                pg[i].count_info |= PGC_state_free;
+                pg[i].count_info &= ~PGC_need_scrub;
+            }
+            merge_free_trunks(pg, order, node, page_to_zone(pg), 0);
+        }
+        spin_unlock(&heap_lock);
+    }
+}
+
+void scrub_free_pages(void)
+{
+    int order;
+    struct page_info *pg, *tmp;
+    unsigned int i, zone, nr_delisted = 0;
+    unsigned int cpu = smp_processor_id();
+    unsigned int node = cpu_to_node(cpu);
+    struct page_list_head *local_scrub_list = &this_cpu(scrub_list_cpu);
+
+    /* Return if our sibling already started scrubbing */
+    for_each_cpu ( i, per_cpu(cpu_sibling_mask, cpu) )
+        if ( per_cpu(is_scrubbing, i) )
+            return;
+    this_cpu(is_scrubbing) = 1;
+
+    while ( !softirq_pending(cpu) )
+    {
+        if ( !node_need_scrub[node] )
+        {
+            /* Free the local percpu list before we exit */
+            __scrub_free_pages(node, cpu);
+            goto out;
+        }
+
+        /* Delist a batch of pages from the global scrub list */
+        if ( page_list_empty(local_scrub_list) )
+        {
+            spin_lock(&heap_lock);
+            for ( zone = 0; zone < NR_ZONES; zone++ )
+            {
+                for ( order = MAX_ORDER; order >= 0; order-- )
+                {
+                    page_list_for_each_safe( pg, tmp, &heap(node, zone, order) )
+                    {
+                        if ( !test_bit(_PGC_need_scrub, &(pg->count_info)) )
+                            continue;
+
+                        page_list_del( pg, &heap(node, zone, order) );
+                        if ( order > SCRUB_BATCH_ORDER )
+                        {
+                            /* Put back the extra pages */
+                            i = order;
+                            while ( i != SCRUB_BATCH_ORDER )
+                            {
+                                PFN_ORDER(pg) = --i;
+                                page_list_add_tail(pg, &heap(node, zone, i));
+                                pg += 1 << i;
+                            }
+                            PFN_ORDER(pg) = SCRUB_BATCH_ORDER;
+                        }
+
+                        for ( i = 0; i < (1 << PFN_ORDER(pg)); i++ )
+                        {
+                            ASSERT( test_bit(_PGC_need_scrub, &pg[i].count_info) );
+                            ASSERT( !test_bit(_PGC_broken, &pg[i].count_info) );
+                            mark_page_offline(&pg[i], 0);
+                        }
+                        page_list_add_tail(pg, local_scrub_list);
+                        nr_delisted += ( 1 << PFN_ORDER(pg) );
+                        if ( nr_delisted >= (1 << SCRUB_BATCH_ORDER) )
+                        {
+                            nr_delisted = 0;
+                            spin_unlock(&heap_lock);
+                            goto start_scrub;
+                        }
+                    }
+                }
+            }
+
+            node_need_scrub[node] = 0;
+            spin_unlock(&heap_lock);
+        }
+ start_scrub:
+        __scrub_free_pages(node, cpu);
+    }
+
+ out:
+    this_cpu(is_scrubbing) = 0;
+}
 /*************************
  * XEN-HEAP SUB-ALLOCATOR
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index b183189..1fa8c3d 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -78,6 +78,7 @@ int query_page_offline(unsigned long mfn, uint32_t *status);
 unsigned long total_free_pages(void);
 void scrub_heap_pages(void);
+void scrub_free_pages(void);
 int assign_pages(
     struct domain *d,
--
1.7.10.4