[Xen-devel] [PATCH 2/2] xen/mm: Introduce PG_state_uninitialised
From: David Woodhouse <dwmw@xxxxxxxxxxxx>

It is possible for pages to enter general circulation without ever being
processed by init_heap_pages().

For example, pages of the multiboot module containing the initramfs may
be assigned via assign_pages() to dom0 as it is created. And some code,
including map_pages_to_xen(), has checks on 'system_state' to determine
whether to use the boot or the heap allocator, but it seems impossible to
prove that pages allocated by the boot allocator are not subsequently
freed with free_heap_pages().

This actually works fine in the majority of cases; there are only a few
esoteric corner cases which init_heap_pages() handles before handing the
page range off to free_heap_pages():
 • Excluding MFN #0 to avoid inappropriate cross-zone merging.
 • Ensuring that the node information structures exist, when the first
   page(s) of a given node are handled.
 • High order allocations crossing from one node to another.

To handle this case, shift PG_state_inuse from its current value of zero
to another value. Use zero, which is the initial state of the entire
frame table, as PG_state_uninitialised.

Fix a couple of assertions which were assuming that PG_state_inuse is
zero, and make them cope with the PG_state_uninitialised case too where
appropriate.

Finally, make free_xenheap_pages() and free_domheap_pages() call through
to init_heap_pages() instead of directly to free_heap_pages() in the case
where pages are being freed which have never passed through
init_heap_pages().

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
 xen/arch/x86/mm.c        |  3 ++-
 xen/common/page_alloc.c  | 41 ++++++++++++++++++++++++++--------------
 xen/include/asm-arm/mm.h |  3 ++-
 xen/include/asm-x86/mm.h |  3 ++-
 4 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 9b33829084..bf660ee8eb 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -488,7 +488,8 @@ void share_xen_page_with_guest(struct page_info *page, struct domain *d,
     page_set_owner(page, d);
     smp_wmb(); /* install valid domain ptr before updating refcnt. */
-    ASSERT((page->count_info & ~PGC_xen_heap) == 0);
+    ASSERT((page->count_info & ~PGC_xen_heap) == PGC_state_inuse ||
+           (page->count_info & ~PGC_xen_heap) == PGC_state_uninitialised);
     /* Only add to the allocation list if the domain isn't dying. */
     if ( !d->is_dying )
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 4084503554..9703a2c664 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1407,6 +1407,7 @@ static void free_heap_pages(
         switch ( pg[i].count_info & PGC_state )
         {
         case PGC_state_inuse:
+        case PGC_state_uninitialised:
             pg[i].count_info = PGC_state_free;
             break;
@@ -1780,11 +1781,10 @@ int query_page_offline(mfn_t mfn, uint32_t *status)
  * latter is not on a MAX_ORDER boundary, then we reserve the page by
  * not freeing it to the buddy allocator.
  */
-static void init_heap_pages(
-    struct page_info *pg, unsigned long nr_pages)
+static void init_heap_pages(struct page_info *pg, unsigned long nr_pages,
+                            bool scrub)
 {
     unsigned long i;
-    bool idle_scrub = false;
     /*
      * Keep MFN 0 away from the buddy allocator to avoid crossing zone
@@ -1809,7 +1809,7 @@ static void init_heap_pages(
     spin_unlock(&heap_lock);
     if ( system_state < SYS_STATE_active && opt_bootscrub == BOOTSCRUB_IDLE )
-        idle_scrub = true;
+        scrub = true;
     for ( i = 0; i < nr_pages; i++ )
     {
@@ -1837,7 +1837,7 @@
             nr_pages -= n;
         }
-        free_heap_pages(pg + i, 0, scrub_debug || idle_scrub);
+        free_heap_pages(pg + i, 0, scrub_debug || scrub);
     }
 }
@@ -1873,7 +1873,7 @@ void __init end_boot_allocator(void)
         if ( (r->s < r->e) &&
              (phys_to_nid(pfn_to_paddr(r->s)) == cpu_to_node(0)) )
         {
-            init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s);
+            init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s, false);
             r->e = r->s;
             break;
         }
@@ -1882,7 +1882,7 @@
     {
         struct bootmem_region *r = &bootmem_region_list[i];
         if ( r->s < r->e )
-            init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s);
+            init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s, false);
     }
     nr_bootmem_regions = 0;
@@ -2151,7 +2151,7 @@ void init_xenheap_pages(paddr_t ps, paddr_t pe)
     memguard_guard_range(maddr_to_virt(ps), pe - ps);
-    init_heap_pages(maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
+    init_heap_pages(maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT, false);
 }
@@ -2174,14 +2174,20 @@ void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
 void free_xenheap_pages(void *v, unsigned int order)
 {
+    struct page_info *pg;
     ASSERT(!in_irq());
     if ( v == NULL )
         return;
+    pg = virt_to_page(v);
+
     memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
-    free_heap_pages(virt_to_page(v), order, false);
+    if ( unlikely(page_state_is(pg, uninitialised)) )
+        init_heap_pages(pg, 1 << order, true);
+    else
+        free_heap_pages(pg, order, false);
 }
 #else  /* !CONFIG_SEPARATE_XENHEAP */
@@ -2237,7 +2243,10 @@ void free_xenheap_pages(void *v, unsigned int order)
     for ( i = 0; i < (1u << order); i++ )
         pg[i].count_info &= ~PGC_xen_heap;
-    free_heap_pages(pg, order, true);
+    if ( unlikely(page_state_is(pg, uninitialised)) )
+        init_heap_pages(pg, 1 << order, true);
+    else
+        free_heap_pages(pg, order, true);
 }
 #endif /* CONFIG_SEPARATE_XENHEAP */
@@ -2260,7 +2269,7 @@ void init_domheap_pages(paddr_t ps, paddr_t pe)
     if ( mfn_x(emfn) <= mfn_x(smfn) )
         return;
-    init_heap_pages(mfn_to_page(smfn), mfn_x(emfn) - mfn_x(smfn));
+    init_heap_pages(mfn_to_page(smfn), mfn_x(emfn) - mfn_x(smfn), false);
 }
@@ -2301,10 +2310,11 @@ int assign_pages(
     for ( i = 0; i < (1 << order); i++ )
     {
         ASSERT(page_get_owner(&pg[i]) == NULL);
-        ASSERT(!pg[i].count_info);
+        ASSERT(pg[i].count_info == PGC_state_inuse ||
+               pg[i].count_info == PGC_state_uninitialised);
         page_set_owner(&pg[i], d);
         smp_wmb(); /* Domain pointer must be visible before updating refcnt. */
-        pg[i].count_info = PGC_allocated | 1;
+        pg[i].count_info |= PGC_allocated | 1;
         page_list_add_tail(&pg[i], &d->page_list);
     }
@@ -2427,7 +2437,10 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
             scrub = 1;
         }
-        free_heap_pages(pg, order, scrub);
+        if ( unlikely(page_state_is(pg, uninitialised)) )
+            init_heap_pages(pg, 1 << order, scrub);
+        else
+            free_heap_pages(pg, order, scrub);
     }
     if ( drop_dom_ref )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index c9466c8ca0..c696941600 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -117,12 +117,13 @@ struct page_info
  * { inuse, offlining, offlined, free, broken_offlining, broken }
  */
 #define PGC_state                  PG_mask(7, 9)
-#define PGC_state_inuse            PG_mask(0, 9)
+#define PGC_state_uninitialised    PG_mask(0, 9)
 #define PGC_state_offlining        PG_mask(1, 9)
 #define PGC_state_offlined         PG_mask(2, 9)
 #define PGC_state_free             PG_mask(3, 9)
 #define PGC_state_broken_offlining PG_mask(4, 9)
 #define PGC_state_broken           PG_mask(5, 9)
+#define PGC_state_inuse            PG_mask(6, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
 #define page_is_broken(pg)    (page_state_is((pg), broken_offlining) || \
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 3edadf7a7c..645368e6a9 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -72,12 +72,13 @@
  * { inuse, offlining, offlined, free, broken_offlining, broken }
  */
 #define PGC_state                  PG_mask(7, 9)
-#define PGC_state_inuse            PG_mask(0, 9)
+#define PGC_state_uninitialised    PG_mask(0, 9)
 #define PGC_state_offlining        PG_mask(1, 9)
 #define PGC_state_offlined         PG_mask(2, 9)
 #define PGC_state_free             PG_mask(3, 9)
 #define PGC_state_broken_offlining PG_mask(4, 9)
 #define PGC_state_broken           PG_mask(5, 9)
+#define PGC_state_inuse            PG_mask(6, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
 #define page_is_broken(pg)    (page_state_is((pg), broken_offlining) || \
-- 
2.21.0
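
[Editorial illustration, not part of the patch.] The idea the commit message describes is that the all-zeroes pattern the frame table starts with now decodes as PGC_state_uninitialised, while PGC_state_inuse moves to a non-zero encoding, so the free paths can tell "never initialised" pages apart and route them through init_heap_pages(). The standalone C sketch below models only that: the struct, the PG_shift()/PG_mask()-style macros, the stub free_heap_pages()/init_heap_pages() and the free_pages_sketch() dispatcher are simplified stand-ins, not the Xen definitions.

#include <stdio.h>

/* Simplified stand-ins modelled on the Xen-style bit-field macros. */
#define BITS_PER_LONG   (sizeof(unsigned long) * 8)
#define PG_shift(idx)   (BITS_PER_LONG - (idx))
#define PG_mask(x, idx) ((unsigned long)(x) << PG_shift(idx))

#define PGC_state               PG_mask(7, 9)
#define PGC_state_uninitialised PG_mask(0, 9) /* == 0: what a zeroed entry decodes as */
#define PGC_state_inuse         PG_mask(6, 9) /* moved away from zero by this patch */

struct page_info { unsigned long count_info; };

#define page_state_is(pg, st) \
    (((pg)->count_info & PGC_state) == PGC_state_##st)

/* Stubs standing in for the real allocator entry points. */
static void free_heap_pages(struct page_info *pg) { (void)pg; puts("free_heap_pages()"); }
static void init_heap_pages(struct page_info *pg) { (void)pg; puts("init_heap_pages()"); }

/* Dispatch in the shape the patched free paths use. */
static void free_pages_sketch(struct page_info *pg)
{
    if ( page_state_is(pg, uninitialised) )
        init_heap_pages(pg);   /* page never went through init_heap_pages() */
    else
        free_heap_pages(pg);
}

int main(void)
{
    struct page_info never_seen = { 0 };  /* frame table starts out zeroed */
    struct page_info in_use = { .count_info = PGC_state_inuse };

    free_pages_sketch(&never_seen);  /* prints init_heap_pages() */
    free_pages_sketch(&in_use);      /* prints free_heap_pages() */
    return 0;
}

With PG_state_inuse no longer encoded as zero, a zero-initialised frame-table entry is no longer indistinguishable from an allocated page, which is what allows free_xenheap_pages() and free_domheap_pages() in the patch above to send such pages through init_heap_pages() and pick up its corner-case handling.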