Avoid backing frame table holes with memory, when those holes are large
enough to cover an exact multiple of large pages. This is based on the
introduction of a bit map, where each bit represents one such range, thus
allowing mfn_valid() checks to easily filter out those MFNs that now
shouldn't be used to index the frame table.

This allows for saving a couple of 2M pages even on "normal" systems.

Signed-off-by: Jan Beulich

--- 2009-09-21.orig/xen/arch/x86/mm.c	2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/mm.c	2009-09-21 11:14:51.000000000 +0200
@@ -141,6 +141,10 @@ struct domain *dom_xen, *dom_io;
 unsigned long max_page;
 unsigned long total_pages;
 
+unsigned long __read_mostly pdx_group_valid[BITS_TO_LONGS(
+    (FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1)
+    / PDX_GROUP_COUNT)] = { [0] = 1 };
+
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
 int opt_allow_hugepage;
@@ -162,36 +166,58 @@ l2_pgentry_t *compat_idle_pg_table_l2 =
 #define l3_disallow_mask(d) L3_DISALLOW_MASK
 #endif
 
+static void __init init_frametable_chunk(void *start, void *end)
+{
+    unsigned long s = (unsigned long)start;
+    unsigned long e = (unsigned long)end;
+    unsigned long step, mfn;
+
+    ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1)));
+    for ( ; s < e; s += step << PAGE_SHIFT )
+    {
+        step = 1UL << (cpu_has_page1gb &&
+                       !(s & ((1UL << L3_PAGETABLE_SHIFT) - 1)) ?
+                       L3_PAGETABLE_SHIFT - PAGE_SHIFT :
+                       L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+        /*
+         * The hardcoded 4 below is arbitrary - just pick whatever you think
+         * is reasonable to waste as a trade-off for using a large page.
+         */
+        while ( step && s + (step << PAGE_SHIFT) > e + (4 << PAGE_SHIFT) )
+            step >>= PAGETABLE_ORDER;
+        do {
+            mfn = alloc_boot_pages(step, step);
+        } while ( !mfn && (step >>= PAGETABLE_ORDER) );
+        if ( !mfn )
+            panic("Not enough memory for frame table");
+        map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR);
+    }
+
+    memset(start, 0, end - start);
+    memset(end, -1, s - (unsigned long)end);
+}
+
 void __init init_frametable(void)
 {
-    unsigned long nr_pages, page_step, i, mfn;
+    unsigned int sidx, eidx, nidx;
+    unsigned int max_idx = (max_pdx + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT;
 
 #ifdef __x86_64__
-    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
     BUILD_BUG_ON(XEN_VIRT_END > FRAMETABLE_VIRT_END);
-#else
-    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
 #endif
+    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
 
-    nr_pages  = PFN_UP(max_pdx * sizeof(*frame_table));
-    page_step = 1 << (cpu_has_page1gb ? L3_PAGETABLE_SHIFT - PAGE_SHIFT
-                                      : L2_PAGETABLE_SHIFT - PAGE_SHIFT);
-
-    for ( i = 0; i < nr_pages; i += page_step )
+    for ( sidx = 0; ; sidx = nidx )
     {
-        /*
-         * The hardcoded 4 below is arbitrary - just pick whatever you think
-         * is reasonable to waste as a trade-off for using a large page.
-         */
-        while (nr_pages + 4 - i < page_step)
-            page_step >>= PAGETABLE_ORDER;
-        mfn = alloc_boot_pages(page_step, page_step);
-        map_pages_to_xen(
-            FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
-            mfn, page_step, PAGE_HYPERVISOR);
+        eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx);
+        nidx = find_next_bit(pdx_group_valid, max_idx, eidx);
+        if ( nidx >= max_idx )
+            break;
+        init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
+                              pdx_to_page(eidx * PDX_GROUP_COUNT));
     }
-
-    memset(frame_table, 0, nr_pages << PAGE_SHIFT);
+    init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
+                          pdx_to_page(max_pdx - 1) + 1);
 }
 
 void __init arch_init_memory(void)
--- 2009-09-21.orig/xen/arch/x86/setup.c	2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/setup.c	2009-09-21 11:14:51.000000000 +0200
@@ -312,6 +312,16 @@ static void __init setup_max_pdx(void)
 #endif
 }
 
+static void __init set_pdx_range(unsigned long smfn, unsigned long emfn)
+{
+    unsigned long idx, eidx;
+
+    idx = pfn_to_pdx(smfn) / PDX_GROUP_COUNT;
+    eidx = (pfn_to_pdx(emfn - 1) + PDX_GROUP_COUNT) / PDX_GROUP_COUNT;
+    for ( ; idx < eidx; ++idx )
+        __set_bit(idx, pdx_group_valid);
+}
+
 /* A temporary copy of the e820 map that we can mess with during bootstrap. */
 static struct e820map __initdata boot_e820;
 
@@ -657,6 +667,8 @@ void __init __start_xen(unsigned long mb
         if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
             continue;
 
+        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+
         /* Map the chunk. No memory will need to be allocated to do this. */
         map_pages_to_xen(
             (unsigned long)maddr_to_bootstrap_virt(s),
@@ -853,6 +865,8 @@ void __init __start_xen(unsigned long mb
         }
 #endif
 
+        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+
         /* Need to create mappings above 16MB. */
         map_s = max_t(uint64_t, s, 16<<20);
         map_e = e;
--- 2009-09-21.orig/xen/arch/x86/x86_64/mm.c	2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/x86_64/mm.c	2009-09-21 11:14:51.000000000 +0200
@@ -64,6 +64,14 @@ l3_pgentry_t __attribute__ ((__section__
 l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
     l2_xenmap[L2_PAGETABLE_ENTRIES];
 
+int __mfn_valid(unsigned long mfn)
+{
+    return likely(mfn < max_page) &&
+           likely(!(mfn & pfn_hole_mask)) &&
+           likely(test_bit(pfn_to_pdx(mfn) / PDX_GROUP_COUNT,
+                           pdx_group_valid));
+}
+
 void *alloc_xen_pagetable(void)
 {
     extern int early_boot;
--- 2009-09-21.orig/xen/include/asm-x86/config.h	2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/config.h	2009-09-21 11:14:51.000000000 +0200
@@ -314,7 +314,8 @@ extern unsigned int video_mode, video_fl
 #define RDWR_MPT_VIRT_END       LINEAR_PT_VIRT_START
 #define RDWR_MPT_VIRT_START     (RDWR_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
 #define FRAMETABLE_VIRT_END     RDWR_MPT_VIRT_START
-#define FRAMETABLE_VIRT_START   (FRAMETABLE_VIRT_END - (FRAMETABLE_MBYTES<<20))
+#define FRAMETABLE_SIZE         (FRAMETABLE_MBYTES<<20)
+#define FRAMETABLE_VIRT_START   (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
 #define RO_MPT_VIRT_END         FRAMETABLE_VIRT_START
 #define RO_MPT_VIRT_START       (RO_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
 
--- 2009-09-21.orig/xen/include/asm-x86/mm.h	2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/mm.h	2009-09-21 11:14:51.000000000 +0200
@@ -263,6 +263,10 @@ extern unsigned long max_page;
 extern unsigned long total_pages;
 void init_frametable(void);
 
+#define PDX_GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \
+                         (sizeof(*frame_table) & -sizeof(*frame_table)))
+extern unsigned long pdx_group_valid[];
+
 /* Convert between Xen-heap virtual addresses and page-info structures. */
 static inline struct page_info *__virt_to_page(const void *v)
 {
--- 2009-09-21.orig/xen/include/asm-x86/x86_32/page.h	2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/x86_32/page.h	2009-09-21 11:14:51.000000000 +0200
@@ -41,7 +41,11 @@
 #include
 #include
 
-#define __mfn_valid(mfn)        ((mfn) < max_page)
+#define __mfn_valid(mfn)        ({                                            \
+    unsigned long __m_f_n = (mfn);                                            \
+    likely(__m_f_n < max_page) &&                                             \
+    likely(test_bit(pfn_to_pdx(__m_f_n) / PDX_GROUP_COUNT, pdx_group_valid)); \
+})
 
 #define max_pdx                 max_page
 #define pfn_to_pdx(pfn)         (pfn)
--- 2009-09-21.orig/xen/include/asm-x86/x86_64/page.h	2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/x86_64/page.h	2009-09-21 11:14:51.000000000 +0200
@@ -35,7 +35,7 @@
 /* Physical address where Xen was relocated to. */
 extern unsigned long xen_phys_start;
 
-extern unsigned long max_page, max_pdx;
+extern unsigned long max_pdx;
 extern unsigned long pfn_pdx_bottom_mask, ma_va_bottom_mask;
 extern unsigned int pfn_pdx_hole_shift;
 extern unsigned long pfn_hole_mask;
@@ -53,10 +53,7 @@ extern void pfn_pdx_hole_setup(unsigned
 #define pdx_to_virt(pdx) ((void *)(DIRECTMAP_VIRT_START + \
                           ((unsigned long)(pdx) << PAGE_SHIFT)))
 
-static inline int __mfn_valid(unsigned long mfn)
-{
-    return mfn < max_page && !(mfn & pfn_hole_mask);
-}
+extern int __mfn_valid(unsigned long mfn);
 
 static inline unsigned long pfn_to_pdx(unsigned long pfn)
 {
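
For readers who want to see the group bookkeeping in isolation, below is a
minimal, self-contained C sketch (not part of the patch, and not Xen code)
of the arithmetic it introduces. PAGE_INFO_SIZE, MAX_PDX and the two example
RAM ranges are made-up values, the bit helpers stand in for Xen's
__set_bit()/test_bit(), the pfn/pdx compression is ignored (pfn == pdx, as
on x86-32), and the bitmap walk in main() uses plain loops in place of
find_next_bit()/find_next_zero_bit(). Each bit of pdx_group_valid covers
PDX_GROUP_COUNT page indexes, i.e. the frame-table entries backed by one 2M
mapping; set_pdx_range() marks the groups touched by a RAM range, and
mfn_valid() rejects MFNs whose group was never marked.

/*
 * Illustrative sketch only: mirrors the pdx group bookkeeping added by the
 * patch, with assumed constants. Not Xen code.
 */
#include <limits.h>
#include <stdio.h>

#define PAGE_INFO_SIZE   32UL                 /* assumed sizeof(struct page_info) */
#define SUPERPAGE_SIZE   (1UL << 21)          /* one 2M frame-table mapping */
#define PDX_GROUP_COUNT  (SUPERPAGE_SIZE / PAGE_INFO_SIZE) /* page indexes per bit */
#define MAX_PDX          (1UL << 24)          /* assumed number of page indexes */

#define BITS_PER_LONG    (CHAR_BIT * sizeof(unsigned long))
#define NR_GROUPS        ((MAX_PDX + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT)

static unsigned long pdx_group_valid[(NR_GROUPS + BITS_PER_LONG - 1) / BITS_PER_LONG];

/* Stand-ins for Xen's __set_bit()/test_bit(). */
static void set_bit_(unsigned long idx, unsigned long *map)
{
    map[idx / BITS_PER_LONG] |= 1UL << (idx % BITS_PER_LONG);
}

static int test_bit_(unsigned long idx, const unsigned long *map)
{
    return (map[idx / BITS_PER_LONG] >> (idx % BITS_PER_LONG)) & 1;
}

/* Like set_pdx_range(): mark every group touched by the page range [smfn, emfn). */
static void set_pdx_range(unsigned long smfn, unsigned long emfn)
{
    unsigned long idx = smfn / PDX_GROUP_COUNT;
    unsigned long eidx = (emfn - 1 + PDX_GROUP_COUNT) / PDX_GROUP_COUNT;

    for ( ; idx < eidx; ++idx )
        set_bit_(idx, pdx_group_valid);
}

/* Like the new __mfn_valid(), minus the pfn_hole_mask check. */
static int mfn_valid(unsigned long mfn)
{
    return mfn < MAX_PDX && test_bit_(mfn / PDX_GROUP_COUNT, pdx_group_valid);
}

int main(void)
{
    unsigned long sidx, eidx;

    /* Pretend the boot-time E820 walk reported RAM at these page ranges. */
    set_pdx_range(0x000000UL, 0x100000UL);   /* 0 .. 4GB */
    set_pdx_range(0x400000UL, 0x500000UL);   /* 16GB .. 20GB */

    printf("mfn 0x80000 (in RAM):   valid=%d\n", mfn_valid(0x80000UL));
    printf("mfn 0x200000 (in hole): valid=%d\n", mfn_valid(0x200000UL));

    /*
     * Walk the runs of marked groups, much as the reworked init_frametable()
     * does, and report which frame-table chunk each run would need to back
     * with memory; everything in between stays unmapped.
     */
    for ( sidx = 0; sidx < NR_GROUPS; sidx = eidx )
    {
        while ( sidx < NR_GROUPS && !test_bit_(sidx, pdx_group_valid) )
            ++sidx;
        if ( sidx >= NR_GROUPS )
            break;
        eidx = sidx;
        while ( eidx < NR_GROUPS && test_bit_(eidx, pdx_group_valid) )
            ++eidx;
        printf("groups [%lu, %lu) -> frame-table bytes [0x%lx, 0x%lx)\n",
               sidx, eidx,
               sidx * PDX_GROUP_COUNT * PAGE_INFO_SIZE,
               eidx * PDX_GROUP_COUNT * PAGE_INFO_SIZE);
    }

    return 0;
}

One detail the sketch glosses over: the real PDX_GROUP_COUNT divides 2M by
(sizeof(*frame_table) & -sizeof(*frame_table)), i.e. by the lowest set bit of
the entry size, rather than by the size itself. The effect is that the group
count is a power of two while PDX_GROUP_COUNT consecutive frame-table entries
still occupy an exact multiple of 2M, so group boundaries always coincide
with large-page boundaries in the frame table.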