Avoid backing frame table holes with memory when those holes are large
enough to cover an exact multiple of large pages. This is based on the
introduction of a bitmap, where each bit represents one such range,
allowing mfn_valid() checks to easily filter out those MFNs that should
no longer be used to index the frame table.

This allows saving a couple of 2M pages even on "normal" systems.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
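
For illustration only (not part of the patch): the frame table is indexed by
page data index (pdx), and pdx_group_valid carries one bit per group of
PDX_GROUP_COUNT consecutive frame table entries, i.e. per chunk of the table
that is mapped (or left unmapped) as a unit. Only groups covering RAM ranges
reported in the E820 map get backing memory; the initializer pre-sets bit 0 so
the first group is always treated as valid. A minimal sketch of the resulting
check, with pdx_is_backed() being a hypothetical helper name used just here:

    /* Sketch: an MFN may only be used to index the frame table if the group
     * of entries it falls into was marked as backed by set_pdx_range(). */
    static inline int pdx_is_backed(unsigned long pdx)
    {
        return test_bit(pdx / PDX_GROUP_COUNT, pdx_group_valid);
    }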
--- 2009-09-21.orig/xen/arch/x86/mm.c 2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/mm.c 2009-09-21 11:14:51.000000000 +0200
@@ -141,6 +141,10 @@ struct domain *dom_xen, *dom_io;
unsigned long max_page;
unsigned long total_pages;
+unsigned long __read_mostly pdx_group_valid[BITS_TO_LONGS(
+    (FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1)
+    / PDX_GROUP_COUNT)] = { [0] = 1 };
+
#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
int opt_allow_hugepage;
@@ -162,36 +166,58 @@ l2_pgentry_t *compat_idle_pg_table_l2 =
#define l3_disallow_mask(d) L3_DISALLOW_MASK
#endif
+static void __init init_frametable_chunk(void *start, void *end)
+{
+    unsigned long s = (unsigned long)start;
+    unsigned long e = (unsigned long)end;
+    unsigned long step, mfn;
+
+    ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1)));
+    for ( ; s < e; s += step << PAGE_SHIFT )
+    {
+        step = 1UL << (cpu_has_page1gb &&
+                       !(s & ((1UL << L3_PAGETABLE_SHIFT) - 1)) ?
+                       L3_PAGETABLE_SHIFT - PAGE_SHIFT :
+                       L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+        /*
+         * The hardcoded 4 below is arbitrary - just pick whatever you think
+         * is reasonable to waste as a trade-off for using a large page.
+         */
+        while ( step && s + (step << PAGE_SHIFT) > e + (4 << PAGE_SHIFT) )
+            step >>= PAGETABLE_ORDER;
+        do {
+            mfn = alloc_boot_pages(step, step);
+        } while ( !mfn && (step >>= PAGETABLE_ORDER) );
+        if ( !mfn )
+            panic("Not enough memory for frame table");
+        map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR);
+    }
+
+    memset(start, 0, end - start);
+    memset(end, -1, s - (unsigned long)end);
+}
+
void __init init_frametable(void)
{
-    unsigned long nr_pages, page_step, i, mfn;
+    unsigned int sidx, eidx, nidx;
+    unsigned int max_idx = (max_pdx + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT;
#ifdef __x86_64__
-    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
BUILD_BUG_ON(XEN_VIRT_END > FRAMETABLE_VIRT_END);
-#else
-    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
#endif
+    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
-    nr_pages  = PFN_UP(max_pdx * sizeof(*frame_table));
-    page_step = 1 << (cpu_has_page1gb ? L3_PAGETABLE_SHIFT - PAGE_SHIFT
-                                      : L2_PAGETABLE_SHIFT - PAGE_SHIFT);
-
-    for ( i = 0; i < nr_pages; i += page_step )
+    for ( sidx = 0; ; sidx = nidx )
{
-        /*
-         * The hardcoded 4 below is arbitrary - just pick whatever you think
-         * is reasonable to waste as a trade-off for using a large page.
-         */
-        while (nr_pages + 4 - i < page_step)
-            page_step >>= PAGETABLE_ORDER;
-        mfn = alloc_boot_pages(page_step, page_step);
-        map_pages_to_xen(
-            FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
-            mfn, page_step, PAGE_HYPERVISOR);
+        eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx);
+        nidx = find_next_bit(pdx_group_valid, max_idx, eidx);
+        if ( nidx >= max_idx )
+            break;
+        init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
+                              pdx_to_page(eidx * PDX_GROUP_COUNT));
}
-
-    memset(frame_table, 0, nr_pages << PAGE_SHIFT);
+    init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
+                          pdx_to_page(max_pdx - 1) + 1);
}
void __init arch_init_memory(void)
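
A standalone sketch of the step selection done by init_frametable_chunk()
above, with the x86-64 shift constants written out (PAGE_SHIFT 12, L2 shift
21, L3 shift 30, PAGETABLE_ORDER 9); pick_step() is a hypothetical name used
only for illustration:

    /* Prefer a 1G mapping when supported and the address is 1G-aligned,
     * else 2M; shrink while more than 4 pages past 'e' would be wasted. */
    unsigned long pick_step(unsigned long s, unsigned long e, int has_1gb)
    {
        unsigned long step = 1UL << ((has_1gb && !(s & ((1UL << 30) - 1)))
                                     ? 30 - 12 : 21 - 12);

        while ( step && s + (step << 12) > e + (4 << 12) )
            step >>= 9;            /* 1G -> 2M -> 4K -> 0 */
        return step;               /* number of 4K pages mapped this round */
    }

The trailing memset(end, -1, ...) fills the slack between 'end' and the end
of the last mapping with 0xff bytes, presumably so that stray references to
those nonexistent entries are easier to notice.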
--- 2009-09-21.orig/xen/arch/x86/setup.c 2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/setup.c 2009-09-21 11:14:51.000000000 +0200
@@ -312,6 +312,16 @@ static void __init setup_max_pdx(void)
#endif
}
+static void __init set_pdx_range(unsigned long smfn, unsigned long emfn)
+{
+    unsigned long idx, eidx;
+
+    idx = pfn_to_pdx(smfn) / PDX_GROUP_COUNT;
+    eidx = (pfn_to_pdx(emfn - 1) + PDX_GROUP_COUNT) / PDX_GROUP_COUNT;
+    for ( ; idx < eidx; ++idx )
+        __set_bit(idx, pdx_group_valid);
+}
+
/* A temporary copy of the e820 map that we can mess with during bootstrap. */
static struct e820map __initdata boot_e820;
@@ -657,6 +667,8 @@ void __init __start_xen(unsigned long mb
if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
continue;
+        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+
/* Map the chunk. No memory will need to be allocated to do this. */
map_pages_to_xen(
(unsigned long)maddr_to_bootstrap_virt(s),
@@ -853,6 +865,8 @@ void __init __start_xen(unsigned long mb
}
#endif
+        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+
/* Need to create mappings above 16MB. */
map_s = max_t(uint64_t, s, 16<<20);
map_e = e;
--- 2009-09-21.orig/xen/arch/x86/x86_64/mm.c 2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/x86_64/mm.c 2009-09-21 11:14:51.000000000 +0200
@@ -64,6 +64,14 @@ l3_pgentry_t __attribute__ ((__section__
l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
l2_xenmap[L2_PAGETABLE_ENTRIES];
+int __mfn_valid(unsigned long mfn)
+{
+    return likely(mfn < max_page) &&
+           likely(!(mfn & pfn_hole_mask)) &&
+           likely(test_bit(pfn_to_pdx(mfn) / PDX_GROUP_COUNT,
+                           pdx_group_valid));
+}
+
void *alloc_xen_pagetable(void)
{
extern int early_boot;
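
The consumer-side consequence, as a sketch rather than a change this patch
makes to any caller: with the frame table only partially backed, page_info
lookups have to be guarded by the extended validity check above.
get_page_info_safe() is a hypothetical helper shown only to make the pattern
explicit:

    static struct page_info *get_page_info_safe(unsigned long mfn)
    {
        /* Reject MFNs beyond max_page, in the pdx compression hole, or in
         * a frame table group that was never backed with memory. */
        return __mfn_valid(mfn) ? mfn_to_page(mfn) : NULL;
    }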
--- 2009-09-21.orig/xen/include/asm-x86/config.h 2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/config.h 2009-09-21 11:14:51.000000000 +0200
@@ -314,7 +314,8 @@ extern unsigned int video_mode, video_fl
#define RDWR_MPT_VIRT_END LINEAR_PT_VIRT_START
#define RDWR_MPT_VIRT_START (RDWR_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
#define FRAMETABLE_VIRT_END RDWR_MPT_VIRT_START
-#define FRAMETABLE_VIRT_START (FRAMETABLE_VIRT_END - (FRAMETABLE_MBYTES<<20))
+#define FRAMETABLE_SIZE (FRAMETABLE_MBYTES<<20)
+#define FRAMETABLE_VIRT_START (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
#define RO_MPT_VIRT_END FRAMETABLE_VIRT_START
#define RO_MPT_VIRT_START (RO_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
--- 2009-09-21.orig/xen/include/asm-x86/mm.h 2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/mm.h 2009-09-21 11:14:51.000000000 +0200
@@ -263,6 +263,10 @@ extern unsigned long max_page;
extern unsigned long total_pages;
void init_frametable(void);
+#define PDX_GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \
+                         (sizeof(*frame_table) & -sizeof(*frame_table)))
+extern unsigned long pdx_group_valid[];
+
/* Convert between Xen-heap virtual addresses and page-info structures. */
static inline struct page_info *__virt_to_page(const void *v)
{
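
The PDX_GROUP_COUNT expression deserves a moment: sizeof(*frame_table) &
-sizeof(*frame_table) is the largest power of two dividing the entry size, so
a group always occupies a whole number of 2M mappings regardless of whether
sizeof(struct page_info) is itself a power of two. A self-contained check of
that property, using illustrative stand-in types rather than the real
definitions:

    #include <assert.h>

    #define L2_PAGETABLE_SHIFT 21
    struct page_info_example { unsigned long fields[4]; }; /* 32 bytes on LP64 */

    #define GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \
                         (sizeof(struct page_info_example) & \
                          -sizeof(struct page_info_example)))

    /* 65536 entries * 32 bytes = exactly one 2M page; a hypothetical 40-byte
     * entry would give 262144 entries * 40 bytes = 10MB, i.e. five 2M pages -
     * still an exact multiple, so unbacked groups never share a large-page
     * mapping with backed ones. */
    static_assert(GROUP_COUNT * sizeof(struct page_info_example)
                  % (1 << L2_PAGETABLE_SHIFT) == 0, "groups tile 2M pages");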
--- 2009-09-21.orig/xen/include/asm-x86/x86_32/page.h 2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/x86_32/page.h 2009-09-21 11:14:51.000000000 +0200
@@ -41,7 +41,11 @@
#include <xen/config.h>
#include <asm/types.h>
-#define __mfn_valid(mfn) ((mfn) < max_page)
+#define __mfn_valid(mfn) ({                                                   \
+    unsigned long __m_f_n = (mfn);                                            \
+    likely(__m_f_n < max_page) &&                                             \
+    likely(test_bit(pfn_to_pdx(__m_f_n) / PDX_GROUP_COUNT, pdx_group_valid)); \
+})
#define max_pdx max_page
#define pfn_to_pdx(pfn) (pfn)
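
The statement-expression form above is what keeps the 32-bit macro
single-evaluation now that the argument is used twice. For comparison, a
naive variant (hypothetical, not proposed) would expand its argument in both
places:

    /* Hypothetical - shown only to illustrate the double-evaluation hazard
     * that the ({ ... }) form in the patch avoids. */
    #define __mfn_valid_naive(mfn)                                        \
        (likely((mfn) < max_page) &&                                      \
         likely(test_bit(pfn_to_pdx(mfn) / PDX_GROUP_COUNT, pdx_group_valid)))

    /* __mfn_valid_naive(pop_next_mfn()) would call the (hypothetical)
     * pop_next_mfn() twice; __mfn_valid(pop_next_mfn()) calls it once. */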
--- 2009-09-21.orig/xen/include/asm-x86/x86_64/page.h 2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/x86_64/page.h 2009-09-21 11:14:51.000000000 +0200
@@ -35,7 +35,7 @@
/* Physical address where Xen was relocated to. */
extern unsigned long xen_phys_start;
-extern unsigned long max_page, max_pdx;
+extern unsigned long max_pdx;
extern unsigned long pfn_pdx_bottom_mask, ma_va_bottom_mask;
extern unsigned int pfn_pdx_hole_shift;
extern unsigned long pfn_hole_mask;
@@ -53,10 +53,7 @@ extern void pfn_pdx_hole_setup(unsigned
#define pdx_to_virt(pdx) ((void *)(DIRECTMAP_VIRT_START + \
((unsigned long)(pdx) << PAGE_SHIFT)))
-static inline int __mfn_valid(unsigned long mfn)
-{
-    return mfn < max_page && !(mfn & pfn_hole_mask);
-}
+extern int __mfn_valid(unsigned long mfn);
static inline unsigned long pfn_to_pdx(unsigned long pfn)
{