[Xen-devel] [PATCH 3/7] x86: map frame table sparsely
Avoid backing frame table holes with memory when those holes are large
enough to cover an exact multiple of large pages. This is based on the
introduction of a bitmap in which each bit represents one such range,
allowing mfn_valid() checks to easily filter out those MFNs that now
must not be used to index the frame table.

This saves a couple of 2M pages even on "normal" systems.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
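
Purely to illustrate the intended semantics of the bitmap, here is a
minimal standalone sketch (not hypervisor code: ENTRY_SIZE, the array
sizes, max_page, the example ranges, and the helper names are all made
up for the illustration, and it treats pdx == pfn, ignoring the x86-64
PFN hole compression that the real __mfn_valid() also checks via
pfn_hole_mask):

    #include <limits.h>
    #include <stdio.h>

    #define ENTRY_SIZE    32UL                        /* assumed sizeof(struct page_info) */
    #define GROUP_COUNT   ((1UL << 21) / ENTRY_SIZE)  /* frame table entries per 2M chunk */
    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    static unsigned long group_valid[64];             /* enough groups for this sketch */
    static unsigned long max_page = 1UL << 24;        /* pretend 64GB of 4k pages */

    /* Mark [smfn, emfn) as RAM-backed, rounding outward to whole groups. */
    static void set_range(unsigned long smfn, unsigned long emfn)
    {
        unsigned long idx  = smfn / GROUP_COUNT;
        unsigned long eidx = (emfn - 1) / GROUP_COUNT + 1;

        for ( ; idx < eidx; ++idx )
            group_valid[idx / BITS_PER_LONG] |= 1UL << (idx % BITS_PER_LONG);
    }

    /* A clear bit means that frame table range was never backed, so reject. */
    static int sketch_mfn_valid(unsigned long mfn)
    {
        unsigned long idx = mfn / GROUP_COUNT;

        return mfn < max_page &&
               ((group_valid[idx / BITS_PER_LONG] >> (idx % BITS_PER_LONG)) & 1);
    }

    int main(void)
    {
        set_range(0x0, 0x80000);            /* RAM below a made-up hole */
        set_range(0x400000, 0x500000);      /* RAM above it */
        printf("%d %d\n",
               sketch_mfn_valid(0x1000),    /* 1 - falls in a backed group */
               sketch_mfn_valid(0x200000)); /* 0 - inside the hole */
        return 0;
    }

Note how set_range() deliberately widens to whole groups, matching
set_pdx_range() in the setup.c hunk below, so that partially populated
groups still get their frame table range backed.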

--- 2009-09-21.orig/xen/arch/x86/mm.c   2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/mm.c        2009-09-21 11:14:51.000000000 +0200
@@ -141,6 +141,10 @@ struct domain *dom_xen, *dom_io;
 unsigned long max_page;
 unsigned long total_pages;
 
+unsigned long __read_mostly pdx_group_valid[BITS_TO_LONGS(
+    (FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1)
+    / PDX_GROUP_COUNT)] = { [0] = 1 };
+
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
 int opt_allow_hugepage;
@@ -162,36 +166,58 @@ l2_pgentry_t *compat_idle_pg_table_l2 = 
 #define l3_disallow_mask(d) L3_DISALLOW_MASK
 #endif
 
+static void __init init_frametable_chunk(void *start, void *end)
+{
+    unsigned long s = (unsigned long)start;
+    unsigned long e = (unsigned long)end;
+    unsigned long step, mfn;
+
+    ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1)));
+    for ( ; s < e; s += step << PAGE_SHIFT )
+    {
+        step = 1UL << (cpu_has_page1gb &&
+                       !(s & ((1UL << L3_PAGETABLE_SHIFT) - 1)) ?
+                       L3_PAGETABLE_SHIFT - PAGE_SHIFT :
+                       L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+        /*
+         * The hardcoded 4 below is arbitrary - just pick whatever you think
+         * is reasonable to waste as a trade-off for using a large page.
+         */
+        while ( step && s + (step << PAGE_SHIFT) > e + (4 << PAGE_SHIFT) )
+            step >>= PAGETABLE_ORDER;
+        do {
+            mfn = alloc_boot_pages(step, step);
+        } while ( !mfn && (step >>= PAGETABLE_ORDER) );
+        if ( !mfn )
+            panic("Not enough memory for frame table");
+        map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR);
+    }
+
+    memset(start, 0, end - start);
+    memset(end, -1, s - (unsigned long)end);
+}
+
 void __init init_frametable(void)
 {
-    unsigned long nr_pages, page_step, i, mfn;
+    unsigned int sidx, eidx, nidx;
+    unsigned int max_idx = (max_pdx + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT;
 
 #ifdef __x86_64__
-    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
     BUILD_BUG_ON(XEN_VIRT_END > FRAMETABLE_VIRT_END);
-#else
-    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
 #endif
+    BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
 
-    nr_pages  = PFN_UP(max_pdx * sizeof(*frame_table));
-    page_step = 1 << (cpu_has_page1gb ? L3_PAGETABLE_SHIFT - PAGE_SHIFT
-                                      : L2_PAGETABLE_SHIFT - PAGE_SHIFT);
-
-    for ( i = 0; i < nr_pages; i += page_step )
+    for ( sidx = 0; ; sidx = nidx )
     {
-        /*
-         * The hardcoded 4 below is arbitrary - just pick whatever you think
-         * is reasonable to waste as a trade-off for using a large page.
-         */
-        while (nr_pages + 4 - i < page_step)
-            page_step >>= PAGETABLE_ORDER;
-        mfn = alloc_boot_pages(page_step, page_step);
-        map_pages_to_xen(
-            FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
-            mfn, page_step, PAGE_HYPERVISOR);
+        eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx);
+        nidx = find_next_bit(pdx_group_valid, max_idx, eidx);
+        if ( nidx >= max_idx )
+            break;
+        init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
+                              pdx_to_page(eidx * PDX_GROUP_COUNT));
     }
-
-    memset(frame_table, 0, nr_pages << PAGE_SHIFT);
+    init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
+                          pdx_to_page(max_pdx - 1) + 1);
 }
 
 void __init arch_init_memory(void)
--- 2009-09-21.orig/xen/arch/x86/setup.c        2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/setup.c     2009-09-21 11:14:51.000000000 +0200
@@ -312,6 +312,16 @@ static void __init setup_max_pdx(void)
 #endif
 }
 
+static void __init set_pdx_range(unsigned long smfn, unsigned long emfn)
+{
+    unsigned long idx, eidx;
+
+    idx = pfn_to_pdx(smfn) / PDX_GROUP_COUNT;
+    eidx = (pfn_to_pdx(emfn - 1) + PDX_GROUP_COUNT) / PDX_GROUP_COUNT;
+    for ( ; idx < eidx; ++idx )
+        __set_bit(idx, pdx_group_valid);
+}
+
 /* A temporary copy of the e820 map that we can mess with during bootstrap. */
 static struct e820map __initdata boot_e820;
 
@@ -657,6 +667,8 @@ void __init __start_xen(unsigned long mb
         if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
             continue;
 
+        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+
         /* Map the chunk. No memory will need to be allocated to do this. */
         map_pages_to_xen(
             (unsigned long)maddr_to_bootstrap_virt(s),
@@ -853,6 +865,8 @@ void __init __start_xen(unsigned long mb
         }
 #endif
 
+        set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
+
         /* Need to create mappings above 16MB. */
         map_s = max_t(uint64_t, s, 16<<20);
         map_e = e;
--- 2009-09-21.orig/xen/arch/x86/x86_64/mm.c    2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/arch/x86/x86_64/mm.c 2009-09-21 11:14:51.000000000 +0200
@@ -64,6 +64,14 @@ l3_pgentry_t __attribute__ ((__section__
 l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
     l2_xenmap[L2_PAGETABLE_ENTRIES];
 
+int __mfn_valid(unsigned long mfn)
+{
+    return likely(mfn < max_page) &&
+           likely(!(mfn & pfn_hole_mask)) &&
+           likely(test_bit(pfn_to_pdx(mfn) / PDX_GROUP_COUNT,
+                           pdx_group_valid));
+}
+
 void *alloc_xen_pagetable(void)
 {
     extern int early_boot;
--- 2009-09-21.orig/xen/include/asm-x86/config.h        2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/config.h     2009-09-21 11:14:51.000000000 +0200
@@ -314,7 +314,8 @@ extern unsigned int video_mode, video_fl
 #define RDWR_MPT_VIRT_END      LINEAR_PT_VIRT_START
 #define RDWR_MPT_VIRT_START    (RDWR_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
 #define FRAMETABLE_VIRT_END    RDWR_MPT_VIRT_START
-#define FRAMETABLE_VIRT_START  (FRAMETABLE_VIRT_END - (FRAMETABLE_MBYTES<<20))
+#define FRAMETABLE_SIZE         (FRAMETABLE_MBYTES<<20)
+#define FRAMETABLE_VIRT_START  (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
 #define RO_MPT_VIRT_END                FRAMETABLE_VIRT_START
 #define RO_MPT_VIRT_START      (RO_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
 
--- 2009-09-21.orig/xen/include/asm-x86/mm.h    2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/mm.h 2009-09-21 11:14:51.000000000 +0200
@@ -263,6 +263,10 @@ extern unsigned long max_page;
 extern unsigned long total_pages;
 void init_frametable(void);
 
+#define PDX_GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \
+                         (sizeof(*frame_table) & -sizeof(*frame_table)))
+extern unsigned long pdx_group_valid[];
+
 /* Convert between Xen-heap virtual addresses and page-info structures. */
 static inline struct page_info *__virt_to_page(const void *v)
 {
--- 2009-09-21.orig/xen/include/asm-x86/x86_32/page.h   2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/x86_32/page.h        2009-09-21 11:14:51.000000000 +0200
@@ -41,7 +41,11 @@
 #include <xen/config.h>
 #include <asm/types.h>
 
-#define __mfn_valid(mfn)        ((mfn) < max_page)
+#define __mfn_valid(mfn)        ({                                            \
+    unsigned long __m_f_n = (mfn);                                            \
+    likely(__m_f_n < max_page) &&                                             \
+    likely(test_bit(pfn_to_pdx(__m_f_n) / PDX_GROUP_COUNT, pdx_group_valid)); \
+})
 
 #define max_pdx                 max_page
 #define pfn_to_pdx(pfn)         (pfn)
--- 2009-09-21.orig/xen/include/asm-x86/x86_64/page.h   2009-09-21 11:13:02.000000000 +0200
+++ 2009-09-21/xen/include/asm-x86/x86_64/page.h        2009-09-21 11:14:51.000000000 +0200
@@ -35,7 +35,7 @@
 /* Physical address where Xen was relocated to. */
 extern unsigned long xen_phys_start;
 
-extern unsigned long max_page, max_pdx;
+extern unsigned long max_pdx;
 extern unsigned long pfn_pdx_bottom_mask, ma_va_bottom_mask;
 extern unsigned int pfn_pdx_hole_shift;
 extern unsigned long pfn_hole_mask;
@@ -53,10 +53,7 @@ extern void pfn_pdx_hole_setup(unsigned 
 #define pdx_to_virt(pdx) ((void *)(DIRECTMAP_VIRT_START + \
                                    ((unsigned long)(pdx) << PAGE_SHIFT)))
 
-static inline int __mfn_valid(unsigned long mfn)
-{
-    return mfn < max_page && !(mfn & pfn_hole_mask);
-}
+extern int __mfn_valid(unsigned long mfn);
 
 static inline unsigned long pfn_to_pdx(unsigned long pfn)
 {



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
