[Xen-changelog] [xen-unstable] Reduce boot-time memory fragmentation

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Reduce boot-time memory fragmentation
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 15 Mar 2010 06:35:16 -0700
Delivery-date: Mon, 15 Mar 2010 06:36:09 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1268659473 0
# Node ID bc0087c3e75ee73398c8c03d8942a5909c730b6b
# Parent  25446394d21fd4ae3d5cb0b94c013c8d4c33e662
Reduce boot-time memory fragmentation

On certain NUMA configurations, having init_node_heap() consume the
first few pages of a new node's memory for internal data structures
leads to unnecessary memory fragmentation, which can - with
sufficiently many nodes - leave too little memory below 4G for Dom0
to set up its swiotlb and PCI-consistent buffers.

Since alloc_boot_pages() generally consumes from the end of available
regions, make init_node_heap() prefer the end of such regions too (so
that fragmentation occurs at only one end of a region).

(Adjustment from the first version: use the tail of the region when
the end address's alignment is less than or equal to the beginning
one's, not just when it is strictly less.)
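
To make the alignment rule concrete, here is a minimal standalone
sketch of just that comparison (lowest_set_bit() is a hypothetical
stand-in for the hypervisor's find_first_set_bit(); the real check in
init_heap_pages() below also tests the node and MAX_ORDER alignment
of the start):

    #include <stdio.h>

    /* Index of the least significant set bit, i.e. the natural
     * power-of-two alignment of a (nonzero) frame number. */
    static unsigned int lowest_set_bit(unsigned long x)
    {
        return (unsigned int)__builtin_ctzl(x);
    }

    int main(void)
    {
        unsigned long s = 0x40000; /* first MFN: 2^18 aligned */
        unsigned long e = 0x48000; /* end MFN (excl.): 2^15 aligned */

        /* Carve the metadata from the tail when the end is no
         * better aligned than the start, so fragmentation is taken
         * at the less-aligned end of the region. */
        int use_tail = lowest_set_bit(e) <= lowest_set_bit(s);
        printf("use_tail=%d\n", use_tail); /* prints use_tail=1 */
        return 0;
    }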

Further, in order to prefer allocations from higher memory locations,
insert memory regions in reverse order in end_boot_allocator(), with
the exception of inserting one region residing on the boot CPU's node
first (so that the statically allocated structures, which serve the
first node seen, end up serving this node).
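
The resulting insertion order can be modelled with a rough standalone
sketch (made-up regions and a hard-coded node tag per region stand in
for phys_to_nid() and cpu_to_node(); the real loops are in the
end_boot_allocator() hunk below):

    #include <stdio.h>

    /* Hypothetical simplified bootmem region: frames [s, e). */
    struct region { unsigned long s, e; int node; };

    int main(void)
    {
        struct region r[] = { { 0x00000, 0x40000, 0 },
                              { 0x40000, 0x80000, 1 },
                              { 0x80000, 0xc0000, 2 } };
        int n = sizeof(r) / sizeof(r[0]), boot_node = 0, i;

        /* Pass 1: hand one region on the boot CPU's node to the heap
         * first, so the static metadata ends up serving that node. */
        for ( i = 0; i < n; i++ )
            if ( r[i].s < r[i].e && r[i].node == boot_node )
            {
                printf("init: %#lx..%#lx\n", r[i].s, r[i].e);
                r[i].e = r[i].s; /* mark consumed */
                break;
            }

        /* Pass 2: remaining regions in reverse, so higher memory is
         * inserted first and hence preferred by later allocations. */
        for ( i = n; i-- > 0; )
            if ( r[i].s < r[i].e )
                printf("init: %#lx..%#lx\n", r[i].s, r[i].e);

        return 0;
    }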

Finally, reduce MAX_ORDER on x86 to the maximum useful value (1Gb),
so that the reservation of a page on node boundaries (again leading
to fragmentation) can be avoided as much as possible (node boundaries
at addresses aligned to less than 1Gb are expected to be rare, if
found in practice at all).
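
For reference, assuming the usual x86 values (PAGETABLE_ORDER = 9 and
4Kb pages), the new limit works out to exactly the 1Gb mentioned
above:

    #include <stdio.h>

    int main(void)
    {
        const unsigned int pagetable_order = 9; /* 512 entries/table */
        const unsigned long page_size = 4096;   /* 4Kb pages */
        unsigned int max_order = 2 * pagetable_order;         /* 18 */
        unsigned long bytes = (1UL << max_order) * page_size; /* 2^30 */

        printf("order %u = %lu Mb\n", max_order, bytes >> 20);
        return 0;    /* prints: order 18 = 1024 Mb */
    }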

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/common/page_alloc.c      |   36 ++++++++++++++++++++++++++++++++----
 xen/include/asm-x86/config.h |    1 +
 2 files changed, 33 insertions(+), 4 deletions(-)

diff -r 25446394d21f -r bc0087c3e75e xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Mon Mar 15 13:23:07 2010 +0000
+++ b/xen/common/page_alloc.c   Mon Mar 15 13:24:33 2010 +0000
@@ -231,7 +231,7 @@ static DEFINE_SPINLOCK(heap_lock);
 static DEFINE_SPINLOCK(heap_lock);
 
 static unsigned long init_node_heap(int node, unsigned long mfn,
-                                    unsigned long nr)
+                                    unsigned long nr, bool_t *use_tail)
 {
     /* First node to be discovered has its heap metadata statically alloced. */
     static heap_by_zone_and_order_t _heap_static;
@@ -250,12 +250,20 @@ static unsigned long init_node_heap(int 
         needed = 0;
     }
 #ifdef DIRECTMAP_VIRT_END
+    else if ( *use_tail && nr >= needed &&
+              (mfn + nr) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
+    {
+        _heap[node] = mfn_to_virt(mfn + nr - needed);
+        avail[node] = mfn_to_virt(mfn + nr - 1) +
+                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
+    }
     else if ( nr >= needed &&
               (mfn + needed) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
     {
         _heap[node] = mfn_to_virt(mfn);
         avail[node] = mfn_to_virt(mfn + needed - 1) +
                       PAGE_SIZE - sizeof(**avail) * NR_ZONES;
+        *use_tail = 0;
     }
 #endif
     else if ( get_order_from_bytes(sizeof(**_heap)) ==
@@ -812,15 +820,24 @@ static void init_heap_pages(
 
         if ( unlikely(!avail[nid_curr]) )
         {
+            unsigned long s = page_to_mfn(pg + i);
+            unsigned long e = page_to_mfn(pg + nr_pages - 1) + 1;
+            bool_t use_tail = (nid_curr == phys_to_nid(pfn_to_paddr(e - 1))) &&
+                              !(s & ((1UL << MAX_ORDER) - 1)) &&
+                              (find_first_set_bit(e) <= find_first_set_bit(s));
             unsigned long n;
 
-            n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i);
-            if ( n )
+            n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i,
+                               &use_tail);
+            BUG_ON(i + n > nr_pages);
+            if ( n && !use_tail )
             {
-                BUG_ON(i + n > nr_pages);
                 i += n - 1;
                 continue;
             }
+            if ( i + n == nr_pages )
+                break;
+            nr_pages -= n;
         }
 
         /*
@@ -870,6 +887,17 @@ void __init end_boot_allocator(void)
     for ( i = 0; i < nr_bootmem_regions; i++ )
     {
         struct bootmem_region *r = &bootmem_region_list[i];
+        if ( (r->s < r->e) &&
+             (phys_to_nid(pfn_to_paddr(r->s)) == cpu_to_node(0)) )
+        {
+            init_heap_pages(mfn_to_page(r->s), r->e - r->s);
+            r->e = r->s;
+            break;
+        }
+    }
+    for ( i = nr_bootmem_regions; i-- > 0; )
+    {
+        struct bootmem_region *r = &bootmem_region_list[i];
         if ( r->s < r->e )
             init_heap_pages(mfn_to_page(r->s), r->e - r->s);
     }
diff -r 25446394d21f -r bc0087c3e75e xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Mon Mar 15 13:23:07 2010 +0000
+++ b/xen/include/asm-x86/config.h      Mon Mar 15 13:24:33 2010 +0000
@@ -26,6 +26,7 @@
 #define CONFIG_NUMA 1
 #define CONFIG_DISCONTIGMEM 1
 #define CONFIG_NUMA_EMU 1
+#define CONFIG_PAGEALLOC_MAX_ORDER (2 * PAGETABLE_ORDER)
 
 /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
 #define CONFIG_X86_L1_CACHE_SHIFT 7

