[Xen-devel] [PATCH 4/7] x86: map M2P table sparsely

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 4/7] x86: map M2P table sparsely
From: "Jan Beulich" <JBeulich@xxxxxxxxxx>
Date: Mon, 21 Sep 2009 13:07:14 +0100

Avoid backing M2P table holes with memory when those holes are large
enough to cover an exact multiple of large pages.

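(For illustration only, not part of the patch: the hole test introduced
below via the MFN/CNT macro pair relies on MFN validity changing only at
PDX group boundaries, so probing one MFN per group suffices. On x86-64,
where each 8-byte M2P entry describes one 4 KiB page, a 2M chunk of the
table spans 1 GiB of machine address space. A minimal standalone sketch,
with toy stand-ins for Xen's mfn_valid() and PDX_GROUP_COUNT:

#include <stdbool.h>

#define L2_PAGETABLE_SHIFT 21                     /* 2M superpage */
#define M2P_ENTRY_SIZE     sizeof(unsigned long)  /* 8 bytes on x86-64 */

/* Toy stand-in: MFN validity only changes at this stride. */
#define PDX_GROUP_COUNT    (1UL << 16)

/* First MFN whose M2P entry lives in 2M table chunk x. */
#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / M2P_ENTRY_SIZE)

/* PDX groups covered by one 2M chunk of the table (4 here). */
#define CNT (((1UL << L2_PAGETABLE_SHIFT) / M2P_ENTRY_SIZE) / PDX_GROUP_COUNT)

/* Toy stand-in: pretend only the first 4 GiB of machine memory exists. */
static bool mfn_valid(unsigned long mfn) { return mfn < (1UL << 20); }

/* True if 2M chunk i of the M2P table can be left unmapped. */
static bool m2p_chunk_is_hole(unsigned long i)
{
    unsigned long n;

    for ( n = 0; n < CNT; ++n )
        if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
            return false;  /* some covered MFN is valid: must back it */
    return true;           /* only invalid MFNs: skip the allocation */
}
)
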
For the sake of saving and migrating guests, XENMEM_machphys_mfn_list
fills the holes in the array it returns with the MFN for the previous
range returned (thanks to Keir for pointing out that it really doesn't
matter *what* MFN gets returned for invalid ranges). Using the most
recently encountered MFN (rather than, e.g., always the first one) is
an attempt to cut down on the number of references these pages get
when they are mapped into a privileged domain's address space.

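(Again illustration only: a user-space sketch of the fill behaviour the
handlers below share, with a plain array standing in for the guest copy
and the hypothetical present[]/chunk_mfn[] arrays standing in for the
L2 page-table lookup; <stdbool.h> as in the previous sketch:

/* Emulate XENMEM_machphys_mfn_list's hole filling: for 2M table chunks
 * without a present mapping, report the most recently returned MFN.
 * The real handlers ASSERT(mfn), i.e. rely on the first chunk always
 * being present, so last_mfn is never handed out while still zero. */
static unsigned int fill_mfn_list(const bool *present,
                                  const unsigned long *chunk_mfn,
                                  unsigned long *out, unsigned int max)
{
    unsigned long mfn, last_mfn = 0;
    unsigned int i;

    for ( i = 0; i < max; i++ )
    {
        mfn = present[i] ? chunk_mfn[i] : last_mfn;
        out[i] = mfn;        /* copy_to_guest_offset() in the real code */
        last_mfn = mfn;
    }
    return i;                /* becomes xmml.nr_extents */
}
)
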
This also allows saving a couple of 2M pages even on certain
"normal" systems.

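(One more illustrative aside: on x86-64 the 1G-superpage path below
becomes a three-way decision per 1G-aligned group of 512 2M chunks;
schematically, reusing m2p_chunk_is_hole() from the first sketch:

enum m2p_1g_action { M2P_SKIP_1G, M2P_MAP_1G, M2P_PER_2M };

/* 512 == 1 << PAGETABLE_ORDER: 2M chunks per 1G range on x86. */
static enum m2p_1g_action classify_1g_range(unsigned long i)
{
    unsigned int k, holes = 0;

    for ( k = 0; k < 512; ++k )
        if ( m2p_chunk_is_hole(i + k) )
            ++holes;

    if ( holes == 512 )
        return M2P_SKIP_1G;  /* all holes: back nothing, skip the range */
    if ( holes == 0 )
        return M2P_MAP_1G;   /* fully populated: try one 1G superpage */
    return M2P_PER_2M;       /* mixed: fall back to per-2M mappings */
}
)
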
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

--- 2009-09-10.orig/xen/arch/x86/x86_32/mm.c    2009-09-17 15:51:43.000000000 +0200
+++ 2009-09-10/xen/arch/x86/x86_32/mm.c 2009-09-17 12:06:40.000000000 +0200
@@ -72,7 +72,7 @@ void __init paging_init(void)
 {
     unsigned long v;
     struct page_info *pg;
-    int i;
+    unsigned int i, n;
 
     if ( cpu_has_pge )
     {
@@ -96,8 +96,18 @@ void __init paging_init(void)
      */
     mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*machine_to_phys_mapping));
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            continue;
         if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
             panic("Not enough memory to bootstrap Xen.\n");
         l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i],
@@ -106,11 +116,12 @@ void __init paging_init(void)
         l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i],
                   l2e_from_page(
                       pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW));
+        /* Fill with an obvious debug pattern. */
+        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
+               1UL << L2_PAGETABLE_SHIFT);
     }
-
-    /* Fill with an obvious debug pattern. */
-    for ( i = 0; i < (mpt_size / BYTES_PER_LONG); i++)
-        set_gpfn_from_mfn(i, 0x55555555);
+#undef CNT
+#undef MFN
 
     /* Create page tables for ioremap()/map_domain_page_global(). */
     for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
@@ -163,14 +174,17 @@ void __init subarch_init_memory(void)
 {
     unsigned long m2p_start_mfn;
     unsigned int i, j;
+    l2_pgentry_t l2e;
 
     BUILD_BUG_ON(sizeof(struct page_info) != 24);
 
     /* M2P table is mappable read-only by privileged domains. */
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
-        m2p_start_mfn = l2e_get_pfn(
-            idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
+        l2e = idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i];
+        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+            continue;
+        m2p_start_mfn = l2e_get_pfn(l2e);
         for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
         {
             struct page_info *page = mfn_to_page(m2p_start_mfn + j);
@@ -191,8 +205,9 @@ void __init subarch_init_memory(void)
 long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
 {
     struct xen_machphys_mfn_list xmml;
-    unsigned long mfn;
+    unsigned long mfn, last_mfn;
     unsigned int i, max;
+    l2_pgentry_t l2e;
     long rc = 0;
 
     switch ( op )
@@ -203,12 +218,18 @@ long subarch_memory_op(int op, XEN_GUEST
 
         max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21);
 
-        for ( i = 0; i < max; i++ )
+        for ( i = 0, last_mfn = 0; i < max; i++ )
         {
-            mfn = l2e_get_pfn(idle_pg_table_l2[l2_linear_offset(
-                RDWR_MPT_VIRT_START + (i << 21))]) + l1_table_offset(i << 21);
+            l2e = idle_pg_table_l2[l2_linear_offset(
+                RDWR_MPT_VIRT_START + (i << 21))];
+            if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                mfn = l2e_get_pfn(l2e);
+            else
+                mfn = last_mfn;
+            ASSERT(mfn);
             if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;
--- 2009-09-10.orig/xen/arch/x86/x86_64/compat/mm.c     2009-09-17 15:51:43.000000000 +0200
+++ 2009-09-10/xen/arch/x86/x86_64/compat/mm.c  2009-09-17 09:08:28.000000000 +0200
@@ -153,19 +153,31 @@ int compat_arch_memory_op(int op, XEN_GU
     }
 
     case XENMEM_machphys_mfn_list:
+    {
+        unsigned long limit;
+        compat_pfn_t last_mfn;
+
         if ( copy_from_guest(&xmml, arg, 1) )
             return -EFAULT;
 
-        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START;
-              (i != xmml.max_extents) && (v != RDWR_COMPAT_MPT_VIRT_END);
+        limit = (unsigned long)(compat_machine_to_phys_mapping +
+            min_t(unsigned long, max_page,
+                  MACH2PHYS_COMPAT_NR_ENTRIES(current->domain)));
+        if ( limit > RDWR_COMPAT_MPT_VIRT_END )
+            limit = RDWR_COMPAT_MPT_VIRT_END;
+        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START, last_mfn = 0;
+              (i != xmml.max_extents) && (v < limit);
               i++, v += 1 << L2_PAGETABLE_SHIFT )
         {
             l2e = compat_idle_pg_table_l2[l2_table_offset(v)];
-            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-                break;
-            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
+            if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                mfn = l2e_get_pfn(l2e);
+            else
+                mfn = last_mfn;
+            ASSERT(mfn);
             if ( copy_to_compat_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;
@@ -173,6 +185,7 @@ int compat_arch_memory_op(int op, XEN_GU
             rc = -EFAULT;
 
         break;
+    }
 
     default:
         rc = -ENOSYS;
--- 2009-09-10.orig/xen/arch/x86/x86_64/mm.c    2009-09-17 15:53:39.000000000 +0200
+++ 2009-09-10/xen/arch/x86/x86_64/mm.c 2009-09-17 15:53:46.000000000 +0200
@@ -194,7 +194,7 @@ void __init pfn_pdx_hole_setup(unsigned 
 void __init paging_init(void)
 {
     unsigned long i, mpt_size, va;
-    unsigned int memflags;
+    unsigned int n, memflags;
     l3_pgentry_t *l3_ro_mpt;
     l2_pgentry_t *l2_ro_mpt = NULL;
     struct page_info *l1_pg, *l2_pg, *l3_pg;
@@ -213,6 +213,11 @@ void __init paging_init(void)
      */
     mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*machine_to_phys_mapping));
     for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
     {
         BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
@@ -222,37 +227,63 @@ void __init paging_init(void)
 
         if ( cpu_has_page1gb &&
              !((unsigned long)l2_ro_mpt & ~PAGE_MASK) &&
-             (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) &&
-             (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
-                                          memflags)) != NULL )
+             (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) )
+        {
+            unsigned int k, holes;
+
+            for ( holes = k = 0; k < 1 << PAGETABLE_ORDER; ++k)
+            {
+                for ( n = 0; n < CNT; ++n)
+                    if ( mfn_valid(MFN(i + k) + n * PDX_GROUP_COUNT) )
+                        break;
+                if ( n == CNT )
+                    ++holes;
+            }
+            if ( k == holes )
+            {
+                i += (1UL << PAGETABLE_ORDER) - 1;
+                continue;
+            }
+            if ( holes == 0 &&
+                 (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
+                                              memflags)) != NULL )
+            {
+                map_pages_to_xen(
+                    RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
+                    page_to_mfn(l1_pg),
+                    1UL << (2 * PAGETABLE_ORDER),
+                    PAGE_HYPERVISOR);
+                memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
+                       0x77, 1UL << L3_PAGETABLE_SHIFT);
+
+                ASSERT(!l2_table_offset(va));
+                /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+                l3e_write(&l3_ro_mpt[l3_table_offset(va)],
+                    l3e_from_page(l1_pg,
+                        /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
+                i += (1UL << PAGETABLE_ORDER) - 1;
+                continue;
+            }
+        }
+
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            l1_pg = NULL;
+        else if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
+                                               memflags)) == NULL )
+            goto nomem;
+        else
         {
             map_pages_to_xen(
                 RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
                 page_to_mfn(l1_pg),
-                1UL << (2 * PAGETABLE_ORDER),
+                1UL << PAGETABLE_ORDER,
                 PAGE_HYPERVISOR);
             memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
-                   0x77, 1UL << L3_PAGETABLE_SHIFT);
-
-            ASSERT(!l2_table_offset(va));
-            /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
-            l3e_write(&l3_ro_mpt[l3_table_offset(va)],
-                l3e_from_page(l1_pg,
-                    /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
-            i += (1UL << PAGETABLE_ORDER) - 1;
-            continue;
+                   0x55, 1UL << L2_PAGETABLE_SHIFT);
         }
-
-        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
-                                          memflags)) == NULL )
-            goto nomem;
-        map_pages_to_xen(
-            RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
-            page_to_mfn(l1_pg), 
-            1UL << PAGETABLE_ORDER,
-            PAGE_HYPERVISOR);
-        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
-               1UL << L2_PAGETABLE_SHIFT);
         if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
         {
             if ( (l2_pg = alloc_domheap_page(NULL, memflags)) == NULL )
@@ -264,10 +295,13 @@ void __init paging_init(void)
             ASSERT(!l2_table_offset(va));
         }
         /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
-        l2e_write(l2_ro_mpt, l2e_from_page(
-            l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
+        if ( l1_pg )
+            l2e_write(l2_ro_mpt, l2e_from_page(
+                l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
         l2_ro_mpt++;
     }
+#undef CNT
+#undef MFN
 
     /* Create user-accessible L2 directory to map the MPT for compat guests. */
     BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) !=
@@ -288,12 +322,22 @@ void __init paging_init(void)
     mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
     if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END )
         m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
-    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
+#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
+#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
+             sizeof(*compat_machine_to_phys_mapping))
+    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
+                 sizeof(*compat_machine_to_phys_mapping));
+    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, l2_ro_mpt++ )
     {
         memflags = MEMF_node(phys_to_nid(i <<
             (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT)));
+        for ( n = 0; n < CNT; ++n)
+            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
+                break;
+        if ( n == CNT )
+            continue;
         if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
-                                          memflags)) == NULL )
+                                               memflags)) == NULL )
             goto nomem;
         map_pages_to_xen(
             RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
@@ -306,8 +350,9 @@ void __init paging_init(void)
                1UL << L2_PAGETABLE_SHIFT);
         /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
         l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
-        l2_ro_mpt++;
     }
+#undef CNT
+#undef MFN
 
     /* Set up linear page table mapping. */
     l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
@@ -428,7 +473,7 @@ long subarch_memory_op(int op, XEN_GUEST
     l3_pgentry_t l3e;
     l2_pgentry_t l2e;
     unsigned long v;
-    xen_pfn_t mfn;
+    xen_pfn_t mfn, last_mfn;
     unsigned int i;
     long rc = 0;
 
@@ -440,29 +485,32 @@ long subarch_memory_op(int op, XEN_GUEST
 
         BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
         BUILD_BUG_ON(RDWR_MPT_VIRT_END   & ((1UL << L3_PAGETABLE_SHIFT) - 1));
-        for ( i = 0, v = RDWR_MPT_VIRT_START;
-              (i != xmml.max_extents) && (v != RDWR_MPT_VIRT_END);
+        for ( i = 0, v = RDWR_MPT_VIRT_START, last_mfn = 0;
+              (i != xmml.max_extents) &&
+              (v < (unsigned long)(machine_to_phys_mapping + max_page));
               i++, v += 1UL << L2_PAGETABLE_SHIFT )
         {
             l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
                 l3_table_offset(v)];
             if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-                break;
-            if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
+                mfn = last_mfn;
+            else if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
             {
                 l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
-                if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-                    break;
-                mfn = l2e_get_pfn(l2e);
+                if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
+                    mfn = l2e_get_pfn(l2e);
+                else
+                    mfn = last_mfn;
             }
             else
             {
                 mfn = l3e_get_pfn(l3e)
                     + (l2_table_offset(v) << PAGETABLE_ORDER);
             }
-            ASSERT(!l1_table_offset(v));
+            ASSERT(mfn);
             if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                 return -EFAULT;
+            last_mfn = mfn;
         }
 
         xmml.nr_extents = i;
