To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Many fixes for save/restore and related areas for PAE in particular. Now
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 17 Nov 2005 11:28:17 +0000
Delivery-date: Thu, 17 Nov 2005 11:29:58 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User smh22@xxxxxxxxxxxxxxxxxxxx
# Node ID fe3a892b33b4ccd3593bde788ceafa0668227450
# Parent  9b345321fd0676436d399c6eca0afd625b886ca4
Many fixes for save/restore and related areas, for PAE in particular. Save/restore
should now succeed on machines with up to 16GB of memory and with guests of
any size.

Signed-off-by: Steven Hand <steven@xxxxxxxxxxxxx>
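
A minimal, self-contained C sketch of the PTE-width problem these changes
address (the MFN value and PTE offset below are made-up examples): on i386 an
MFN shifted left by PAGE_SHIFT can exceed 32 bits once the machine has more
than 4GB, so a machine address built in an 'unsigned long' is silently
truncated; carrying it in a uint64_t, as create_lookup_pte_addr() now does,
preserves the high bits.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        /* Made-up MFN above the 4GB boundary (frame number > 0xfffff). */
        uint64_t mfn = 0x123456;
        unsigned long pte_offset = 0x7f8;  /* offset of the PTE within its page */

        /* Pre-patch behaviour: on i386 'unsigned long' is 32 bits wide, so the
           shift discards the high bits of the machine address (modelled here
           with uint32_t so the truncation shows on any host). */
        uint32_t truncated = ((uint32_t)mfn << PAGE_SHIFT) | pte_offset;

        /* Post-patch behaviour: 64-bit arithmetic keeps the full address. */
        uint64_t full = (mfn << PAGE_SHIFT) | pte_offset;

        printf("32-bit pte addr: 0x%x\n",   truncated);
        printf("64-bit pte addr: 0x%llx\n", (unsigned long long)full);
        return 0;
    }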

diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Wed Nov 16 16:45:03 2005
@@ -136,21 +136,19 @@
 }
 EXPORT_SYMBOL(direct_kernel_remap_pfn_range);
 
-/* FIXME: This is horribly broken on PAE */ 
 static int lookup_pte_fn(
        pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
 {
-       unsigned long *ptep = (unsigned long *)data;
+       uint64_t *ptep = (uint64_t *)data;
        if (ptep)
-               *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) <<
-                        PAGE_SHIFT) |
-                       ((unsigned long)pte & ~PAGE_MASK);
+               *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
+                        PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
        return 0;
 }
 
 int create_lookup_pte_addr(struct mm_struct *mm, 
                           unsigned long address,
-                          unsigned long *ptep)
+                          uint64_t *ptep)
 {
        return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep);
 }
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Wed Nov 16 16:45:03 2005
@@ -412,7 +412,7 @@
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
        unsigned int i, op = 0;
        struct grant_handle_pair *handle;
-       unsigned long ptep;
+       uint64_t ptep;
        int ret;
 
        for ( i = 0; i < nr_pages; i++)
@@ -427,9 +427,9 @@
                op++;
 
                if (create_lookup_pte_addr(
-                       blktap_vma->vm_mm,
-                       MMAP_VADDR(user_vstart, idx, i), 
-                       &ptep) !=0) {
+                           blktap_vma->vm_mm,
+                           MMAP_VADDR(user_vstart, idx, i), 
+                           &ptep) !=0) {
                        DPRINTK("Couldn't get a pte addr!\n");
                        return;
                }
@@ -705,7 +705,7 @@
 
                unsigned long uvaddr;
                unsigned long kvaddr;
-               unsigned long ptep;
+               uint64_t ptep;
 
                uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
                kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Wed Nov 16 16:45:03 2005
@@ -152,7 +152,8 @@
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
                unsigned long *p, addr;
-               unsigned long mfn, ptep;
+               unsigned long mfn; 
+               uint64_t ptep;
                int i;
 
                if (copy_from_user(&m, (void *)data, sizeof(m))) {
@@ -217,15 +218,39 @@
 #endif
 
 #ifndef __ia64__
-       case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN: {
-               unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
-               pgd_t *pgd = pgd_offset_k(m2pv);
-               pud_t *pud = pud_offset(pgd, m2pv);
-               pmd_t *pmd = pmd_offset(pud, m2pv);
-               unsigned long m2p_start_mfn =
-                       (*(unsigned long *)pmd) >> PAGE_SHIFT; 
-               ret = put_user(m2p_start_mfn, (unsigned long *)data) ?
-                       -EFAULT: 0;
+       case IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS: {
+
+               pgd_t *pgd; 
+               pud_t *pud; 
+               pmd_t *pmd; 
+               unsigned long m2pv, m2p_mfn;    
+               privcmd_m2pmfns_t m; 
+               unsigned long *p; 
+               int i; 
+
+               if (copy_from_user(&m, (void *)data, sizeof(m)))
+                       return -EFAULT;
+
+               m2pv = (unsigned long)machine_to_phys_mapping;
+
+               p = m.arr; 
+
+               for(i=0; i < m.num; i++) { 
+
+                       pgd = pgd_offset_k(m2pv);
+                       pud = pud_offset(pgd, m2pv);
+                       pmd = pmd_offset(pud, m2pv);
+                       m2p_mfn = (*(uint64_t *)pmd >> PAGE_SHIFT)&0xFFFFFFFF;
+                       
+                       if (put_user(m2p_mfn, p + i))
+                               return -EFAULT;
+
+                       m2pv += (1 << 21); 
+               }
+
+               ret = 0; 
+               break; 
+
        }
        break;
 #endif
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Wed Nov 16 16:45:03 2005
@@ -450,11 +450,11 @@
 #endif /* !CONFIG_DISCONTIGMEM */
 
 int direct_remap_pfn_range(struct vm_area_struct *vma,
-                            unsigned long address, 
-                            unsigned long mfn,
-                            unsigned long size, 
-                            pgprot_t prot,
-                            domid_t  domid);
+                           unsigned long address, 
+                           unsigned long mfn,
+                           unsigned long size, 
+                           pgprot_t prot,
+                           domid_t  domid);
 int direct_kernel_remap_pfn_range(unsigned long address, 
                                  unsigned long mfn,
                                  unsigned long size, 
@@ -462,7 +462,7 @@
                                  domid_t  domid);
 int create_lookup_pte_addr(struct mm_struct *mm,
                            unsigned long address,
-                           unsigned long *ptep);
+                           uint64_t *ptep);
 int touch_pte_range(struct mm_struct *mm,
                     unsigned long address,
                     unsigned long size);
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Nov 16 16:45:03 2005
@@ -541,7 +541,7 @@
 
 int create_lookup_pte_addr(struct mm_struct *mm,
                            unsigned long address,
-                           unsigned long *ptep);
+                           uint64_t *ptep);
 
 int touch_pte_range(struct mm_struct *mm,
                     unsigned long address,
diff -r 9b345321fd06 -r fe3a892b33b4 linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h
--- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h       Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h       Wed Nov 16 16:45:03 2005
@@ -55,6 +55,11 @@
        unsigned long *arr; /* array of mfns - top nibble set on err */
 } privcmd_mmapbatch_t; 
 
+typedef struct privcmd_m2pmfns { 
+       int num;    /* max number of mfns to return */
+       unsigned long *arr; /* array of mfns */
+} privcmd_m2pmfns_t; 
+
 typedef struct privcmd_blkmsg
 {
        unsigned long op;
@@ -69,12 +74,11 @@
  */
 #define IOCTL_PRIVCMD_HYPERCALL                                        \
        _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t))
-
 #define IOCTL_PRIVCMD_MMAP                                     \
        _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
 #define IOCTL_PRIVCMD_MMAPBATCH                                        \
        _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t))
-#define IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN                  \
+#define IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS                       \
        _IOC(_IOC_READ, 'P', 4, sizeof(unsigned long))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_linux_restore.c    Wed Nov 16 16:45:03 2005
@@ -13,13 +13,13 @@
 #include "xg_save_restore.h"
 
 /* max mfn of the whole machine */
-static uint32_t max_mfn; 
+static unsigned long max_mfn; 
 
 /* virtual starting address of the hypervisor */
-static uint32_t hvirt_start; 
+static unsigned long hvirt_start; 
 
 /* #levels of page tables used by the currrent guest */
-static uint32_t pt_levels; 
+static unsigned int pt_levels; 
 
 /* total number of pages used by the current guest */
 static unsigned long max_pfn;
@@ -49,7 +49,6 @@
 
     return (r == count) ? 1 : 0; 
 }
-
 
 /*
 ** In the state file (or during transfer), all page-table pages are 
@@ -60,23 +59,11 @@
 */
 int uncanonicalize_pagetable(unsigned long type, void *page) 
 { 
-    int i, pte_last, xen_start, xen_end; 
+    int i, pte_last; 
     unsigned long pfn; 
     uint64_t pte; 
 
-    /* 
-    ** We need to determine which entries in this page table hold
-    ** reserved hypervisor mappings. This depends on the current
-    ** page table type as well as the number of paging levels. 
-    */
-    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); 
-    
-    if (pt_levels == 2 && type == L2TAB)
-        xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); 
-
-    if (pt_levels == 3 && type == L3TAB) 
-        xen_start = L3_PAGETABLE_ENTRIES_PAE; 
-
+    pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); 
 
     /* Now iterate through the page table, uncanonicalizing each PTE */
     for(i = 0; i < pte_last; i++) { 
@@ -85,13 +72,10 @@
             pte = ((uint32_t *)page)[i]; 
         else 
             pte = ((uint64_t *)page)[i]; 
-        
-        if(i >= xen_start && i < xen_end) 
-            pte = 0; 
-        
+
         if(pte & _PAGE_PRESENT) { 
-            
-            pfn = pte >> PAGE_SHIFT; 
+
+            pfn = (pte >> PAGE_SHIFT) & 0xffffffff;
             
             if(pfn >= max_pfn) { 
                 ERR("Frame number in type %lu page table is out of range: "
@@ -101,17 +85,16 @@
             } 
             
             
-            if(type == L1TAB) 
-                pte &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT);
-            else 
-                pte &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE);
-            
-            pte |= p2m[pfn] << PAGE_SHIFT;
-            
+            pte &= 0xffffff0000000fffULL;
+            pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
+
             if(pt_levels == 2) 
                 ((uint32_t *)page)[i] = (uint32_t)pte; 
             else 
                 ((uint64_t *)page)[i] = (uint64_t)pte; 
+
+        
+
         }
     }
     
@@ -143,6 +126,9 @@
     /* A table of MFNs to map in the current region */
     unsigned long *region_mfn = NULL;
 
+    /* Types of the pfns in the current region */
+    unsigned long region_pfn_type[MAX_BATCH_SIZE];
+
     /* A temporary mapping, and a copy, of one frame of guest memory. */
     unsigned long *page = NULL;
 
@@ -233,10 +219,12 @@
     
     if(xc_domain_memory_increase_reservation(
            xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { 
-        ERR("Failed to increase reservation by %lx KB\n", max_pfn); 
+        ERR("Failed to increase reservation by %lx KB\n", PFN_TO_KB(max_pfn));
         errno = ENOMEM;
         goto out;
     }
+
+    DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); 
 
     /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
     if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) {
@@ -248,6 +236,7 @@
         ERR("Could not initialise for MMU updates");
         goto out;
     }
+
 
     DPRINTF("Reloading memory pages:   0%%\n");
 
@@ -261,7 +250,6 @@
     while (1) { 
 
         int j;
-        unsigned long region_pfn_type[MAX_BATCH_SIZE];
 
         this_pc = (n * 100) / max_pfn;
         if ( (this_pc - prev_pc) >= 5 )
@@ -322,7 +310,7 @@
             if (pagetype == XTAB) 
                 /* a bogus/unmapped page: skip it */
                 continue;
-            
+
             if (pfn > max_pfn) {
                 ERR("pfn out of range");
                 goto out;
@@ -348,10 +336,20 @@
                 ** A page table page - need to 'uncanonicalize' it, i.e. 
                 ** replace all the references to pfns with the corresponding 
                 ** mfns for the new domain. 
-                */ 
-                if(!uncanonicalize_pagetable(pagetype, page))
-                    goto out; 
-
+                ** 
+                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and 
+                ** so we may need to update the p2m after the main loop. 
+                ** Hence we defer canonicalization of L1s until then. 
+                */
+                if(pt_levels != 3 || pagetype != L1TAB) { 
+
+                    if(!uncanonicalize_pagetable(pagetype, page)) {
+                        ERR("failed uncanonicalize pt!\n"); 
+                        goto out; 
+                    }
+
+                } 
+                    
             } else if(pagetype != NOTAB) { 
 
                 ERR("Bogus page type %lx page table is out of range: "
@@ -359,7 +357,6 @@
                 goto out;
 
             } 
-
 
 
             if (verify) {
@@ -386,9 +383,9 @@
             }
 
             if (xc_add_mmu_update(xc_handle, mmu, 
-                                  (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
-                                  pfn)) {
-                ERR("machpys mfn=%ld pfn=%ld", mfn, pfn);
+                                  (((unsigned long long)mfn) << PAGE_SHIFT) 
+                                  | MMU_MACHPHYS_UPDATE, pfn)) {
+                ERR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
                 goto out;
             }
         } /* end of 'batch' for loop */
@@ -399,14 +396,39 @@
 
     DPRINTF("Received all pages\n");
 
-    if (pt_levels == 3) {
-
-        /* Get all PGDs below 4GB. */
+    if(pt_levels == 3) { 
+
+        /* 
+        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This 
+        ** is a little awkward and involves (a) finding all such PGDs and
+        ** replacing them with 'lowmem' versions; (b) upating the p2m[] 
+        ** with the new info; and (c) canonicalizing all the L1s using the
+        ** (potentially updated) p2m[]. 
+        ** 
+        ** This is relatively slow (and currently involves two passes through
+        ** the pfn_type[] array), but at least seems to be correct. May wish
+        ** to consider more complex approaches to optimize this later. 
+        */
+
+        int j, k; 
+
+        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
         for (i = 0; i < max_pfn; i++) {
             
             if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
 
                 unsigned long new_mfn; 
+                uint64_t l3ptes[4]; 
+                uint64_t *l3tab; 
+
+                l3tab = (uint64_t *)
+                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, 
+                                         PROT_READ, p2m[i]); 
+
+                for(j = 0; j < 4; j++) 
+                    l3ptes[j] = l3tab[j]; 
+                
+                munmap(l3tab, PAGE_SIZE); 
 
                 if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
                     ERR("Couldn't get a page below 4GB :-(");
@@ -414,15 +436,58 @@
                 }
                 
                 p2m[i] = new_mfn;
-                if (xc_add_mmu_update(
-                        xc_handle, mmu, 
-                        (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i)) {
+                if (xc_add_mmu_update(xc_handle, mmu, 
+                                      (((unsigned long long)new_mfn) 
+                                       << PAGE_SHIFT) | 
+                                      MMU_MACHPHYS_UPDATE, i)) {
                     ERR("Couldn't m2p on PAE root pgdir");
                     goto out;
                 }
+                
+                l3tab = (uint64_t *)
+                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, 
+                                         PROT_READ | PROT_WRITE, p2m[i]); 
+                
+                for(j = 0; j < 4; j++) 
+                    l3tab[j] = l3ptes[j]; 
+                
+                munmap(l3tab, PAGE_SIZE); 
+                
             }
         }
-        
+
+        /* Second pass: find all L1TABs and uncanonicalize them */
+        j = 0; 
+
+        for(i = 0; i < max_pfn; i++) { 
+            
+            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { 
+                region_mfn[j] = p2m[i]; 
+                j++; 
+            }
+
+            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { 
+
+                if (!(region_base = xc_map_foreign_batch(
+                          xc_handle, dom, PROT_READ | PROT_WRITE, 
+                          region_mfn, j))) {  
+                    ERR("map batch failed");
+                    goto out;
+                }
+
+                for(k = 0; k < j; k++) {
+                    if(!uncanonicalize_pagetable(L1TAB, 
+                                                 region_base + k*PAGE_SIZE)) {
+                        ERR("failed uncanonicalize pt!\n"); 
+                        goto out; 
+                    } 
+                }
+                
+                munmap(region_base, j*PAGE_SIZE); 
+                j = 0; 
+            }
+        }
+
     }
 
 
@@ -430,6 +495,7 @@
         ERR("Error doing finish_mmu_updates()"); 
         goto out;
     } 
+
 
     /*
      * Pin page tables. Do this after writing to them as otherwise Xen
@@ -439,7 +505,7 @@
 
         if ( (pfn_type[i] & LPINTAB) == 0 )
             continue;
-        
+
         switch(pfn_type[i]) { 
 
         case (L1TAB|LPINTAB): 
@@ -463,22 +529,15 @@
         }
 
         pin[nr_pins].arg1.mfn = p2m[i];
+
+        nr_pins ++; 
         
-        if (++nr_pins == MAX_PIN_BATCH) {
+        if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) {
             if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { 
                 ERR("Failed to pin batch of %d page tables", nr_pins); 
                 goto out;
             } 
-            DPRINTF("successfully pinned batch of %d page tables", nr_pins); 
             nr_pins = 0;
-        }
-    }
-    
-    if (nr_pins != 0) { 
-        if((rc = xc_mmuext_op(xc_handle, pin, nr_pins, dom)) < 0) { 
-            ERR("Failed (2) to pin batch of %d page tables", nr_pins); 
-            DPRINTF("rc is %d\n", rc); 
-            goto out;
         }
     }
 
@@ -579,23 +638,20 @@
     pfn = ctxt.ctrlreg[3] >> PAGE_SHIFT;
 
     if (pfn >= max_pfn) {
-        DPRINTF("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx\n",
-                pfn, max_pfn, pfn_type[pfn]); 
-        ERR("PT base is bad.");
+        ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
+            pfn, max_pfn, pfn_type[pfn]); 
         goto out;
     }
 
     if ((pt_levels == 2) && ((pfn_type[pfn]&LTABTYPE_MASK) != L2TAB)) { 
-        DPRINTF("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
-                pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB);
-        ERR("PT base is bad.");
+        ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+            pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB);
         goto out;
     }
 
     if ((pt_levels == 3) && ((pfn_type[pfn]&LTABTYPE_MASK) != L3TAB)) { 
-        DPRINTF("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
-                pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB);
-        ERR("PT base is bad.");
+        ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+            pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB);
         goto out;
     }
     
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_linux_save.c       Wed Nov 16 16:45:03 2005
@@ -27,13 +27,13 @@
 
 
 /* max mfn of the whole machine */
-static uint32_t max_mfn; 
+static unsigned long max_mfn; 
 
 /* virtual starting address of the hypervisor */
-static uint32_t hvirt_start; 
+static unsigned long hvirt_start; 
 
 /* #levels of page tables used by the currrent guest */
-static uint32_t pt_levels; 
+static unsigned int pt_levels; 
 
 /* total number of pages used by the current guest */
 static unsigned long max_pfn;
@@ -500,6 +500,70 @@
 
 
 
+static unsigned long *xc_map_m2p(int xc_handle, 
+                                 unsigned long max_mfn, 
+                                 int prot) 
+{ 
+    privcmd_m2pmfns_t m2p_mfns; 
+    privcmd_mmap_t ioctlx; 
+    privcmd_mmap_entry_t *entries; 
+    unsigned long m2p_chunks, m2p_size; 
+    unsigned long *m2p; 
+    int i, rc; 
+
+    m2p_size   = M2P_SIZE(max_mfn); 
+    m2p_chunks = M2P_CHUNKS(max_mfn); 
+
+
+    m2p_mfns.num = m2p_chunks; 
+
+    if(!(m2p_mfns.arr = malloc(m2p_chunks * sizeof(unsigned long)))) { 
+        ERR("failed to allocate space for m2p mfns!\n"); 
+        return NULL; 
+    } 
+
+    if (ioctl(xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS, &m2p_mfns) < 0) {
+        ERR("xc_get_m2p_mfns:"); 
+        return NULL;
+    }
+
+    if((m2p = mmap(NULL, m2p_size, prot, 
+                   MAP_SHARED, xc_handle, 0)) == MAP_FAILED) {
+        ERR("failed to mmap m2p"); 
+        return NULL; 
+    } 
+    
+
+    if(!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { 
+        ERR("failed to allocate space for mmap entries!\n"); 
+        return NULL; 
+    } 
+
+
+    ioctlx.num   = m2p_chunks;
+    ioctlx.dom   = DOMID_XEN; 
+    ioctlx.entry = entries; 
+    
+    for(i=0; i < m2p_chunks; i++) { 
+        
+        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); 
+        entries[i].mfn = m2p_mfns.arr[i]; 
+        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
+
+    }
+
+    if((rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx)) < 0) {
+        ERR("ioctl_mmap failed (rc = %d)", rc); 
+        return NULL; 
+    }
+        
+    free(m2p_mfns.arr); 
+    free(entries); 
+
+    return m2p; 
+}
+
+
 
 int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, 
                   uint32_t max_factor, uint32_t flags)
@@ -531,16 +595,12 @@
     /* A copy of the pfn-to-mfn table frame list. */
     unsigned long *p2m_frame_list = NULL;
 
-    unsigned long m2p_start_mfn;
-    
     /* Live mapping of shared info structure */
     shared_info_t *live_shinfo = NULL;
 
     /* base of the region in which domain memory is mapped */
     unsigned char *region_base = NULL;
 
-
-    
     /* power of 2 order of max_pfn */
     int order_nr; 
 
@@ -563,9 +623,6 @@
         max_factor = DEF_MAX_FACTOR; 
     
     initialize_mbit_rate(); 
-
-    DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live ? 
-            "true" : "false"); 
 
     if(!get_platform_info(xc_handle, dom, 
                           &max_mfn, &hvirt_start, &pt_levels)) {
@@ -647,11 +704,13 @@
     }
 
     /* Setup the mfn_to_pfn table mapping */
-    m2p_start_mfn = xc_get_m2p_start_mfn(xc_handle);
-    live_m2p      = xc_map_foreign_range(xc_handle, DOMID_XEN, M2P_SIZE, 
-                                         PROT_READ, m2p_start_mfn);
-    
-    /* Get a local copy fo the live_P2M_frame_list */
+    if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) { 
+        ERR("Failed to map live M2P table"); 
+        goto out; 
+    } 
+
+    
+    /* Get a local copy of the live_P2M_frame_list */
     if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { 
         ERR("Couldn't allocate p2m_frame_list array");
         goto out;
@@ -662,6 +721,8 @@
     for (i = 0; i < max_pfn; i += ulpp) {
         if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { 
             ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
+            ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, 
+                p2m_frame_list[i/ulpp]); 
             goto out;
         }
     }
@@ -693,20 +754,14 @@
         
     }
 
-#if 0
-    sent_last_iter = 0xFFFFFFFF; /* Pretend we sent a /lot/ last time */
-#else
-    sent_last_iter = 1 << 20; 
-#endif
+    /* pretend we sent all the pages last iteration */
+    sent_last_iter = max_pfn; 
 
 
     /* calculate the power of 2 order of max_pfn, e.g.
        15->4 16->4 17->5 */
     for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
         continue;
-
-#undef BITMAP_SIZE
-#define BITMAP_SIZE ((1<<20)/8) 
 
     /* Setup to_send / to_fix and to_skip bitmaps */
     to_send = malloc(BITMAP_SIZE); 
@@ -922,10 +977,8 @@
 
 
                 /* write out pages in batch */
-                if (pagetype == XTAB) {
-                    DPRINTF("SKIP BOGUS page %i mfn %08lx\n", j, pfn_type[j]);
+                if (pagetype == XTAB)
                     continue;
-                }
 
                 pagetype &= LTABTYPE_MASK; 
                 
@@ -950,10 +1003,10 @@
             } /* end of the write out for this batch */
             
             sent_this_iter += batch;
-            
+
+            munmap(region_base, batch*PAGE_SIZE);
+        
         } /* end of this while loop for this iteration */
-        
-        munmap(region_base, batch*PAGE_SIZE);
         
       skip: 
         
@@ -1027,13 +1080,9 @@
 
     DPRINTF("All memory is saved\n");
 
-    /* Success! */
-    rc = 0;
-    
-    /* ^^^^^^ XXX SMH: hmm.. not sure that's really success! */
-    
     /* Zero terminate */
-    if (!write_exact(io_fd, &rc, sizeof(int))) { 
+    i = 0; 
+    if (!write_exact(io_fd, &i, sizeof(int))) { 
         ERR("Error when writing to state file (6)");
         goto out;
     }
@@ -1043,17 +1092,17 @@
         unsigned int i,j;
         unsigned long pfntab[1024]; 
 
-        for ( i = 0, j = 0; i < max_pfn; i++ ) {
-            if ( ! is_mapped(live_p2m[i]) )
+        for (i = 0, j = 0; i < max_pfn; i++) {
+            if (!is_mapped(live_p2m[i]))
                 j++;
         }
-
+        
         if(!write_exact(io_fd, &j, sizeof(unsigned int))) { 
             ERR("Error when writing to state file (6a)");
             goto out;
         }      
         
-        for ( i = 0, j = 0; i < max_pfn; ) {
+        for (i = 0, j = 0; i < max_pfn; ) {
 
             if (!is_mapped(live_p2m[i]))
                 pfntab[j++] = i;
@@ -1097,7 +1146,10 @@
         ERR("Error when writing to state file (1)");
         goto out;
     }
-    
+
+    /* Success! */
+    rc = 0;
+
  out:
 
     if (live_shinfo)
@@ -1110,7 +1162,7 @@
         munmap(live_p2m, P2M_SIZE); 
 
     if(live_m2p) 
-        munmap(live_m2p, M2P_SIZE); 
+        munmap(live_m2p, M2P_SIZE(max_mfn)); 
 
     free(pfn_type);
     free(pfn_batch);
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_private.c  Wed Nov 16 16:45:03 2005
@@ -260,18 +260,6 @@
 }
 
 
-unsigned long xc_get_m2p_start_mfn ( int xc_handle )
-{
-    unsigned long mfn;
-
-    if ( ioctl( xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN, &mfn ) < 0 )
-    {
-        perror("xc_get_m2p_start_mfn:");
-        return 0;
-    }
-    return mfn;
-}
-
 int xc_get_pfn_list(int xc_handle,
                     uint32_t domid, 
                     unsigned long *pfn_buf, 
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xg_private.h  Wed Nov 16 16:45:03 2005
@@ -153,8 +153,6 @@
     
 } mfn_mapper_t;
 
-unsigned long xc_get_m2p_start_mfn (int xc_handle);
-
 int xc_copy_to_domain_page(int xc_handle, uint32_t domid,
                             unsigned long dst_pfn, void *src_page);
 
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h     Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xg_save_restore.h     Wed Nov 16 16:45:03 2005
@@ -3,6 +3,8 @@
 ** 
 ** Defintions and utilities for save / restore. 
 */
+
+#include "xc_private.h"
 
 #define DEBUG    1
 #define PROGRESS 0
@@ -55,25 +57,24 @@
 ** Returns 1 on success, 0 on failure. 
 */
 static int get_platform_info(int xc_handle, uint32_t dom, 
-                             /* OUT */ uint32_t *max_mfn,  
-                             /* OUT */ uint32_t *hvirt_start, 
-                             /* OUT */ uint32_t *pt_levels)
+                             /* OUT */ unsigned long *max_mfn,  
+                             /* OUT */ unsigned long *hvirt_start, 
+                             /* OUT */ unsigned int *pt_levels)
     
 { 
     xen_capabilities_info_t xen_caps = "";
     xen_platform_parameters_t xen_params;
-    xc_physinfo_t physinfo;
     
-    if (xc_physinfo(xc_handle, &physinfo) != 0) 
-        return 0;
-    
+
     if (xc_version(xc_handle, XENVER_platform_parameters, &xen_params) != 0)
         return 0;
     
     if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
         return 0;
 
-    *max_mfn =     physinfo.total_pages;
+    if (xc_memory_op(xc_handle, XENMEM_maximum_ram_page, max_mfn) != 0)
+        return 0; 
+    
     *hvirt_start = xen_params.virt_start;
 
     if (strstr(xen_caps, "xen-3.0-x86_64"))
@@ -95,13 +96,22 @@
 ** entry tell us whether or not the the PFN is currently mapped.
 */
 
-#define PFN_TO_KB(_pfn) ((_pfn) * PAGE_SIZE / 1024)
+#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
 #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
 
-/* Size in bytes of the M2P and P2M (both rounded up to nearest PAGE_SIZE) */
-#define M2P_SIZE ROUNDUP((max_mfn * sizeof(unsigned long)), PAGE_SHIFT) 
-#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) 
 
+/* 
+** The M2P is made up of some number of 'chunks' of at least 2MB in size. 
+** The below definitions and utility function(s) deal with mapping the M2P 
+** regarldess of the underlying machine memory size or architecture. 
+*/
+#define M2P_SHIFT       L2_PAGETABLE_SHIFT_PAE 
+#define M2P_CHUNK_SIZE  (1 << M2P_SHIFT) 
+#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) 
+#define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)
+
+/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
+#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) 
 
 /* Number of unsigned longs in a page */
 #define ulpp            (PAGE_SIZE/sizeof(unsigned long))
diff -r 9b345321fd06 -r fe3a892b33b4 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Wed Nov 16 14:50:36 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py   Wed Nov 16 16:45:03 2005
@@ -129,7 +129,7 @@
         l = read_exact(fd, sizeof_unsigned_long,
                        "not a valid guest state file: pfn count read")
         nr_pfns = unpack("=L", l)[0]   # XXX endianess
-        if nr_pfns > 1024*1024:     # XXX
+        if nr_pfns > 16*1024*1024:     # XXX 
             raise XendError(
                 "not a valid guest state file: pfn count out of range")
 
diff -r 9b345321fd06 -r fe3a892b33b4 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Nov 16 14:50:36 2005
+++ b/xen/arch/x86/mm.c Wed Nov 16 16:45:03 2005
@@ -898,6 +898,7 @@
     return 1;
 
  fail:
+    MEM_LOG("Failure in alloc_l3_table: entry %d", i);
     while ( i-- > 0 )
         if ( is_guest_l3_slot(i) )
             put_page_from_l3e(pl3e[i], pfn);
@@ -948,6 +949,7 @@
     return 1;
 
  fail:
+    MEM_LOG("Failure in alloc_l4_table: entry %d", i);
     while ( i-- > 0 )
         if ( is_guest_l4_slot(i) )
             put_page_from_l4e(pl4e[i], pfn);
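
For anyone following the new M2P mapping path in xc_map_m2p(), a standalone
sketch of the chunk arithmetic introduced above in xg_save_restore.h (macros
copied from the patch; assumes 4KB pages and the 2MB PAE L2 shift, with the
result depending on sizeof(unsigned long) in the tools build - 4 bytes on
i386):

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define M2P_SHIFT       21                   /* L2_PAGETABLE_SHIFT_PAE */
    #define M2P_CHUNK_SIZE  (1UL << M2P_SHIFT)   /* each chunk maps 2MB of M2P */
    #define ROUNDUP(_x,_w)  (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
    #define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT)
    #define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)

    int main(void)
    {
        /* A 16GB machine (the size quoted in the changeset description)
           has 16GB / 4KB = 4M machine frames. */
        unsigned long max_mfn = 16UL << (30 - PAGE_SHIFT);

        /* With 4-byte longs (i386 tools) this is 16MB of M2P, i.e. 8 chunks;
           with 8-byte longs it is 32MB, i.e. 16 chunks. */
        printf("M2P size   = %lu bytes\n", M2P_SIZE(max_mfn));
        printf("M2P chunks = %lu\n", M2P_CHUNKS(max_mfn));
        return 0;
    }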

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
