
Re: [Xen-devel] xen.git branch reorg / success with 2.6.30-rc3 pv_ops dom0



On Fri, 2009-06-05 at 14:19 -0400, Pasi Kärkkäinen wrote:
> On Fri, Jun 05, 2009 at 05:12:33PM +0100, Ian Campbell wrote:
> > On Fri, 2009-06-05 at 12:05 -0400, Ian Campbell wrote:
> > > 
> > > I had some patches to unify the 32 and 64 bit versions of dump
> page
> > > table at one point, since the 64 bit version does the right thing.
> > > I'll see if I can find or reproduce them.
> > 
> > Couldn't find them but please try this:
> > 
> 
> I had some problems applying the patch until I figured out it was
> supposed to be applied to a clean tree... hopefully "git checkout file"
> restores (or resets) the file to its original form and removes any
> local changes.

That should work, I guess; I usually use "git reset --hard" to undo any
local mods.

> 
> Here goes again:
> http://pasik.reaktio.net/xen/pv_ops-dom0-debug/pv_ops-dom0-log-04-with-highpte-no-swap-with-debug2.txt
> 
> 
> L4 at e1822000 is pinned contains L2 at e1977228 which points at an L1
> which is unpinned low mem address 0x8bf8000

OK, so that is interesting. A pinned L4 referencing an unpinned L1 isn't
supposed to happen, I think (Jeremy?).

The patch at the end (which again applies to a clean tree) walks the
lowmem region of each L4 just before pinning it, to ensure that every L1
page it references is already pinned. I hope this will catch the
offending L1 in the act.

> PGD 8ef001 PUD 8ef001 PMD 1268067 PTE 207061

This just tells us that the page table which maps the PTE we were trying
to write is itself mapped R/O, which is not as interesting as I thought
it would be.
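
(For reference: the low twelve bits of each raw entry in that dump are just
the standard x86 flag bits, so the R/O-ness can be read straight off the
values. The little standalone decoder below is only illustrative, it uses
the architectural bit positions rather than the kernel's _PAGE_* macros,
but fed the two entries quoted above it shows that the PMD has RW set
while the final PTE does not:)

#include <stdio.h>
#include <stdint.h>

/* Architectural x86 page-table entry flag bits (low 12 bits of an entry). */
#define PTE_P   0x001   /* present   */
#define PTE_RW  0x002   /* writable  */
#define PTE_US  0x004   /* user      */
#define PTE_A   0x020   /* accessed  */
#define PTE_D   0x040   /* dirty     */

static void decode(const char *lvl, uint64_t e)
{
        printf("%s %llx:%s%s%s%s%s\n", lvl, (unsigned long long)e,
               (e & PTE_P)  ? " present"  : " not-present",
               (e & PTE_RW) ? " rw"       : " read-only",
               (e & PTE_US) ? " user"     : " kernel",
               (e & PTE_A)  ? " accessed" : "",
               (e & PTE_D)  ? " dirty"    : "");
}

int main(void)
{
        decode("PMD", 0x1268067);       /* present rw user accessed dirty   */
        decode("PTE", 0x207061);        /* present, but RW clear, i.e. R/O  */
        return 0;
}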

> Fixmap KM_PTE0 @ 0xf57f0000
> PGD 8ef001 PUD 8ef001 PMD 207067 PTE 0
> Fixmap KM_PTE1 @ 0xf57ee000
> PGD 8ef001 PUD 8ef001 PMD 207067 PTE 0

So these guys are not at fault, although we are in the middle of filling
in KM_PTE0, I think.
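
(For context on why those two fixmap slots are worth dumping: with
CONFIG_HIGHPTE the L1 pages can live in highmem, so the kernel only reaches
them through a transient kmap_atomic mapping at the KM_PTE0/KM_PTE1 slots
while it walks or fills a PTE. From memory the 32-bit definitions in this
tree look roughly like the excerpt below, so treat the exact form as
approximate:)

/* arch/x86/include/asm/pgtable_32.h, CONFIG_HIGHPTE case (approximate) */
#define pte_offset_map(dir, address)                                   \
        ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +         \
         pte_index((address)))
#define pte_offset_map_nested(dir, address)                            \
        ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +         \
         pte_index((address)))
#define pte_unmap(pte)          kunmap_atomic((pte), KM_PTE0)
#define pte_unmap_nested(pte)   kunmap_atomic((pte), KM_PTE1)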

I've just had another go at reproducing this with a xen-3.3-testing.hg
hypervisor (both 32 and 64 bit) and a 32-bit kernel with dom0_mem=1024M.
No luck...

Ian.

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f9b252c..538590a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -285,46 +285,12 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
                tsk->thread.screen_bitmap |= 1 << bit;
 }
 
-static void dump_pagetable(unsigned long address)
-{
-       __typeof__(pte_val(__pte(0))) page;
-
-       page = read_cr3();
-       page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
-
 #ifdef CONFIG_X86_PAE
-       printk("*pdpt = %016Lx ", page);
-       if ((page >> PAGE_SHIFT) < max_low_pfn
-           && page & _PAGE_PRESENT) {
-               page &= PAGE_MASK;
-               page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
-                                                       & (PTRS_PER_PMD - 1)];
-               printk(KERN_CONT "*pde = %016Lx ", page);
-               page &= ~_PAGE_NX;
-       }
+#define FMTPTE "ll"
 #else
-       printk("*pde = %08lx ", page);
+#define FMTPTE "l"
 #endif
 
-       /*
-        * We must not directly access the pte in the highpte
-        * case if the page table is located in highmem.
-        * And let's rather not kmap-atomic the pte, just in case
-        * it's allocated already:
-        */
-       if ((page >> PAGE_SHIFT) < max_low_pfn
-           && (page & _PAGE_PRESENT)
-           && !(page & _PAGE_PSE)) {
-
-               page &= PAGE_MASK;
-               page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
-                                                       & (PTRS_PER_PTE - 1)];
-               printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
-       }
-
-       printk("\n");
-}
-
 #else /* CONFIG_X86_64: */
 
 void vmalloc_sync_all(void)
@@ -440,6 +406,10 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
 {
 }
 
+#define FMTPTE "ll"
+
+#endif /* CONFIG_X86_64 */
+
 static int bad_address(void *p)
 {
        unsigned long dummy;
@@ -447,7 +417,7 @@ static int bad_address(void *p)
        return probe_kernel_address((unsigned long *)p, dummy);
 }
 
-static void dump_pagetable(unsigned long address)
+void dump_pagetable(unsigned long address)
 {
        pgd_t *pgd;
        pud_t *pud;
@@ -462,7 +432,7 @@ static void dump_pagetable(unsigned long address)
        if (bad_address(pgd))
                goto bad;
 
-       printk("PGD %lx ", pgd_val(*pgd));
+       printk("PGD %"FMTPTE"x ", pgd_val(*pgd));
 
        if (!pgd_present(*pgd))
                goto out;
@@ -471,7 +441,7 @@ static void dump_pagetable(unsigned long address)
        if (bad_address(pud))
                goto bad;
 
-       printk("PUD %lx ", pud_val(*pud));
+       printk("PUD %"FMTPTE"x ", pud_val(*pud));
        if (!pud_present(*pud) || pud_large(*pud))
                goto out;
 
@@ -479,7 +449,7 @@ static void dump_pagetable(unsigned long address)
        if (bad_address(pmd))
                goto bad;
 
-       printk("PMD %lx ", pmd_val(*pmd));
+       printk("PMD %"FMTPTE"x ", pmd_val(*pmd));
        if (!pmd_present(*pmd) || pmd_large(*pmd))
                goto out;
 
@@ -487,7 +457,7 @@ static void dump_pagetable(unsigned long address)
        if (bad_address(pte))
                goto bad;
 
-       printk("PTE %lx", pte_val(*pte));
+       printk("PTE %"FMTPTE"x", pte_val(*pte));
 out:
        printk("\n");
        return;
@@ -495,8 +465,6 @@ bad:
        printk("BAD\n");
 }
 
-#endif /* CONFIG_X86_64 */
-
 /*
  * Workaround for K8 erratum #93 & buggy BIOS.
  *
@@ -598,6 +566,10 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
        printk_address(regs->ip, 1);
 
        dump_pagetable(address);
+       printk(KERN_CRIT "Fixmap KM_PTE0 @ %#lx\n", fix_to_virt(KM_PTE0));
+       dump_pagetable(fix_to_virt(KM_PTE0));
+       printk(KERN_CRIT "Fixmap KM_PTE1 @ %#lx\n", fix_to_virt(KM_PTE1));
+       dump_pagetable(fix_to_virt(KM_PTE1));
 }
 
 static noinline void
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 1729178..2c427d3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1015,13 +1015,34 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
        return flush;
 }
 
+static int xen_check_l1_pinned(pte_t *pte, unsigned long s, unsigned long e, struct mm_walk *walk)
+{
+       extern void dump_pagetable(unsigned long address);
+       struct page *pte_page = virt_to_page(pte);
+
+       if (!PagePinned(pte_page)) {
+               printk(KERN_CRIT "PTE @ %p is an L1 page %p covering %#lx-%#lx 
which is not pinned\n", pte, pte_page, s, e);
+               dump_pagetable((unsigned long)pte);
+               BUG();
+       }
+
+       return 0;
+}
+
 /* This is called just after a mm has been created, but it has not
    been used yet.  We need to make sure that its pagetable is all
    read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
+       struct mm_walk xen_pin_walk = {
+               .pte_entry = &xen_check_l1_pinned,
+               .mm = mm,
+       };
+
        vm_unmap_aliases();
 
+       walk_page_range(0xc0000000, FIXADDR_TOP, &xen_pin_walk);
+
        xen_mc_batch();
 
        if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
diff --git a/init/main.c b/init/main.c
index 33ce929..baf4300 100644
--- a/init/main.c
+++ b/init/main.c
@@ -74,6 +74,8 @@
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
 
+#include <asm/xen/page.h>
+
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
 #endif
@@ -815,6 +817,54 @@ static noinline int init_post(void)
        system_state = SYSTEM_RUNNING;
        numa_default_policy();
 
+       {
+               extern void dump_pagetable(unsigned long address);
+               struct page *pgd_page, *pte_page;
+               pgd_t *pgd;
+               pud_t *pud;
+               pmd_t *pmd;
+               phys_addr_t pte_phys;
+               unsigned long address = 0xc08ce011UL;//(unsigned long) __builtin_return_address(0);
+
+               pgd = pgd_offset(&init_mm, address);
+               if (!pgd_present(*pgd))
+                       goto skip;
+
+               pud = pud_offset(pgd, address);
+               if (!pud_present(*pud))
+                       goto skip;
+
+               pmd = pmd_offset(pud, address);
+               if (!pmd_present(*pmd))
+                       goto skip;
+
+               pgd_page = virt_to_page(init_mm.pgd);
+               pte_page = pmd_page(*pmd);
+
+               pte_phys = page_to_phys(pte_page) + pte_index(address);
+               printk(KERN_CRIT "Test debug infrastructure on address 
%#lx:\n", address);
+               printk(KERN_CRIT "L4 at V:%p/P:%#llx/M:%#llx is %s and contains 
L2 at V:%p/P:%#llx/M:%#llx = %#llx "
+                      "which points to an L1 P:%#llx/M:%#llx which is %s %s\n",
+                      pgd, virt_to_phys(pgd), virt_to_machine(pgd).maddr,
+                      PagePinned(pgd_page) ? "pinned" : "unpinned",
+                      pmd, virt_to_phys(pmd), virt_to_machine(pmd).maddr,
+                      pmd_val(*pmd),
+                      pte_phys, phys_to_machine(XPADDR(pte_phys)).maddr,
+                      PagePinned(pte_page) ? "pinned" : "unpinned",
+                      PageHighMem(pte_page) ? "highmem" : "lowmem");
+               printk(KERN_CRIT "faulting address %#lx\n", address);
+               dump_pagetable(address);
+               if (!PageHighMem(pte_page)) {
+                       printk(KERN_CRIT "lowmem mapping of L1 @ P:%#llx is at 
V:%p\n", pte_phys, phys_to_virt(page_to_phys(pte_page)));
+                       dump_pagetable((unsigned long)phys_to_virt(page_to_phys(pte_page)));
+               }
+               printk(KERN_CRIT "Fixmap KM_PTE0 @ %#lx\n", 
fix_to_virt(KM_PTE0));
+               dump_pagetable(fix_to_virt(KM_PTE0));
+               printk(KERN_CRIT "Fixmap KM_PTE1 @ %#lx\n", 
fix_to_virt(KM_PTE1));
+               dump_pagetable(fix_to_virt(KM_PTE1));
+       }
+       skip:
+
        if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
                printk(KERN_WARNING "Warning: unable to open an initial console.\n");
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 1652166..ced5650 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -52,6 +52,9 @@
 #include <linux/migrate.h>
 
 #include <asm/tlbflush.h>
+#include <asm/io.h>
+
+#include <asm/xen/page.h>
 
 #include "internal.h"
 
@@ -267,6 +270,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
                          unsigned long address, spinlock_t **ptlp, int sync)
 {
+       struct page *pgd_page, *pte_page;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
@@ -285,6 +289,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
        if (!pmd_present(*pmd))
                return NULL;
 
+       pgd_page = virt_to_page(mm->pgd);
+       pte_page = pmd_page(*pmd);
+
+       if (PagePinned(pgd_page) != PagePinned(pte_page)) {
+               extern void dump_pagetable(unsigned long address);
+               phys_addr_t pte_phys = page_to_phys(pte_page) + pte_index(address);
+               printk(KERN_CRIT "L4 at V:%p/P:%#llx/M:%#llx is %s and contains 
L2 at V:%p/P:%#llx/M:%#llx = %#llx "
+                      "which points to an L1 P:%#llx/M:%#llx which is %s %s\n",
+                      pgd, virt_to_phys(pgd), virt_to_machine(pgd).maddr,
+                      PagePinned(pgd_page) ? "pinned" : "unpinned",
+                      pmd, virt_to_phys(pmd), virt_to_machine(pmd).maddr,
+                      pmd_val(*pmd),
+                      pte_phys, phys_to_machine(XPADDR(pte_phys)).maddr,
+                      PagePinned(pte_page) ? "pinned" : "unpinned",
+                      PageHighMem(pte_page) ? "highmem" : "lowmem");
+               printk(KERN_CRIT "faulting address %#lx\n", address);
+               dump_pagetable(address);
+               if (!PageHighMem(pte_page)) {
+                       printk(KERN_CRIT "lowmem mapping of L1 @ P:%#llx is at 
V:%p\n", pte_phys, phys_to_virt(page_to_phys(pte_page)));
+                       dump_pagetable((unsigned long)phys_to_virt(page_to_phys(pte_page)));
+               }
+               printk(KERN_CRIT "Fixmap KM_PTE0 @ %#lx\n", 
fix_to_virt(KM_PTE0));
+               dump_pagetable(fix_to_virt(KM_PTE0));
+               printk(KERN_CRIT "Fixmap KM_PTE1 @ %#lx\n", 
fix_to_virt(KM_PTE1));
+               dump_pagetable(fix_to_virt(KM_PTE1));
+       }
        pte = pte_offset_map(pmd, address);
        /* Make a quick check before getting the lock */
        if (!sync && !pte_present(*pte)) {



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 

