# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID d78dedc4831f0378335f4e478af61994018e292e
# Parent dfbf0939350cf8823891c26785c2af15b54e9bcd
Detect spurious faults taken in the hypervisor that are
due to writable pagetable logic.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
diff -r dfbf0939350c -r d78dedc4831f xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Mar 24 09:59:31 2006
+++ b/xen/arch/x86/mm.c Fri Mar 24 11:14:58 2006
@@ -3351,8 +3351,9 @@
* permissions in page directories by writing back to the linear mapping.
*/
if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
- return !__put_user(
- pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1);
+ return __put_user(
+ pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1) ?
+ 0 : EXCRET_not_a_fault;
/* We are looking only for read-only mappings of p.t. pages. */
if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
diff -r dfbf0939350c -r d78dedc4831f xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Mar 24 09:59:31 2006
+++ b/xen/arch/x86/traps.c Fri Mar 24 11:14:58 2006
@@ -620,6 +620,46 @@
return 0;
}
+static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+{
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ int rc;
+
+ /*
+ * The only possible reason for a spurious page fault not to be picked
+ * up already is that a page directory was unhooked by writable page table
+ * logic and then reattached before the faulting VCPU could detect it.
+ */
+ if ( is_idle_domain(d) || /* no ptwr in idle domain */
+ IN_HYPERVISOR_RANGE(addr) || /* no ptwr on hypervisor addrs */
+ shadow_mode_enabled(d) || /* no ptwr logic in shadow mode */
+ ((regs->error_code & 0x1d) != 0) ) /* not a simple not-present fault */
+ return 0;
+
+ LOCK_BIGLOCK(d);
+
+ /*
+ * The page directory could have been detached again while we weren't
+ * holding the per-domain lock. Detect that and fix up if it's the case.
+ */
+ if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
+ unlikely(l2_linear_offset(addr) ==
+ d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
+ {
+ ptwr_flush(d, PTWR_PT_ACTIVE);
+ rc = 1;
+ }
+ else
+ {
+ /* Okay, walk the page tables. Only check for not-present faults. */
+ rc = __spurious_page_fault(addr);
+ }
+
+ UNLOCK_BIGLOCK(d);
+ return rc;
+}
+
/*
* #PF error code:
* Bit 0: Protection violation (=1) ; Page not present (=0)
@@ -644,6 +684,13 @@
if ( unlikely(!guest_mode(regs)) )
{
+ if ( spurious_page_fault(addr, regs) )
+ {
+ DPRINTK("Spurious fault in domain %u:%u at addr %lx\n",
+ current->domain->domain_id, current->vcpu_id, addr);
+ return EXCRET_not_a_fault;
+ }
+
if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
{
perfc_incrc(copy_user_faults);
diff -r dfbf0939350c -r d78dedc4831f xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Fri Mar 24 09:59:31 2006
+++ b/xen/arch/x86/x86_32/traps.c Fri Mar 24 11:14:58 2006
@@ -70,38 +70,77 @@
void show_page_walk(unsigned long addr)
{
+ unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
+#ifdef CONFIG_X86_PAE
+ l3_pgentry_t l3e, *l3t;
+#endif
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
+
+ printk("Pagetable walk from %08lx:\n", addr);
+
+#ifdef CONFIG_X86_PAE
+ l3t = map_domain_page(mfn);
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L3 = %"PRIpte" %08lx\n", l3e_get_intpte(l3e), pfn);
+ unmap_domain_page(l3t);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+ return;
+#endif
+
+ l2t = map_domain_page(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L2 = %"PRIpte" %08lx %s\n", l2e_get_intpte(l2e), pfn,
+ (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
+ unmap_domain_page(l2t);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
+ return;
+
+ l1t = map_domain_page(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
+ unmap_domain_page(l1t);
+}
+
+int __spurious_page_fault(unsigned long addr)
+{
unsigned long mfn = read_cr3() >> PAGE_SHIFT;
- intpte_t *ptab, ent;
- unsigned long pfn;
-
- printk("Pagetable walk from %08lx:\n", addr);
-
#ifdef CONFIG_X86_PAE
- ptab = map_domain_page(mfn);
- ent = ptab[l3_table_offset(addr)];
- pfn = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
- printk(" L3 = %"PRIpte" %08lx\n", ent, pfn);
- unmap_domain_page(ptab);
- if ( !(ent & _PAGE_PRESENT) )
- return;
- mfn = ent >> PAGE_SHIFT;
+ l3_pgentry_t l3e, *l3t;
#endif
-
- ptab = map_domain_page(mfn);
- ent = ptab[l2_table_offset(addr)];
- pfn = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
- printk(" L2 = %"PRIpte" %08lx %s\n", ent, pfn,
- (ent & _PAGE_PSE) ? "(PSE)" : "");
- unmap_domain_page(ptab);
- if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) )
- return;
- mfn = ent >> PAGE_SHIFT;
-
- ptab = map_domain_page(ent >> PAGE_SHIFT);
- ent = ptab[l1_table_offset(addr)];
- pfn = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
- printk(" L1 = %"PRIpte" %08lx\n", ent, pfn);
- unmap_domain_page(ptab);
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
+
+#ifdef CONFIG_X86_PAE
+ l3t = map_domain_page(mfn);
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ unmap_domain_page(l3t);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+ return 0;
+#endif
+
+ l2t = map_domain_page(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ unmap_domain_page(l2t);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ return 0;
+ if ( l2e_get_flags(l2e) & _PAGE_PSE )
+ return 1;
+
+ l1t = map_domain_page(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ unmap_domain_page(l1t);
+ return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
}
#define DOUBLEFAULT_STACK_SIZE 1024
diff -r dfbf0939350c -r d78dedc4831f xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Fri Mar 24 09:59:31 2006
+++ b/xen/arch/x86/x86_64/traps.c Fri Mar 24 11:14:58 2006
@@ -70,31 +70,79 @@
void show_page_walk(unsigned long addr)
{
- unsigned long page = read_cr3();
-
+ unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
+ l4_pgentry_t l4e, *l4t;
+ l3_pgentry_t l3e, *l3t;
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
+
printk("Pagetable walk from %016lx:\n", addr);
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l4_table_offset(addr)];
- printk(" L4 = %016lx\n", page);
- if ( !(page & _PAGE_PRESENT) )
+ l4t = mfn_to_virt(mfn);
+ l4e = l4t[l4_table_offset(addr)];
+ mfn = l4e_get_pfn(l4e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L4 = %"PRIpte" %016lx\n", l4e_get_intpte(l4e), pfn);
+ if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
return;
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l3_table_offset(addr)];
- printk(" L3 = %016lx\n", page);
- if ( !(page & _PAGE_PRESENT) )
+ l3t = mfn_to_virt(mfn);
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L3 = %"PRIpte" %016lx\n", l3e_get_intpte(l3e), pfn);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
return;
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l2_table_offset(addr)];
- printk(" L2 = %016lx %s\n", page, (page & _PAGE_PSE) ? "(2MB)" : "");
- if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
+ l2t = mfn_to_virt(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L2 = %"PRIpte" %016lx %s\n", l2e_get_intpte(l2e), pfn,
+ (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
return;
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
- printk(" L1 = %016lx\n", page);
+ l1t = mfn_to_virt(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ pfn = get_gpfn_from_mfn(mfn);
+ printk(" L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
+}
+
+int __spurious_page_fault(unsigned long addr)
+{
+ unsigned long mfn = read_cr3() >> PAGE_SHIFT;
+ l4_pgentry_t l4e, *l4t;
+ l3_pgentry_t l3e, *l3t;
+ l2_pgentry_t l2e, *l2t;
+ l1_pgentry_t l1e, *l1t;
+
+ l4t = mfn_to_virt(mfn);
+ l4e = l4t[l4_table_offset(addr)];
+ mfn = l4e_get_pfn(l4e);
+ if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+ return 0;
+
+ l3t = mfn_to_virt(mfn);
+ l3e = l3t[l3_table_offset(addr)];
+ mfn = l3e_get_pfn(l3e);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+ return 0;
+
+ l2t = mfn_to_virt(mfn);
+ l2e = l2t[l2_table_offset(addr)];
+ mfn = l2e_get_pfn(l2e);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ return 0;
+ if ( l2e_get_flags(l2e) & _PAGE_PSE )
+ return 1;
+
+ l1t = mfn_to_virt(mfn);
+ l1e = l1t[l1_table_offset(addr)];
+ mfn = l1e_get_pfn(l1e);
+ return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
}
asmlinkage void double_fault(void);
diff -r dfbf0939350c -r d78dedc4831f xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Fri Mar 24 09:59:31 2006
+++ b/xen/include/asm-x86/processor.h Fri Mar 24 11:14:58 2006
@@ -524,6 +524,7 @@
void show_stack(struct cpu_user_regs *regs);
void show_registers(struct cpu_user_regs *regs);
void show_page_walk(unsigned long addr);
+int __spurious_page_fault(unsigned long addr);
asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
extern void mtrr_ap_init(void);
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|