# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxxx
# Node ID 5ec45b46456350f23e0aa4148bd52cbb64f1addf
# Parent 13ea4bea823764599c05a1dea2b7441e0f63456d
[XEN] Avoid taking domain biglock in the page-fault handler.
This avoids a deadlock situation with the shadow_lock.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
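
For illustration: a minimal, self-contained pthreads program (plain C,
not Xen code -- the lock names are borrowed only for clarity) showing
one plausible shape of the AB/BA lock-ordering inversion that taking
the biglock in the fault path can set up:

/* deadlock.c -- build with: gcc deadlock.c -pthread */
#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t biglock     = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t shadow_lock = PTHREAD_MUTEX_INITIALIZER;

/* Mimics the old fault path: biglock first, then shadow_lock. */
static void *fault_path(void *arg)
{
    pthread_mutex_lock(&biglock);
    usleep(10000);                    /* widen the race window */
    pthread_mutex_lock(&shadow_lock); /* blocks forever */
    pthread_mutex_unlock(&shadow_lock);
    pthread_mutex_unlock(&biglock);
    return NULL;
}

/* Mimics a shadow-code path: shadow_lock first, then biglock. */
static void *shadow_path(void *arg)
{
    pthread_mutex_lock(&shadow_lock);
    usleep(10000);
    pthread_mutex_lock(&biglock);     /* blocks forever: deadlock */
    pthread_mutex_unlock(&biglock);
    pthread_mutex_unlock(&shadow_lock);
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;
    pthread_create(&t1, NULL, fault_path, NULL);
    pthread_create(&t2, NULL, shadow_path, NULL);
    pthread_join(t1, NULL);           /* never returns */
    pthread_join(t2, NULL);
    return 0;
}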
---
xen/arch/x86/traps.c | 43 ++++++++++++-------------------
xen/include/asm-x86/mm.h | 6 ++--
xen/include/asm-x86/page.h | 12 ++++++++
xen/include/asm-x86/x86_32/page-2level.h | 3 ++
xen/include/asm-x86/x86_32/page-3level.h | 11 +++++++
xen/include/asm-x86/x86_64/page.h | 3 ++
6 files changed, 50 insertions(+), 28 deletions(-)
diff -r 13ea4bea8237 -r 5ec45b464563 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/arch/x86/traps.c Sat Nov 04 19:26:29 2006 +0000
@@ -704,12 +704,6 @@ static int handle_gdt_ldt_mapping_fault(
static int handle_gdt_ldt_mapping_fault(
unsigned long offset, struct cpu_user_regs *regs)
{
- extern int map_ldt_shadow_page(unsigned int);
-
- struct vcpu *v = current;
- struct domain *d = v->domain;
- int ret;
-
/* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
unsigned int vcpu_area = (offset >> GDT_LDT_VCPU_VA_SHIFT);
@@ -723,18 +717,15 @@ static int handle_gdt_ldt_mapping_fault(
if ( likely(is_ldt_area) )
{
/* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
- LOCK_BIGLOCK(d);
- ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
- UNLOCK_BIGLOCK(d);
-
- if ( unlikely(ret == 0) )
+ if ( unlikely(map_ldt_shadow_page(offset >> PAGE_SHIFT) == 0) )
{
/* In hypervisor mode? Leave it to the #PF handler to fix up. */
if ( !guest_mode(regs) )
return 0;
/* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
propagate_page_fault(
- v->arch.guest_context.ldt_base + offset, regs->error_code);
+ current->arch.guest_context.ldt_base + offset,
+ regs->error_code);
}
}
else
@@ -787,7 +778,7 @@ static int __spurious_page_fault(
#if CONFIG_PAGING_LEVELS >= 4
l4t = map_domain_page(mfn);
- l4e = l4t[l4_table_offset(addr)];
+ l4e = l4e_read_atomic(&l4t[l4_table_offset(addr)]);
mfn = l4e_get_pfn(l4e);
unmap_domain_page(l4t);
if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
@@ -800,7 +791,7 @@ static int __spurious_page_fault(
#ifdef CONFIG_X86_PAE
l3t += (cr3 & 0xFE0UL) >> 3;
#endif
- l3e = l3t[l3_table_offset(addr)];
+ l3e = l3e_read_atomic(&l3t[l3_table_offset(addr)]);
mfn = l3e_get_pfn(l3e);
unmap_domain_page(l3t);
#ifdef CONFIG_X86_PAE
@@ -814,7 +805,7 @@ static int __spurious_page_fault(
#endif
l2t = map_domain_page(mfn);
- l2e = l2t[l2_table_offset(addr)];
+ l2e = l2e_read_atomic(&l2t[l2_table_offset(addr)]);
mfn = l2e_get_pfn(l2e);
unmap_domain_page(l2t);
if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
@@ -827,7 +818,7 @@ static int __spurious_page_fault(
}
l1t = map_domain_page(mfn);
- l1e = l1t[l1_table_offset(addr)];
+ l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
mfn = l1e_get_pfn(l1e);
unmap_domain_page(l1t);
if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
@@ -856,12 +847,16 @@ static int spurious_page_fault(
static int spurious_page_fault(
unsigned long addr, struct cpu_user_regs *regs)
{
- struct domain *d = current->domain;
- int is_spurious;
-
- LOCK_BIGLOCK(d);
+ unsigned long flags;
+ int is_spurious;
+
+ /*
+ * Disabling interrupts prevents TLB flushing, and hence prevents
+ * page tables from becoming invalid under our feet during the walk.
+ */
+ local_irq_save(flags);
is_spurious = __spurious_page_fault(addr, regs);
- UNLOCK_BIGLOCK(d);
+ local_irq_restore(flags);
return is_spurious;
}
@@ -878,11 +873,7 @@ static int fixup_page_fault(unsigned lon
if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
return handle_gdt_ldt_mapping_fault(
addr - GDT_LDT_VIRT_START, regs);
- /*
- * Do not propagate spurious faults in the hypervisor area to the
- * guest. It cannot fix them up.
- */
- return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
+ return 0;
}
if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
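
A sketch of the pattern the traps.c hunks above rely on (simplified,
not the full Xen walker): with the biglock gone the walk is lockless,
so each level's entry is snapshotted with a single atomic read and all
checks are made against that snapshot, never by re-reading the table;
disabled interrupts keep the tables themselves alive by holding off
TLB-flush IPIs.  The helper and flag names here are illustrative
stand-ins for the per-arch definitions added below.

#include <stdint.h>
#include <stdbool.h>

typedef uint64_t intpte_t;

/* Stand-in for the per-arch pte_read_atomic() added below; a plain
 * load is already atomic when intpte_t is one machine word. */
static intpte_t pte_read_atomic(const volatile intpte_t *ptep)
{
    return *ptep;
}

static bool level_ok(intpte_t e, intpte_t required, intpte_t disallowed)
{
    return ((e & required) == required) && !(e & disallowed);
}

/* One level of the walk: snapshot once, validate, then descend using
 * only the snapshot's contents (the caller extracts the next-level
 * pfn from *out, never from the live table entry). */
static bool walk_one_level(const intpte_t *table, unsigned int idx,
                           intpte_t required, intpte_t disallowed,
                           intpte_t *out)
{
    intpte_t e = pte_read_atomic(&table[idx]); /* single snapshot */
    if ( !level_ok(e, required, disallowed) )
        return false; /* genuine fault at this level */
    *out = e;
    return true;
}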
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/mm.h Sat Nov 04 19:26:29 2006 +0000
@@ -179,8 +179,8 @@ void init_frametable(void);
int alloc_page_type(struct page_info *page, unsigned long type);
void free_page_type(struct page_info *page, unsigned long type);
-extern void invalidate_shadow_ldt(struct vcpu *d);
-extern int _shadow_mode_refcounts(struct domain *d);
+void invalidate_shadow_ldt(struct vcpu *d);
+int _shadow_mode_refcounts(struct domain *d);
static inline void put_page(struct page_info *page)
{
@@ -385,4 +385,6 @@ int steal_page(
int steal_page(
struct domain *d, struct page_info *page, unsigned int memflags);
+int map_ldt_shadow_page(unsigned int);
+
#endif /* __ASM_X86_MM_H__ */
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/page.h Sat Nov 04 19:26:29 2006 +0000
@@ -24,6 +24,18 @@
#elif defined(__x86_64__)
# include <asm/x86_64/page.h>
#endif
+
+/* Read a pte atomically from memory. */
+#define l1e_read_atomic(l1ep) l1e_from_intpte(pte_read_atomic(l1ep))
+#define l2e_read_atomic(l2ep) l2e_from_intpte(pte_read_atomic(l2ep))
+#define l3e_read_atomic(l3ep) l3e_from_intpte(pte_read_atomic(l3ep))
+#define l4e_read_atomic(l4ep) l4e_from_intpte(pte_read_atomic(l4ep))
+
+/* Write a pte atomically to memory. */
+#define l1e_write_atomic(l1ep, l1e) pte_write_atomic(l1ep, l1e_get_intpte(l1e))
+#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l2e_get_intpte(l2e))
+#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l3e_get_intpte(l3e))
+#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l4e_get_intpte(l4e))
/* Get direct integer representation of a pte's contents (intpte_t). */
#define l1e_get_intpte(x) ((x).l1)
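
The page.h wrappers above exist because each level's entry is a
distinct struct type whose field name matches its level (as the
l1e_get_intpte() line shows, an l1 entry stores its bits in .l1, an
l2 entry in .l2, and so on -- which is why each level needs its own
accessor).  A sketch of how one level's pair expands, with an
illustrative struct definition:

typedef uint64_t intpte_t;
typedef struct { intpte_t l2; } l2_pgentry_t;

#define l2e_get_intpte(x)  ((x).l2)
#define l2e_from_intpte(x) ((l2_pgentry_t) { (x) })

/* l2e_read_atomic(p)     => l2e_from_intpte(pte_read_atomic(p))
 * l2e_write_atomic(p, e) => pte_write_atomic(p, l2e_get_intpte(e))
 * i.e. the raw atomic access always operates on intpte_t, and the
 * typed struct is only (un)wrapped around it. */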
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_32/page-2level.h
--- a/xen/include/asm-x86/x86_32/page-2level.h Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_32/page-2level.h Sat Nov 04 19:26:29 2006 +0000
@@ -28,6 +28,9 @@ typedef l2_pgentry_t root_pgentry_t;
#endif /* !__ASSEMBLY__ */
+#define pte_read_atomic(ptep) (*(intpte_t *)(ptep))
+#define pte_write_atomic(ptep, pte) (*(intpte_t *)(ptep) = (pte))
+
/* root table */
#define root_get_pfn l2e_get_pfn
#define root_get_flags l2e_get_flags
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_32/page-3level.h Sat Nov 04 19:26:29 2006 +0000
@@ -38,6 +38,17 @@ typedef l3_pgentry_t root_pgentry_t;
#endif /* !__ASSEMBLY__ */
+#define pte_read_atomic(ptep) ({ \
+ intpte_t __pte = *(intpte_t *)(ptep), __npte; \
+ while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, __pte)) != __pte ) \
+ __pte = __npte; \
+ __pte; })
+#define pte_write_atomic(ptep, pte) do { \
+ intpte_t __pte = *(intpte_t *)(ptep), __npte; \
+ while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, (pte))) != __pte ) \
+ __pte = __npte; \
+} while ( 0 )
+
/* root table */
#define root_get_pfn l3e_get_pfn
#define root_get_flags l3e_get_flags
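
The 3-level (PAE) case above is the interesting one: a 64-bit PTE
cannot be loaded in a single instruction on 32-bit x86, but cmpxchg8b
can atomically compare-and-exchange a value with itself, which yields
an atomic 64-bit read.  A standalone userspace demonstration of the
same loop, using GCC's __sync builtin in place of Xen's cmpxchg():

#include <stdint.h>
#include <stdio.h>

typedef uint64_t intpte_t;

static intpte_t pte_read_atomic(volatile intpte_t *ptep)
{
    intpte_t old = *ptep, seen;
    /* The initial plain read is only a guess: if the entry changed
     * before the CAS, the CAS fails and returns the current value,
     * which we retry with.  On success we know we observed all 64
     * bits atomically. */
    while ( (seen = __sync_val_compare_and_swap(ptep, old, old)) != old )
        old = seen;
    return old;
}

int main(void)
{
    volatile intpte_t pte = 0x8000000012345067ULL;
    printf("pte = %#llx\n", (unsigned long long)pte_read_atomic(&pte));
    return 0;
}

pte_write_atomic() uses the same loop with the new value as the
exchange operand; in both directions the point is that the 64-bit
update hits memory as one atomic operation rather than as two 32-bit
halves that a concurrent walker could observe torn.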
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_64/page.h Sat Nov 04 19:26:29 2006 +0000
@@ -40,6 +40,9 @@ typedef l4_pgentry_t root_pgentry_t;
typedef l4_pgentry_t root_pgentry_t;
#endif /* !__ASSEMBLY__ */
+
+#define pte_read_atomic(ptep) (*(intpte_t *)(ptep))
+#define pte_write_atomic(ptep, pte) (*(intpte_t *)(ptep) = (pte))
/* Given a virtual address, get an entry offset into a linear page table. */
#define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)
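
On x86_64 (and likewise in the 2-level x86_32 case earlier) no loop is
needed: intpte_t is a single machine word, so a naturally aligned
plain load or store is already atomic and the macros compile down to
simple dereferences.  A hypothetical compile-time check capturing that
assumption (not part of the patch):

#include <stdint.h>
typedef uint64_t intpte_t; /* one machine word on x86_64 */
/* Fails to compile if intpte_t ever stops being word-sized. */
typedef char pte_is_word_sized[sizeof(intpte_t) == sizeof(void *) ? 1 : -1];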