WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] [XEN] Avoid taking domain biglock in the

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [XEN] Avoid taking domain biglock in the page-fault handler.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 06 Nov 2006 16:50:30 +0000
Delivery-date: Mon, 06 Nov 2006 08:53:05 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxxx
# Node ID 5ec45b46456350f23e0aa4148bd52cbb64f1addf
# Parent  13ea4bea823764599c05a1dea2b7441e0f63456d
[XEN] Avoid taking domain biglock in the page-fault handler.
This avoids deadlock situation with the shadow_lock.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/x86/traps.c                     |   43 ++++++++++++-------------------
 xen/include/asm-x86/mm.h                 |    6 ++--
 xen/include/asm-x86/page.h               |   12 ++++++++
 xen/include/asm-x86/x86_32/page-2level.h |    3 ++
 xen/include/asm-x86/x86_32/page-3level.h |   11 +++++++
 xen/include/asm-x86/x86_64/page.h        |    3 ++
 6 files changed, 50 insertions(+), 28 deletions(-)

diff -r 13ea4bea8237 -r 5ec45b464563 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/arch/x86/traps.c      Sat Nov 04 19:26:29 2006 +0000
@@ -704,12 +704,6 @@ static int handle_gdt_ldt_mapping_fault(
 static int handle_gdt_ldt_mapping_fault(
     unsigned long offset, struct cpu_user_regs *regs)
 {
-    extern int map_ldt_shadow_page(unsigned int);
-
-    struct vcpu *v = current;
-    struct domain *d  = v->domain;
-    int ret;
-
     /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
     unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
     unsigned int vcpu_area   = (offset >> GDT_LDT_VCPU_VA_SHIFT);
@@ -723,18 +717,15 @@ static int handle_gdt_ldt_mapping_fault(
     if ( likely(is_ldt_area) )
     {
         /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
-        LOCK_BIGLOCK(d);
-        ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
-        UNLOCK_BIGLOCK(d);
-
-        if ( unlikely(ret == 0) )
+        if ( unlikely(map_ldt_shadow_page(offset >> PAGE_SHIFT) == 0) )
         {
             /* In hypervisor mode? Leave it to the #PF handler to fix up. */
             if ( !guest_mode(regs) )
                 return 0;
             /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
             propagate_page_fault(
-                v->arch.guest_context.ldt_base + offset, regs->error_code);
+                current->arch.guest_context.ldt_base + offset,
+                regs->error_code);
         }
     }
     else
@@ -787,7 +778,7 @@ static int __spurious_page_fault(
 
 #if CONFIG_PAGING_LEVELS >= 4
     l4t = map_domain_page(mfn);
-    l4e = l4t[l4_table_offset(addr)];
+    l4e = l4e_read_atomic(&l4t[l4_table_offset(addr)]);
     mfn = l4e_get_pfn(l4e);
     unmap_domain_page(l4t);
     if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
@@ -800,7 +791,7 @@ static int __spurious_page_fault(
 #ifdef CONFIG_X86_PAE
     l3t += (cr3 & 0xFE0UL) >> 3;
 #endif
-    l3e = l3t[l3_table_offset(addr)];
+    l3e = l3e_read_atomic(&l3t[l3_table_offset(addr)]);
     mfn = l3e_get_pfn(l3e);
     unmap_domain_page(l3t);
 #ifdef CONFIG_X86_PAE
@@ -814,7 +805,7 @@ static int __spurious_page_fault(
 #endif
 
     l2t = map_domain_page(mfn);
-    l2e = l2t[l2_table_offset(addr)];
+    l2e = l2e_read_atomic(&l2t[l2_table_offset(addr)]);
     mfn = l2e_get_pfn(l2e);
     unmap_domain_page(l2t);
     if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
@@ -827,7 +818,7 @@ static int __spurious_page_fault(
     }
 
     l1t = map_domain_page(mfn);
-    l1e = l1t[l1_table_offset(addr)];
+    l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
     mfn = l1e_get_pfn(l1e);
     unmap_domain_page(l1t);
     if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
@@ -856,12 +847,16 @@ static int spurious_page_fault(
 static int spurious_page_fault(
     unsigned long addr, struct cpu_user_regs *regs)
 {
-    struct domain *d = current->domain;
-    int            is_spurious;
-
-    LOCK_BIGLOCK(d);
+    unsigned long flags;
+    int           is_spurious;
+
+    /*
+     * Disabling interrupts prevents TLB flushing, and hence prevents
+     * page tables from becoming invalid under our feet during the walk.
+     */
+    local_irq_save(flags);
     is_spurious = __spurious_page_fault(addr, regs);
-    UNLOCK_BIGLOCK(d);
+    local_irq_restore(flags);
 
     return is_spurious;
 }
@@ -878,11 +873,7 @@ static int fixup_page_fault(unsigned lon
         if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
             return handle_gdt_ldt_mapping_fault(
                 addr - GDT_LDT_VIRT_START, regs);
-        /*
-         * Do not propagate spurious faults in the hypervisor area to the
-         * guest. It cannot fix them up.
-         */
-        return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
+        return 0;
     }
 
     if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/mm.h  Sat Nov 04 19:26:29 2006 +0000
@@ -179,8 +179,8 @@ void init_frametable(void);
 
 int alloc_page_type(struct page_info *page, unsigned long type);
 void free_page_type(struct page_info *page, unsigned long type);
-extern void invalidate_shadow_ldt(struct vcpu *d);
-extern int _shadow_mode_refcounts(struct domain *d);
+void invalidate_shadow_ldt(struct vcpu *d);
+int _shadow_mode_refcounts(struct domain *d);
 
 static inline void put_page(struct page_info *page)
 {
@@ -385,4 +385,6 @@ int steal_page(
 int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags);
 
+int map_ldt_shadow_page(unsigned int);
+
 #endif /* __ASM_X86_MM_H__ */
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/page.h        Sat Nov 04 19:26:29 2006 +0000
@@ -24,6 +24,18 @@
 #elif defined(__x86_64__)
 # include <asm/x86_64/page.h>
 #endif
+
+/* Read a pte atomically from memory. */
+#define l1e_read_atomic(l1ep) l1e_from_intpte(pte_read_atomic(l1ep))
+#define l2e_read_atomic(l2ep) l2e_from_intpte(pte_read_atomic(l2ep))
+#define l3e_read_atomic(l3ep) l3e_from_intpte(pte_read_atomic(l3ep))
+#define l4e_read_atomic(l4ep) l4e_from_intpte(pte_read_atomic(l4ep))
+
+/* Write a pte atomically to memory. */
+#define l1e_write_atomic(l1ep, l1e) pte_write_atomic(l1ep, l1e_get_intpte(l1e))
+#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l2e_get_intpte(l2e))
+#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l3e_get_intpte(l3e))
+#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l4e_get_intpte(l4e))
 
 /* Get direct integer representation of a pte's contents (intpte_t). */
 #define l1e_get_intpte(x)          ((x).l1)
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_32/page-2level.h
--- a/xen/include/asm-x86/x86_32/page-2level.h  Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_32/page-2level.h  Sat Nov 04 19:26:29 2006 +0000
@@ -28,6 +28,9 @@ typedef l2_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
+#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
+#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte))
+
 /* root table */
 #define root_get_pfn              l2e_get_pfn
 #define root_get_flags            l2e_get_flags
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h  Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_32/page-3level.h  Sat Nov 04 19:26:29 2006 +0000
@@ -38,6 +38,17 @@ typedef l3_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
+#define pte_read_atomic(ptep) ({                                            \
+    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
+    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, __pte)) != __pte ) \
+        __pte = __npte;                                                     \
+    __pte; })
+#define pte_write_atomic(ptep, pte) do {                                    \
+    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
+    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, (pte))) != __pte ) \
+        __pte = __npte;                                                     \
+} while ( 0 )
+
 /* root table */
 #define root_get_pfn              l3e_get_pfn
 #define root_get_flags            l3e_get_flags
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_64/page.h Sat Nov 04 19:26:29 2006 +0000
@@ -40,6 +40,9 @@ typedef l4_pgentry_t root_pgentry_t;
 typedef l4_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
+
+#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
+#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte))
 
 /* Given a virtual address, get an entry offset into a linear page table. */
 #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

[<Prev in Thread] Current Thread [Next in Thread>]
  • [Xen-changelog] [xen-unstable] [XEN] Avoid taking domain biglock in the page-fault handler., Xen patchbot-unstable <=