WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] [XEN] Fix page-fault handler to not trust

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [XEN] Fix page-fault handler to not trust bit 0 of error code.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 19 Jun 2006 14:10:32 +0000
Delivery-date: Mon, 19 Jun 2006 07:13:49 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID e23961a8ce7eeffee8fb94c199818a5361f75639
# Parent  5033ffe8f53356cd7d8e52e9d4ce78c69e826d33
[XEN] Fix page-fault handler to not trust bit 0 of error code.
It can be cleared due to writable-pagetable logic. Various
other cleanups too. Spurious fault detection logic is
simplified.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/x86/mm.c               |    8 +--
 xen/arch/x86/traps.c            |   90 +++++++++++++---------------------------
 xen/arch/x86/x86_32/seg_fixup.c |    2 
 xen/arch/x86/x86_emulate.c      |    4 -
 4 files changed, 37 insertions(+), 67 deletions(-)

diff -r 5033ffe8f533 -r e23961a8ce7e xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Sat Jun 17 12:57:03 2006 +0100
+++ b/xen/arch/x86/mm.c Sun Jun 18 19:24:00 2006 +0100
@@ -3351,7 +3351,7 @@ static int ptwr_emulated_update(
         addr &= ~(sizeof(paddr_t)-1);
         if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
         {
-            propagate_page_fault(addr, 4); /* user mode, read fault */
+            propagate_page_fault(addr, 0); /* read fault */
             return X86EMUL_PROPAGATE_FAULT;
         }
         /* Mask out bits provided by caller. */
@@ -3483,12 +3483,12 @@ int ptwr_do_page_fault(struct domain *d,
     unsigned long    l2_idx;
     struct x86_emulate_ctxt emul_ctxt;
 
-    if ( unlikely(shadow_mode_enabled(d)) )
-        return 0;
+    ASSERT(!shadow_mode_enabled(d));
 
     /*
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
+     * NB. The L2 entry cannot be detached as the caller already checked that.
      */
     if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
            _PAGE_PRESENT) ||
@@ -3579,7 +3579,7 @@ int ptwr_do_page_fault(struct domain *d,
     }
 
     /*
-     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at at 
+     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
      * time. If there is already one, we must flush it out.
      */
     if ( d->arch.ptwr[which].l1va )
diff -r 5033ffe8f533 -r e23961a8ce7e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Sat Jun 17 12:57:03 2006 +0100
+++ b/xen/arch/x86/traps.c      Sun Jun 18 19:24:00 2006 +0100
@@ -547,6 +547,7 @@ static int handle_gdt_ldt_mapping_fault(
     {
         /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
         LOCK_BIGLOCK(d);
+        cleanup_writable_pagetable(d);
         ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
         UNLOCK_BIGLOCK(d);
 
@@ -654,49 +655,21 @@ static int spurious_page_fault(
 static int spurious_page_fault(
     unsigned long addr, struct cpu_user_regs *regs)
 {
+    struct domain *d = current->domain;
+    int            is_spurious;
+
+    LOCK_BIGLOCK(d);
+    cleanup_writable_pagetable(d);
+    is_spurious = __spurious_page_fault(addr, regs);
+    UNLOCK_BIGLOCK(d);
+
+    return is_spurious;
+}
+
+static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
+{
     struct vcpu   *v = current;
     struct domain *d = v->domain;
-    int            is_spurious;
-
-    LOCK_BIGLOCK(d);
-
-    is_spurious = __spurious_page_fault(addr, regs);
-    if ( is_spurious )
-        goto out;
-
-    /*
-     * The only possible reason for a spurious page fault not to be picked
-     * up already is that a page directory was unhooked by writable page table
-     * logic and then reattached before the faulting VCPU could detect it.
-     */
-    if ( is_idle_domain(d) ||               /* no ptwr in idle domain       */
-         IN_HYPERVISOR_RANGE(addr) ||       /* no ptwr on hypervisor addrs  */
-         shadow_mode_enabled(d) ||          /* no ptwr logic in shadow mode */
-         (regs->error_code & PGERR_page_present) ) /* not-present fault?    */
-        goto out;
-
-    /*
-     * The page directory could have been detached again while we weren't
-     * holding the per-domain lock. Detect that and fix up if it's the case.
-     */
-    if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
-         unlikely(l2_linear_offset(addr) ==
-                  d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
-    {
-        ptwr_flush(d, PTWR_PT_ACTIVE);
-        is_spurious = 1;
-    }
-
- out:
-    UNLOCK_BIGLOCK(d);
-    return is_spurious;
-}
-
-static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
-{
-    struct vcpu   *v = current;
-    struct domain *d = v->domain;
-    int            rc;
 
     if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
     {
@@ -709,10 +682,7 @@ static int fixup_page_fault(unsigned lon
          * Do not propagate spurious faults in the hypervisor area to the
          * guest. It cannot fix them up.
          */
-        LOCK_BIGLOCK(d);
-        rc = __spurious_page_fault(addr, regs);
-        UNLOCK_BIGLOCK(d);
-        return rc;
+        return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
     }
 
     if ( unlikely(shadow_mode_enabled(d)) )
@@ -730,12 +700,14 @@ static int fixup_page_fault(unsigned lon
             return EXCRET_fault_fixed;
         }
 
+        /*
+         * Note it is *not* safe to check PGERR_page_present here. It can be
+         * clear, due to unhooked page table, when we would otherwise expect
+         * it to be set. We have an aversion to trusting that flag in Xen, and
+         * guests ought to be leery too.
+         */
         if ( guest_kernel_mode(v, regs) &&
-             /* Protection violation on write? No reserved-bit violation? */
-             ((regs->error_code & (PGERR_page_present |
-                                   PGERR_write_access |
-                                   PGERR_reserved_bit)) ==
-              (PGERR_page_present | PGERR_write_access)) &&
+             (regs->error_code & PGERR_write_access) &&
              ptwr_do_page_fault(d, addr, regs) )
         {
             UNLOCK_BIGLOCK(d);
@@ -870,8 +842,6 @@ static inline int admin_io_okay(
     (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
 
 /* Propagate a fault back to the guest kernel. */
-#define USER_READ_FAULT  (PGERR_user_mode)
-#define USER_WRITE_FAULT (PGERR_user_mode | PGERR_write_access)
 #define PAGE_FAULT(_faultaddr, _errcode)        \
 ({  propagate_page_fault(_faultaddr, _errcode); \
     return EXCRET_fault_fixed;                  \
@@ -881,7 +851,7 @@ static inline int admin_io_okay(
 #define insn_fetch(_type, _size, _ptr)          \
 ({  unsigned long _x;                           \
     if ( get_user(_x, (_type *)eip) )           \
-        PAGE_FAULT(eip, USER_READ_FAULT);       \
+        PAGE_FAULT(eip, 0); /* read fault */    \
     eip += _size; (_type)_x; })
 
 static int emulate_privileged_op(struct cpu_user_regs *regs)
@@ -950,17 +920,17 @@ static int emulate_privileged_op(struct 
             case 1:
                 data = (u8)inb_user((u16)regs->edx, v, regs);
                 if ( put_user((u8)data, (u8 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             case 2:
                 data = (u16)inw_user((u16)regs->edx, v, regs);
                 if ( put_user((u16)data, (u16 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             case 4:
                 data = (u32)inl_user((u16)regs->edx, v, regs);
                 if ( put_user((u32)data, (u32 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             }
             regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
@@ -975,17 +945,17 @@ static int emulate_privileged_op(struct 
             {
             case 1:
                 if ( get_user(data, (u8 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outb_user((u8)data, (u16)regs->edx, v, regs);
                 break;
             case 2:
                 if ( get_user(data, (u16 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outw_user((u16)data, (u16)regs->edx, v, regs);
                 break;
             case 4:
                 if ( get_user(data, (u32 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
@@ -1168,7 +1138,7 @@ static int emulate_privileged_op(struct 
             v->arch.guest_context.ctrlreg[2] = *reg;
             v->vcpu_info->arch.cr2           = *reg;
             break;
-            
+
         case 3: /* Write CR3 */
             LOCK_BIGLOCK(v->domain);
             cleanup_writable_pagetable(v->domain);
diff -r 5033ffe8f533 -r e23961a8ce7e xen/arch/x86/x86_32/seg_fixup.c
--- a/xen/arch/x86/x86_32/seg_fixup.c   Sat Jun 17 12:57:03 2006 +0100
+++ b/xen/arch/x86/x86_32/seg_fixup.c   Sun Jun 18 19:24:00 2006 +0100
@@ -464,7 +464,7 @@ int gpf_emulate_4gb(struct cpu_user_regs
     return 0;
 
  page_fault:
-    propagate_page_fault((unsigned long)pb, 4);
+    propagate_page_fault((unsigned long)pb, 0); /* read fault */
     return EXCRET_fault_fixed;
 }
 
diff -r 5033ffe8f533 -r e23961a8ce7e xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Sat Jun 17 12:57:03 2006 +0100
+++ b/xen/arch/x86/x86_emulate.c        Sun Jun 18 19:24:00 2006 +0100
@@ -1146,7 +1146,7 @@ x86_emulate_read_std(
     *val = 0;
     if ( copy_from_user((void *)val, (void *)addr, bytes) )
     {
-        propagate_page_fault(addr, 4); /* user mode, read fault */
+        propagate_page_fault(addr, 0); /* read fault */
         return X86EMUL_PROPAGATE_FAULT;
     }
     return X86EMUL_CONTINUE;
@@ -1161,7 +1161,7 @@ x86_emulate_write_std(
 {
     if ( copy_to_user((void *)addr, (void *)&val, bytes) )
     {
-        propagate_page_fault(addr, 6); /* user mode, write fault */
+        propagate_page_fault(addr, PGERR_write_access); /* write fault */
         return X86EMUL_PROPAGATE_FAULT;
     }
     return X86EMUL_CONTINUE;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] [XEN] Fix page-fault handler to not trust bit 0 of error code., Xen patchbot-unstable <=