WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
[Xen-changelog] [xen-unstable] [XEN] Clean up the shadow interface

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [XEN] Clean up the shadow interface
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 20 Dec 2006 17:15:19 -0800
Delivery-date: Wed, 20 Dec 2006 17:16:48 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1166616187 0
# Node ID c75d6f2aad7a64b66b814ade1af9669ea456a69a
# Parent  b258c7587d8ddf5dbdae872ea54e74a119dbfd1f
[XEN] Clean up the shadow interface
Remove a lot of unnecessary things from shadow.h, and move the shadow lock
entirely inside the shadow code.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
 xen/arch/x86/domain.c            |   19 -
 xen/arch/x86/domain_build.c      |    2 
 xen/arch/x86/mm.c                |  201 +++++++--------
 xen/arch/x86/mm/shadow/common.c  |  292 +++++++++++++---------
 xen/arch/x86/mm/shadow/multi.c   |   53 ++--
 xen/arch/x86/mm/shadow/multi.h   |    4 
 xen/arch/x86/mm/shadow/private.h |  160 ++++++++++--
 xen/arch/x86/mm/shadow/types.h   |    4 
 xen/include/asm-x86/mm.h         |    2 
 xen/include/asm-x86/shadow.h     |  506 ++++++++++-----------------------------
 10 files changed, 602 insertions(+), 641 deletions(-)
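
For anyone skimming the diff below, the heart of the cleanup is that code outside xen/arch/x86/mm/shadow/ no longer takes the shadow lock and calls shadow_validate_guest_entry() by hand; it goes through shadow_write_guest_entry() / shadow_cmpxchg_guest_entry(), which take and release the lock internally. A minimal stand-alone sketch of that pattern (the domain struct, lock helpers and validate stub are illustrative stand-ins, not the real Xen definitions):

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t intpte_t;
    struct domain { int shadow_lock_held; };     /* stand-in for the real lock */

    static void shadow_lock(struct domain *d)   { d->shadow_lock_held = 1; }
    static void shadow_unlock(struct domain *d) { d->shadow_lock_held = 0; }
    static void sh_validate_guest_entry(struct domain *d, intpte_t *p)
    {
        printf("revalidate shadows for %p (lock held: %d)\n",
               (void *)p, d->shadow_lock_held);
    }

    /* After this patch the shadow code exports one call that does the
     * write and the revalidation under its own lock. */
    static int shadow_write_guest_entry(struct domain *d, intpte_t *p,
                                        intpte_t new)
    {
        shadow_lock(d);
        *p = new;                    /* the real code uses __copy_to_user() */
        sh_validate_guest_entry(d, p);
        shadow_unlock(d);
        return 1;
    }

    /* Callers such as mod_l1_entry() or do_mmu_update() reduce to this,
     * instead of open-coding lock / write / validate / unlock. */
    int main(void)
    {
        struct domain d = { 0 };
        intpte_t pte = 0;
        return !shadow_write_guest_entry(&d, &pte, 0x1003);
    }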

diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/arch/x86/domain.c     Wed Dec 20 12:03:07 2006 +0000
@@ -172,10 +172,11 @@ int arch_domain_create(struct domain *d)
 {
 #ifdef __x86_64__
     struct page_info *pg;
+    int i;
 #endif
     l1_pgentry_t gdt_l1e;
     int vcpuid, pdpt_order;
-    int i, rc = -ENOMEM;
+    int rc = -ENOMEM;
 
     pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
     d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
@@ -218,12 +219,7 @@ int arch_domain_create(struct domain *d)
 
 #endif /* __x86_64__ */
 
-    shadow_lock_init(d);
-    for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
-        INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
-    INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
-    INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
-    INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows);
+    shadow_domain_init(d);
 
     if ( !is_idle_domain(d) )
     {
@@ -365,15 +361,6 @@ int arch_set_info_guest(
 
         v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
     }    
-
-    /* Shadow: make sure the domain has enough shadow memory to
-     * boot another vcpu */
-    if ( shadow_mode_enabled(d) 
-         && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) )
-    {
-        destroy_gdt(v);
-        return -ENOMEM;
-    }
 
     if ( v->vcpu_id == 0 )
         update_domain_wallclock_time(d);
diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/arch/x86/domain_build.c       Wed Dec 20 12:03:07 2006 +0000
@@ -827,7 +827,7 @@ int construct_dom0(struct domain *d,
     regs->eflags = X86_EFLAGS_IF;
 
     if ( opt_dom0_shadow )
-        if ( shadow_test_enable(d) == 0 ) 
+        if ( shadow_enable(d, SHM2_enable) == 0 ) 
             shadow_update_paging_modes(v);
 
     if ( supervisor_mode_kernel )
diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/arch/x86/mm.c Wed Dec 20 12:03:07 2006 +0000
@@ -365,6 +365,38 @@ void write_ptbase(struct vcpu *v)
     write_cr3(v->arch.cr3);
 }
 
+/* Should be called after CR3 is updated.
+ * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
+ * 
+ * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
+ * shadow_vtable, etc).
+ *
+ * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
+ * for HVM guests, arch.monitor_table and hvm's guest CR3.
+ *
+ * Update ref counts to shadow tables appropriately.
+ */
+void update_cr3(struct vcpu *v)
+{
+    unsigned long cr3_mfn=0;
+
+    if ( shadow_mode_enabled(v->domain) )
+    {
+        shadow_update_cr3(v);
+        return;
+    }
+
+#if CONFIG_PAGING_LEVELS == 4
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
+    else
+#endif
+        cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
+
+    make_cr3(v, cr3_mfn);
+}
+
+
 void invalidate_shadow_ldt(struct vcpu *v)
 {
     int i;
@@ -1160,53 +1192,57 @@ static void free_l4_table(struct page_in
 
 #endif
 
-static inline int update_l1e(l1_pgentry_t *pl1e, 
-                             l1_pgentry_t  ol1e, 
-                             l1_pgentry_t  nl1e,
-                             unsigned long gl1mfn,
-                             struct vcpu *v)
+
+/* How to write an entry to the guest pagetables.
+ * Returns 0 for failure (pointer not valid), 1 for success. */
+static inline int update_intpte(intpte_t *p, 
+                                intpte_t old, 
+                                intpte_t new,
+                                unsigned long mfn,
+                                struct vcpu *v)
 {
     int rv = 1;
+#ifndef PTE_UPDATE_WITH_CMPXCHG
     if ( unlikely(shadow_mode_enabled(v->domain)) )
-        shadow_lock(v->domain);
-#ifndef PTE_UPDATE_WITH_CMPXCHG
-    rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e)));
+        rv = shadow_write_guest_entry(v, p, new, _mfn(mfn));
+    else
+        rv = (!__copy_to_user(p, &new, sizeof(new)));
 #else
     {
-        intpte_t o = l1e_get_intpte(ol1e);
-        intpte_t n = l1e_get_intpte(nl1e);
-        
+        intpte_t t = old;
         for ( ; ; )
         {
-            if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+            if ( unlikely(shadow_mode_enabled(v->domain)) )
+                rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
+            else
+                rv = (!cmpxchg_user(p, t, new));
+
+            if ( unlikely(rv == 0) )
             {
                 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
-                        ": saw %" PRIpte,
-                        l1e_get_intpte(ol1e),
-                        l1e_get_intpte(nl1e),
-                        o);
-                rv = 0;
+                        ": saw %" PRIpte, old, new, t);
                 break;
             }
 
-            if ( o == l1e_get_intpte(ol1e) )
+            if ( t == old )
                 break;
 
             /* Allowed to change in Accessed/Dirty flags only. */
-            BUG_ON((o ^ l1e_get_intpte(ol1e)) &
-                   ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));
-            ol1e = l1e_from_intpte(o);
+            BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
+
+            old = t;
         }
     }
 #endif
-    if ( unlikely(shadow_mode_enabled(v->domain)) && rv )
-    {
-        shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
-        shadow_unlock(v->domain);    
-    }
     return rv;
 }
 
+/* Macro that wraps the appropriate type-changes around update_intpte().
+ * Arguments are: type, ptr, old, new, mfn, vcpu */
+#define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v)                             \
+    update_intpte((intpte_t *)(_p),                                 \
+                  _t ## e_get_intpte(_o), _t ## e_get_intpte(_n),   \
+                  (_m), (_v))
 
 /* Update the L1 entry at pl1e to new value nl1e. */
 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, 
@@ -1219,7 +1255,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
         return 0;
 
     if ( unlikely(shadow_mode_refcounts(d)) )
-        return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
+        return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
 
     if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
     {
@@ -1238,12 +1274,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
 
         /* Fast path for identical mapping, r/w and presence. */
         if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
-            return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
+            return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
 
         if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
             return 0;
         
-        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) )
+        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
         {
             put_page_from_l1e(nl1e, d);
             return 0;
@@ -1251,7 +1287,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
     }
     else
     {
-        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) )
+        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
             return 0;
     }
 
@@ -1259,36 +1295,6 @@ static int mod_l1_entry(l1_pgentry_t *pl
     return 1;
 }
 
-#ifndef PTE_UPDATE_WITH_CMPXCHG
-#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; })
-#else
-#define _UPDATE_ENTRY(_t,_p,_o,_n) ({                            \
-    for ( ; ; )                                                 \
-    {                                                           \
-        intpte_t __o = cmpxchg((intpte_t *)(_p),                \
-                               _t ## e_get_intpte(_o),          \
-                               _t ## e_get_intpte(_n));         \
-        if ( __o == _t ## e_get_intpte(_o) )                    \
-            break;                                              \
-        /* Allowed to change in Accessed/Dirty flags only. */   \
-        BUG_ON((__o ^ _t ## e_get_intpte(_o)) &                 \
-               ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));             \
-        _o = _t ## e_from_intpte(__o);                          \
-    }                                                           \
-    1; })
-#endif
-#define UPDATE_ENTRY(_t,_p,_o,_n,_m)  ({                            \
-    int rv;                                                         \
-    if ( unlikely(shadow_mode_enabled(current->domain)) )          \
-        shadow_lock(current->domain);                              \
-    rv = _UPDATE_ENTRY(_t, _p, _o, _n);                             \
-    if ( unlikely(shadow_mode_enabled(current->domain)) )          \
-    {                                                               \
-        shadow_validate_guest_entry(current, _mfn(_m), (_p));      \
-        shadow_unlock(current->domain);                            \
-    }                                                               \
-    rv;                                                             \
-})
 
 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
 static int mod_l2_entry(l2_pgentry_t *pl2e, 
@@ -1320,18 +1326,18 @@ static int mod_l2_entry(l2_pgentry_t *pl
 
         /* Fast path for identical mapping and presence. */
         if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
+            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
 
         if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) )
             return 0;
 
-        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
+        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
         {
             put_page_from_l2e(nl2e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
+    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
     {
         return 0;
     }
@@ -1381,18 +1387,18 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
         /* Fast path for identical mapping and presence. */
         if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn);
+            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
 
         if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
             return 0;
 
-        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
+        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
         {
             put_page_from_l3e(nl3e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
+    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
     {
         return 0;
     }
@@ -1439,18 +1445,18 @@ static int mod_l4_entry(l4_pgentry_t *pl
 
         /* Fast path for identical mapping and presence. */
         if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn);
+            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
 
         if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
             return 0;
 
-        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
+        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
         {
             put_page_from_l4e(nl4e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
+    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
     {
         return 0;
     }
@@ -2292,15 +2298,11 @@ int do_mmu_update(
                     break;
 
                 if ( unlikely(shadow_mode_enabled(d)) )
-                    shadow_lock(d);
-
-                *(intpte_t *)va = req.val;
-                okay = 1;
-
-                if ( unlikely(shadow_mode_enabled(d)) )
+                    okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn));
+                else
                 {
-                    shadow_validate_guest_entry(v, _mfn(mfn), va);
-                    shadow_unlock(d);
+                    *(intpte_t *)va = req.val;
+                    okay = 1;
                 }
 
                 put_page_type(page);
@@ -2409,7 +2411,7 @@ static int create_grant_pte_mapping(
     }
 
     ol1e = *(l1_pgentry_t *)va;
-    if ( !update_l1e(va, ol1e, nl1e, mfn, v) )
+    if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) )
     {
         put_page_type(page);
         rc = GNTST_general_error;
@@ -2477,7 +2479,7 @@ static int destroy_grant_pte_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(!update_l1e(
+    if ( unlikely(!UPDATE_ENTRY(l1, 
                       (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, 
                       d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
     {
@@ -2515,7 +2517,7 @@ static int create_grant_va_mapping(
         return GNTST_general_error;
     }
     ol1e = *pl1e;
-    okay = update_l1e(pl1e, ol1e, nl1e, gl1mfn, v);
+    okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
     guest_unmap_l1e(v, pl1e);
     pl1e = NULL;
 
@@ -2553,7 +2555,7 @@ static int destroy_grant_va_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
+    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         rc = GNTST_general_error;
@@ -2952,16 +2954,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
         UNLOCK_BIGLOCK(d);
 
-        /* If we're doing FAST_FAULT_PATH, then shadow mode may have
-           cached the fact that this is an mmio region in the shadow
-           page tables.  Blow the tables away to remove the cache.
-           This is pretty heavy handed, but this is a rare operation
-           (it might happen a dozen times during boot and then never
-           again), so it doesn't matter too much. */
-        shadow_lock(d);
-        shadow_blow_tables(d);
-        shadow_unlock(d);
-
         put_domain(d);
 
         break;
@@ -3188,27 +3180,30 @@ static int ptwr_emulated_update(
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
     if ( do_cmpxchg )
     {
+        int okay;
+        ol1e = l1e_from_intpte(old);
+
         if ( shadow_mode_enabled(d) )
-            shadow_lock(d);
-        ol1e = l1e_from_intpte(old);
-        if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
-        {
-            if ( shadow_mode_enabled(d) )
-                shadow_unlock(d);
+        {
+            intpte_t t = old;
+            okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, 
+                                              &t, val, _mfn(mfn));
+            okay = (okay && t == old);
+        }
+        else 
+            okay = (cmpxchg((intpte_t *)pl1e, old, val) == old);
+
+        if ( !okay )
+        {
             unmap_domain_page(pl1e);
             put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
             return X86EMUL_CMPXCHG_FAILED;
         }
-        if ( unlikely(shadow_mode_enabled(d)) )
-        {
-            shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
-            shadow_unlock(d);    
-        }
     }
     else
     {
         ol1e = *pl1e;
-        if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) )
+        if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) )
             BUG();
     }
 
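
A note on the UPDATE_ENTRY macro introduced in the mm.c hunks above: it relies on token pasting so that a single update_intpte() serves every pagetable level, with _t ## e_get_intpte expanding to l1e_get_intpte, l2e_get_intpte and so on. A reduced, compilable illustration of the same trick, with dummy entry types standing in for Xen's l?_pgentry_t and none of the cmpxchg or shadow logic:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t intpte_t;
    typedef struct { intpte_t pte; } l1_pgentry_t;
    typedef struct { intpte_t pte; } l2_pgentry_t;

    static intpte_t l1e_get_intpte(l1_pgentry_t e) { return e.pte; }
    static intpte_t l2e_get_intpte(l2_pgentry_t e) { return e.pte; }

    /* Stand-in for the real update_intpte(): plain compare-and-store. */
    static int update_intpte(intpte_t *p, intpte_t old, intpte_t new)
    {
        if ( *p != old )
            return 0;
        *p = new;
        return 1;
    }

    /* Same shape as the patch: "_t" is the level prefix (l1, l2, ...). */
    #define UPDATE_ENTRY(_t,_p,_o,_n)                                   \
        update_intpte((intpte_t *)(_p),                                 \
                      _t ## e_get_intpte(_o), _t ## e_get_intpte(_n))

    int main(void)
    {
        l1_pgentry_t e = { 0x1000 }, o = { 0x1000 }, n = { 0x2000 };
        printf("l1 update: %d, new value %#llx\n",
               UPDATE_ENTRY(l1, &e, o, n), (unsigned long long)e.pte);
        return 0;
    }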
diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Wed Dec 20 12:03:07 2006 +0000
@@ -38,6 +38,21 @@
 #include <asm/shadow.h>
 #include "private.h"
 
+
+/* Set up the shadow-specific parts of a domain struct at start of day.
+ * Called for every domain from arch_domain_create() */
+void shadow_domain_init(struct domain *d)
+{
+    int i;
+    shadow_lock_init(d);
+    for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
+        INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
+    INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
+    INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
+    INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows);
+}
+
+
 #if SHADOW_AUDIT
 int shadow_audit_enable = 0;
 
@@ -434,7 +449,7 @@ void shadow_promote(struct vcpu *v, mfn_
     ASSERT(mfn_valid(gmfn));
 
     /* We should never try to promote a gmfn that has writeable mappings */
-    ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0);
+    ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0);
 
     /* Is the page already shadowed? */
     if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
@@ -466,8 +481,7 @@ void shadow_demote(struct vcpu *v, mfn_t
  * Returns a bitmask of SHADOW_SET_* flags. */
 
 int
-__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
-                               void *entry, u32 size)
+sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size)
 {
     int result = 0;
     struct page_info *page = mfn_to_page(gmfn);
@@ -546,22 +560,9 @@ __shadow_validate_guest_entry(struct vcp
 }
 
 
-int
-shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry)
-/* This is the entry point from hypercalls. It returns a bitmask of all the 
- * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */
-{
-    int rc;
-
-    ASSERT(shadow_locked_by_me(v->domain));
-    rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t));
-    shadow_audit_tables(v);
-    return rc;
-}
-
 void
-shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
-                                void *entry, u32 size)
+sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+                           void *entry, u32 size)
 /* This is the entry point for emulated writes to pagetables in HVM guests and
  * PV translated guests.
  */
@@ -570,7 +571,7 @@ shadow_validate_guest_pt_write(struct vc
     int rc;
 
     ASSERT(shadow_locked_by_me(v->domain));
-    rc = __shadow_validate_guest_entry(v, gmfn, entry, size);
+    rc = sh_validate_guest_entry(v, gmfn, entry, size);
     if ( rc & SHADOW_SET_FLUSH )
         /* Need to flush TLBs to pick up shadow PT changes */
         flush_tlb_mask(d->domain_dirty_cpumask);
@@ -583,6 +584,38 @@ shadow_validate_guest_pt_write(struct vc
          * unshadow the page. */
         sh_remove_shadows(v, gmfn, 0, 0);
     }
+}
+
+int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
+                             intpte_t new, mfn_t gmfn)
+/* Write a new value into the guest pagetable, and update the shadows 
+ * appropriately.  Returns 0 if we page-faulted, 1 for success. */
+{
+    int failed;
+    shadow_lock(v->domain);
+    failed = __copy_to_user(p, &new, sizeof(new));
+    if ( failed != sizeof(new) )
+        sh_validate_guest_entry(v, gmfn, p, sizeof(new));
+    shadow_unlock(v->domain);
+    return (failed == 0);
+}
+
+int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
+                               intpte_t *old, intpte_t new, mfn_t gmfn)
+/* Cmpxchg a new value into the guest pagetable, and update the shadows 
+ * appropriately. Returns 0 if we page-faulted, 1 if not.
+ * N.B. caller should check the value of "old" to see if the
+ * cmpxchg itself was successful. */
+{
+    int failed;
+    intpte_t t = *old;
+    shadow_lock(v->domain);
+    failed = cmpxchg_user(p, t, new);
+    if ( t == *old )
+        sh_validate_guest_entry(v, gmfn, p, sizeof(new));
+    *old = t;
+    shadow_unlock(v->domain);
+    return (failed == 0);
 }
 
 
@@ -791,7 +824,7 @@ void shadow_prealloc(struct domain *d, u
 
 /* Deliberately free all the memory we can: this will tear down all of
  * this domain's shadows */
-void shadow_blow_tables(struct domain *d) 
+static void shadow_blow_tables(struct domain *d) 
 {
     struct list_head *l, *t;
     struct shadow_page_info *sp;
@@ -989,7 +1022,7 @@ void shadow_free(struct domain *d, mfn_t
  * Also, we only ever allocate a max-order chunk, so as to preserve
  * the invariant that shadow_prealloc() always works.
  * Returns 0 iff it can't get a chunk (the caller should then
- * free up some pages in domheap and call set_sh_allocation);
+ * free up some pages in domheap and call sh_set_allocation);
  * returns non-zero on success.
  */
 static int
@@ -1149,14 +1182,14 @@ p2m_next_level(struct domain *d, mfn_t *
                 if ( pagetable_get_pfn(v->arch.guest_table) 
                      == pagetable_get_pfn(d->arch.phys_table) 
                      && v->arch.shadow.mode != NULL )
-                    v->arch.shadow.mode->update_cr3(v);
+                    v->arch.shadow.mode->update_cr3(v, 0);
             }
         }
 #endif
         /* The P2M can be shadowed: keep the shadows synced */
         if ( d->vcpu[0] != NULL )
-            (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn,
-                                                p2m_entry, sizeof *p2m_entry);
+            (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn,
+                                          p2m_entry, sizeof *p2m_entry);
     }
     *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
     next = sh_map_domain_page(*table_mfn);
@@ -1216,8 +1249,8 @@ shadow_set_p2m_entry(struct domain *d, u
 
     /* The P2M can be shadowed: keep the shadows synced */
     if ( d->vcpu[0] != NULL )
-        (void)__shadow_validate_guest_entry(
-            d->vcpu[0], table_mfn, p2m_entry, sizeof(*p2m_entry));
+        (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, 
+                                      p2m_entry, sizeof(*p2m_entry));
 
     /* Success */
     rv = 1;
@@ -1427,9 +1460,9 @@ static void shadow_p2m_teardown(struct d
  * Input will be rounded up to at least shadow_min_acceptable_pages(),
  * plus space for the p2m table.
  * Returns 0 for success, non-zero for failure. */
-static unsigned int set_sh_allocation(struct domain *d, 
-                                       unsigned int pages,
-                                       int *preempted)
+static unsigned int sh_set_allocation(struct domain *d, 
+                                      unsigned int pages,
+                                      int *preempted)
 {
     struct shadow_page_info *sp;
     unsigned int lower_bound;
@@ -1499,20 +1532,12 @@ static unsigned int set_sh_allocation(st
     return 0;
 }
 
-unsigned int shadow_set_allocation(struct domain *d, 
-                                    unsigned int megabytes,
-                                    int *preempted)
-/* Hypercall interface to set the shadow memory allocation */
-{
-    unsigned int rv;
-    shadow_lock(d);
-    rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted); 
-    SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n",
-                   d->domain_id,
-                   d->arch.shadow.total_pages,
-                   shadow_get_allocation(d));
-    shadow_unlock(d);
-    return rv;
+/* Return the size of the shadow pool, rounded up to the nearest MB */
+static unsigned int shadow_get_allocation(struct domain *d)
+{
+    unsigned int pg = d->arch.shadow.total_pages;
+    return ((pg >> (20 - PAGE_SHIFT))
+            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
 }
 
 /**************************************************************************/
@@ -1889,24 +1914,24 @@ void sh_destroy_shadow(struct vcpu *v, m
  * level and fault_addr describe how we found this to be a pagetable;
  * level==0 means we have some other reason for revoking write access.*/
 
-int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, 
-                                unsigned int level,
-                                unsigned long fault_addr)
+int sh_remove_write_access(struct vcpu *v, mfn_t gmfn, 
+                           unsigned int level,
+                           unsigned long fault_addr)
 {
     /* Dispatch table for getting per-type functions */
     static hash_callback_t callbacks[16] = {
         NULL, /* none    */
 #if CONFIG_PAGING_LEVELS == 2
-        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32   */
-        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32  */
 #else 
-        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32   */
-        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32  */
 #endif
         NULL, /* l2_32   */
 #if CONFIG_PAGING_LEVELS >= 3
-        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae  */
-        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */
+        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */
 #else 
         NULL, /* l1_pae  */
         NULL, /* fl1_pae */
@@ -1914,8 +1939,8 @@ int shadow_remove_write_access(struct vc
         NULL, /* l2_pae  */
         NULL, /* l2h_pae */
 #if CONFIG_PAGING_LEVELS >= 4
-        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
-        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
+        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64  */
 #else
         NULL, /* l1_64   */
         NULL, /* fl1_64  */
@@ -2077,25 +2102,25 @@ int shadow_remove_write_access(struct vc
 /* Remove all mappings of a guest frame from the shadow tables.
  * Returns non-zero if we need to flush TLBs. */
 
-int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
+int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
 {
     struct page_info *page = mfn_to_page(gmfn);
-    int expected_count;
+    int expected_count, do_locking;
 
     /* Dispatch table for getting per-type functions */
     static hash_callback_t callbacks[16] = {
         NULL, /* none    */
 #if CONFIG_PAGING_LEVELS == 2
-        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32   */
-        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32  */
 #else 
-        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32   */
-        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32  */
 #endif
         NULL, /* l2_32   */
 #if CONFIG_PAGING_LEVELS >= 3
-        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae  */
-        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */
+        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */
 #else 
         NULL, /* l1_pae  */
         NULL, /* fl1_pae */
@@ -2103,8 +2128,8 @@ int shadow_remove_all_mappings(struct vc
         NULL, /* l2_pae  */
         NULL, /* l2h_pae */
 #if CONFIG_PAGING_LEVELS >= 4
-        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
-        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
+        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64  */
 #else
         NULL, /* l1_64   */
         NULL, /* fl1_64  */
@@ -2129,7 +2154,12 @@ int shadow_remove_all_mappings(struct vc
     if ( (page->count_info & PGC_count_mask) == 0 )
         return 0;
 
-    ASSERT(shadow_locked_by_me(v->domain));
+    /* Although this is an externally visible function, we do not know
+     * whether the shadow lock will be held when it is called (since it
+     * can be called via put_page_type when we clear a shadow l1e).
+     * If the lock isn't held, take it for the duration of the call. */
+    do_locking = !shadow_locked_by_me(v->domain);
+    if ( do_locking ) shadow_lock(v->domain);
 
     /* XXX TODO: 
      * Heuristics for finding the (probably) single mapping of this gmfn */
@@ -2153,6 +2183,8 @@ int shadow_remove_all_mappings(struct vc
                           page->count_info, page->u.inuse.type_info);
         }
     }
+
+    if ( do_locking ) shadow_unlock(v->domain);
 
     /* We killed at least one mapping, so must flush TLBs. */
     return 1;
@@ -2236,9 +2268,10 @@ void sh_remove_shadows(struct vcpu *v, m
  * (all != 0 implies fast == 0)
  */
 {
-    struct page_info *pg;
+    struct page_info *pg = mfn_to_page(gmfn);
     mfn_t smfn;
     u32 sh_flags;
+    int do_locking;
     unsigned char t;
     
     /* Dispatch table for getting per-type functions: each level must
@@ -2296,14 +2329,18 @@ void sh_remove_shadows(struct vcpu *v, m
         0  /* unused  */
     };
 
-    ASSERT(shadow_locked_by_me(v->domain));
     ASSERT(!(all && fast));
-
-    pg = mfn_to_page(gmfn);
 
     /* Bail out now if the page is not shadowed */
     if ( (pg->count_info & PGC_page_table) == 0 )
         return;
+
+    /* Although this is an externally visible function, we do not know
+     * whether the shadow lock will be held when it is called (since it
+     * can be called via put_page_type when we clear a shadow l1e).
+     * If the lock isn't held, take it for the duration of the call. */
+    do_locking = !shadow_locked_by_me(v->domain);
+    if ( do_locking ) shadow_lock(v->domain);
 
     SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
                    v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
@@ -2356,14 +2393,16 @@ void sh_remove_shadows(struct vcpu *v, m
     /* Need to flush TLBs now, so that linear maps are safe next time we 
      * take a fault. */
     flush_tlb_mask(v->domain->domain_dirty_cpumask);
-}
-
-void
-shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
+
+    if ( do_locking ) shadow_unlock(v->domain);
+}
+
+static void
+sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
 * Even harsher: this is a HVM page that we think is no longer a pagetable.
  * Unshadow it, and recursively unshadow pages that reference it. */
 {
-    shadow_remove_all_shadows(v, gmfn);
+    sh_remove_shadows(v, gmfn, 0, 1);
     /* XXX TODO:
      * Rework this hashtable walker to return a linked-list of all 
      * the shadows it modified, then do breadth-first recursion 
@@ -2376,7 +2415,7 @@ shadow_remove_all_shadows_and_parents(st
 
 /**************************************************************************/
 
-void sh_update_paging_modes(struct vcpu *v)
+static void sh_update_paging_modes(struct vcpu *v)
 {
     struct domain *d = v->domain;
     struct shadow_paging_mode *old_mode = v->arch.shadow.mode;
@@ -2394,7 +2433,8 @@ void sh_update_paging_modes(struct vcpu 
 
     // First, tear down any old shadow tables held by this vcpu.
     //
-    shadow_detach_old_tables(v);
+    if ( v->arch.shadow.mode )
+        v->arch.shadow.mode->detach_old_tables(v);
 
     if ( !is_hvm_domain(d) )
     {
@@ -2402,10 +2442,9 @@ void sh_update_paging_modes(struct vcpu 
         /// PV guest
         ///
 #if CONFIG_PAGING_LEVELS == 4
-        if ( pv_32bit_guest(v) )
-            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3);
-        else
-            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
+        /* When 32-on-64 PV guests are supported, they must choose 
+         * a different mode here */
+        v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
 #elif CONFIG_PAGING_LEVELS == 3
         v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
 #elif CONFIG_PAGING_LEVELS == 2
@@ -2493,7 +2532,7 @@ void sh_update_paging_modes(struct vcpu 
 
         if ( pagetable_is_null(v->arch.monitor_table) )
         {
-            mfn_t mmfn = shadow_make_monitor_table(v);
+            mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v);
             v->arch.monitor_table = pagetable_from_mfn(mmfn);
             make_cr3(v, mfn_x(mmfn));
             hvm_update_host_cr3(v);
@@ -2528,7 +2567,7 @@ void sh_update_paging_modes(struct vcpu 
 
                 old_mfn = pagetable_get_mfn(v->arch.monitor_table);
                 v->arch.monitor_table = pagetable_null();
-                new_mfn = v->arch.shadow.mode->make_monitor_table(v);            
+                new_mfn = v->arch.shadow.mode->make_monitor_table(v);
                 v->arch.monitor_table = pagetable_from_mfn(new_mfn);
                 SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n",
                                mfn_x(new_mfn));
@@ -2549,7 +2588,14 @@ void sh_update_paging_modes(struct vcpu 
         //        This *does* happen, at least for CR4.PGE...
     }
 
-    v->arch.shadow.mode->update_cr3(v);
+    v->arch.shadow.mode->update_cr3(v, 0);
+}
+
+void shadow_update_paging_modes(struct vcpu *v)
+{
+    shadow_lock(v->domain);
+    sh_update_paging_modes(v);
+    shadow_unlock(v->domain);
 }
 
 /**************************************************************************/
@@ -2610,9 +2656,9 @@ int shadow_enable(struct domain *d, u32 
     /* Init the shadow memory allocation if the user hasn't done so */
     old_pages = d->arch.shadow.total_pages;
     if ( old_pages == 0 )
-        if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
-        {
-            set_sh_allocation(d, 0, NULL);
+        if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
+        {
+            sh_set_allocation(d, 0, NULL);
             rv = -ENOMEM;
             goto out;
         }
@@ -2620,7 +2666,7 @@ int shadow_enable(struct domain *d, u32 
     /* Init the hash table */
     if ( shadow_hash_alloc(d) != 0 )
     {
-        set_sh_allocation(d, old_pages, NULL);            
+        sh_set_allocation(d, old_pages, NULL);            
         rv = -ENOMEM;
         goto out;
     }
@@ -2630,7 +2676,7 @@ int shadow_enable(struct domain *d, u32 
         if ( !shadow_alloc_p2m_table(d) )
         {
             shadow_hash_teardown(d);
-            set_sh_allocation(d, old_pages, NULL);
+            sh_set_allocation(d, old_pages, NULL);
             shadow_p2m_teardown(d);
             rv = -ENOMEM;
             goto out;
@@ -2669,13 +2715,16 @@ void shadow_teardown(struct domain *d)
         /* Release the shadow and monitor tables held by each vcpu */
         for_each_vcpu(d, v)
         {
-            shadow_detach_old_tables(v);
-            if ( shadow_mode_external(d) )
+            if ( v->arch.shadow.mode )
             {
-                mfn = pagetable_get_mfn(v->arch.monitor_table);
-                if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
-                    shadow_destroy_monitor_table(v, mfn);
-                v->arch.monitor_table = pagetable_null();
+                v->arch.shadow.mode->detach_old_tables(v);
+                if ( shadow_mode_external(d) )
+                {
+                    mfn = pagetable_get_mfn(v->arch.monitor_table);
+                    if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
+                        v->arch.shadow.mode->destroy_monitor_table(v, mfn);
+                    v->arch.monitor_table = pagetable_null();
+                }
             }
         }
     }
@@ -2689,7 +2738,7 @@ void shadow_teardown(struct domain *d)
                        d->arch.shadow.free_pages, 
                        d->arch.shadow.p2m_pages);
         /* Destroy all the shadows and release memory to domheap */
-        set_sh_allocation(d, 0, NULL);
+        sh_set_allocation(d, 0, NULL);
         /* Release the hash table back to xenheap */
         if (d->arch.shadow.hash_table) 
             shadow_hash_teardown(d);
@@ -2755,10 +2804,10 @@ static int shadow_one_bit_enable(struct 
     if ( d->arch.shadow.mode == 0 )
     {
         /* Init the shadow memory allocation and the hash table */
-        if ( set_sh_allocation(d, 1, NULL) != 0 
+        if ( sh_set_allocation(d, 1, NULL) != 0 
              || shadow_hash_alloc(d) != 0 )
         {
-            set_sh_allocation(d, 0, NULL);
+            sh_set_allocation(d, 0, NULL);
             return -ENOMEM;
         }
     }
@@ -2794,7 +2843,8 @@ static int shadow_one_bit_disable(struct
                        d->arch.shadow.p2m_pages);
         for_each_vcpu(d, v)
         {
-            shadow_detach_old_tables(v);
+            if ( v->arch.shadow.mode )
+                v->arch.shadow.mode->detach_old_tables(v);
 #if CONFIG_PAGING_LEVELS == 4
             if ( !(v->arch.flags & TF_kernel_mode) )
                 make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
@@ -2805,7 +2855,7 @@ static int shadow_one_bit_disable(struct
         }
 
         /* Pull down the memory allocation */
-        if ( set_sh_allocation(d, 0, NULL) != 0 )
+        if ( sh_set_allocation(d, 0, NULL) != 0 )
         {
             // XXX - How can this occur?
             //       Seems like a bug to return an error now that we've
@@ -2826,7 +2876,7 @@ static int shadow_one_bit_disable(struct
 }
 
 /* Enable/disable ops for the "test" and "log-dirty" modes */
-int shadow_test_enable(struct domain *d)
+static int shadow_test_enable(struct domain *d)
 {
     int ret;
 
@@ -2849,7 +2899,7 @@ int shadow_test_enable(struct domain *d)
     return ret;
 }
 
-int shadow_test_disable(struct domain *d)
+static int shadow_test_disable(struct domain *d)
 {
     int ret;
 
@@ -2968,8 +3018,8 @@ sh_p2m_remove_page(struct domain *d, uns
 
     if ( v != NULL )
     {
-        shadow_remove_all_shadows_and_parents(v, _mfn(mfn));
-        if ( shadow_remove_all_mappings(v, _mfn(mfn)) )
+        sh_remove_all_shadows_and_parents(v, _mfn(mfn));
+        if ( sh_remove_all_mappings(v, _mfn(mfn)) )
             flush_tlb_mask(d->domain_dirty_cpumask);
     }
 
@@ -3012,8 +3062,8 @@ shadow_guest_physmap_add_page(struct dom
             v = d->vcpu[0];
         if ( v != NULL )
         {
-            shadow_remove_all_shadows_and_parents(v, omfn);
-            if ( shadow_remove_all_mappings(v, omfn) )
+            sh_remove_all_shadows_and_parents(v, omfn);
+            if ( sh_remove_all_mappings(v, omfn) )
                 flush_tlb_mask(d->domain_dirty_cpumask);
         }
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
@@ -3043,6 +3093,17 @@ shadow_guest_physmap_add_page(struct dom
 
     shadow_set_p2m_entry(d, gfn, _mfn(mfn));
     set_gpfn_from_mfn(mfn, gfn);
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
+    /* If we're doing FAST_FAULT_PATH, then shadow mode may have
+       cached the fact that this is an mmio region in the shadow
+       page tables.  Blow the tables away to remove the cache.
+       This is pretty heavy handed, but this is a rare operation
+       (it might happen a dozen times during boot and then never
+       again), so it doesn't matter too much. */
+    shadow_blow_tables(d);
+#endif
+
     shadow_audit_p2m(d);
     shadow_unlock(d);
 }
@@ -3130,14 +3191,13 @@ static int shadow_log_dirty_op(
 
 
 /* Mark a page as dirty */
-void sh_do_mark_dirty(struct domain *d, mfn_t gmfn)
+void sh_mark_dirty(struct domain *d, mfn_t gmfn)
 {
     unsigned long pfn;
 
     ASSERT(shadow_locked_by_me(d));
-    ASSERT(shadow_mode_log_dirty(d));
-
-    if ( !mfn_valid(gmfn) )
+
+    if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) )
         return;
 
     ASSERT(d->arch.shadow.dirty_bitmap != NULL);
@@ -3181,13 +3241,19 @@ void sh_do_mark_dirty(struct domain *d, 
     }
 }
 
+void shadow_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+    shadow_lock(d);
+    sh_mark_dirty(d, gmfn);
+    shadow_unlock(d);
+}
 
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
 int shadow_domctl(struct domain *d, 
-                   xen_domctl_shadow_op_t *sc,
-                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+                  xen_domctl_shadow_op_t *sc,
+                  XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
 {
     int rc, preempted = 0;
 
@@ -3233,7 +3299,9 @@ int shadow_domctl(struct domain *d,
         return 0;
 
     case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
-        rc = shadow_set_allocation(d, sc->mb, &preempted);
+        shadow_lock(d);
+        rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
+        shadow_unlock(d);
         if ( preempted )
             /* Not finished.  Set up to re-run the call. */
             rc = hypercall_create_continuation(
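
Two of the common.c hunks above convert between megabytes and pages: the XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION case now passes sc->mb << (20 - PAGE_SHIFT) straight to sh_set_allocation(), and shadow_get_allocation() rounds the page count back up to whole megabytes. With 4 KiB pages (PAGE_SHIFT == 12) that is 256 pages per MB. A quick stand-alone check of the round-trip arithmetic, with PAGE_SHIFT hard-coded to 12 purely for illustration:

    #include <stdio.h>

    #define PAGE_SHIFT 12   /* assumption: 4 KiB pages, as on x86 */

    /* MB -> pages, as in the XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION hunk */
    static unsigned int mb_to_pages(unsigned int mb)
    {
        return mb << (20 - PAGE_SHIFT);
    }

    /* pages -> MB rounded up, same arithmetic as shadow_get_allocation() */
    static unsigned int pages_to_mb(unsigned int pg)
    {
        return (pg >> (20 - PAGE_SHIFT))
             + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0);
    }

    int main(void)
    {
        printf("4 MB      -> %u pages\n", mb_to_pages(4));    /* 1024 */
        printf("300 pages -> %u MB\n", pages_to_mb(300));     /* rounds up to 2 */
        printf("256 pages -> %u MB\n", pages_to_mb(256));     /* exactly 1 */
        return 0;
    }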
diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Wed Dec 20 12:03:07 2006 +0000
@@ -243,7 +243,7 @@ guest_walk_tables(struct vcpu *v, unsign
     gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
     if ( !mfn_valid(gw->l3mfn) ) return 1;
     /* This mfn is a pagetable: make sure the guest can't write to it. */
-    if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
+    if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
         flush_tlb_mask(v->domain->domain_dirty_cpumask); 
     gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
         + guest_l3_table_offset(va);
@@ -257,7 +257,7 @@ guest_walk_tables(struct vcpu *v, unsign
     gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
     if ( !mfn_valid(gw->l2mfn) ) return 1;
     /* This mfn is a pagetable: make sure the guest can't write to it. */
-    if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
+    if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
         flush_tlb_mask(v->domain->domain_dirty_cpumask); 
     gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
         + guest_l2_table_offset(va);
@@ -299,7 +299,7 @@ guest_walk_tables(struct vcpu *v, unsign
         if ( !mfn_valid(gw->l1mfn) ) return 1;
         /* This mfn is a pagetable: make sure the guest can't write to it. */
         if ( guest_op 
-             && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
+             && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
             flush_tlb_mask(v->domain->domain_dirty_cpumask); 
         gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
             + guest_l1_table_offset(va);
@@ -492,7 +492,7 @@ static u32 guest_set_ad_bits(struct vcpu
         u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
         /* More than one type bit set in shadow-flags? */
         if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
-            res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep));
+            res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
     }
 
     /* We should never need to flush the TLB or recopy PAE entries */
@@ -2847,7 +2847,7 @@ static int sh_page_fault(struct vcpu *v,
         /* If this is actually a page table, then we have a bug, and need 
          * to support more operations in the emulator.  More likely, 
          * though, this is a hint that this page should not be shadowed. */
-        shadow_remove_all_shadows(v, gmfn);
+        sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
     }
 
     /* Emulator has changed the user registers: write back */
@@ -3080,7 +3080,7 @@ sh_update_linear_entries(struct vcpu *v)
             sh_unmap_domain_page(ml4e);
         }
 
-        /* Shadow l3 tables are made up by update_cr3 */
+        /* Shadow l3 tables are made up by sh_update_cr3 */
         sl3e = v->arch.shadow.l3table;
 
         for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
@@ -3118,7 +3118,7 @@ sh_update_linear_entries(struct vcpu *v)
         int unmap_l2e = 0;
 
 #if GUEST_PAGING_LEVELS == 2
-        /* Shadow l3 tables were built by update_cr3 */
+        /* Shadow l3 tables were built by sh_update_cr3 */
         if ( shadow_mode_external(d) )
             shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table;
         else
@@ -3341,12 +3341,15 @@ sh_set_toplevel_shadow(struct vcpu *v,
 
 
 static void
-sh_update_cr3(struct vcpu *v)
+sh_update_cr3(struct vcpu *v, int do_locking)
 /* Updates vcpu->arch.cr3 after the guest has changed CR3.
  * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
  * if appropriate).
  * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works,
  * and read vcpu->arch.hvm_vcpu.hw_cr3 afterwards.
+ * If do_locking != 0, assume we are being called from outside the 
+ * shadow code, and must take and release the shadow lock; otherwise 
+ * that is the caller's responsibility.
  */
 {
     struct domain *d = v->domain;
@@ -3354,6 +3357,15 @@ sh_update_cr3(struct vcpu *v)
 #if GUEST_PAGING_LEVELS == 3
     u32 guest_idx=0;
 #endif
+
+    /* Don't do anything on an uninitialised vcpu */
+    if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+    {
+        ASSERT(v->arch.cr3 == 0);
+        return;
+    }
+
+    if ( do_locking ) shadow_lock(v->domain);
 
     ASSERT(shadow_locked_by_me(v->domain));
     ASSERT(v->arch.shadow.mode);
@@ -3400,11 +3412,6 @@ sh_update_cr3(struct vcpu *v)
 #endif
         gmfn = pagetable_get_mfn(v->arch.guest_table);
 
-    if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
-    {
-        ASSERT(v->arch.cr3 == 0);
-        return;
-    }
 
     ////
     //// vcpu->arch.guest_vtable
@@ -3466,7 +3473,7 @@ sh_update_cr3(struct vcpu *v)
      * replace the old shadow pagetable(s), so that we can safely use the 
      * (old) shadow linear maps in the writeable mapping heuristics. */
 #if GUEST_PAGING_LEVELS == 2
-    if ( shadow_remove_write_access(v, gmfn, 2, 0) != 0 )
+    if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 )
         flush_tlb_mask(v->domain->domain_dirty_cpumask); 
     sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow);
 #elif GUEST_PAGING_LEVELS == 3
@@ -3484,7 +3491,7 @@ sh_update_cr3(struct vcpu *v)
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
                 gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
-                flush |= shadow_remove_write_access(v, gl2mfn, 2, 0); 
+                flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 
             }
         }
         if ( flush ) 
@@ -3506,7 +3513,7 @@ sh_update_cr3(struct vcpu *v)
         }
     }
 #elif GUEST_PAGING_LEVELS == 4
-    if ( shadow_remove_write_access(v, gmfn, 4, 0) != 0 )
+    if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 )
         flush_tlb_mask(v->domain->domain_dirty_cpumask);
     sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow);
 #else
@@ -3582,6 +3589,9 @@ sh_update_cr3(struct vcpu *v)
 
     /* Fix up the linear pagetable mappings */
     sh_update_linear_entries(v);
+
+    /* Release the lock, if we took it (otherwise it's the caller's problem) */
+    if ( do_locking ) shadow_unlock(v->domain);
 }
 
 
@@ -3637,7 +3647,8 @@ static int sh_guess_wrmap(struct vcpu *v
 }
 #endif
 
-int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn)
+int sh_rm_write_access_from_l1(struct vcpu *v, mfn_t sl1mfn,
+                               mfn_t readonly_mfn)
 /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */
 {
     shadow_l1e_t *sl1e;
@@ -3668,7 +3679,7 @@ int sh_remove_write_access(struct vcpu *
 }
 
 
-int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
+int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
 /* Excises all mappings to guest frame from this shadow l1 table */
 {
     shadow_l1e_t *sl1e;
@@ -3888,7 +3899,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
 
     skip = safe_not_to_verify_write(mfn, addr, src, bytes);
     memcpy(addr, src, bytes);
-    if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes);
+    if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
 
     /* If we are writing zeros to this page, might want to unshadow */
     if ( likely(bytes >= 4) && (*(u32 *)addr == 0) )
@@ -3933,7 +3944,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
 
     if ( prev == old )
     {
-        if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, bytes);
+        if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
     }
     else
         rv = X86EMUL_CMPXCHG_FAILED;
@@ -3977,7 +3988,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
 
     if ( prev == old )
     {
-        if ( !skip ) shadow_validate_guest_pt_write(v, mfn, addr, 8);
+        if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8);
     }
     else
         rv = X86EMUL_CMPXCHG_FAILED;
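
Several of the reworked entry points (sh_remove_shadows() and sh_remove_all_mappings() in common.c, and sh_update_cr3() above) can now be reached either from inside the shadow code with the lock already held, or from outside it with the lock free. They all use the same guard: take the lock only if this CPU does not already hold it, and drop it only if it was taken here. A reduced sketch of that guard, with trivial stand-ins for the lock-ownership fields rather than the real Xen spinlock:

    #include <stdio.h>

    #define THIS_CPU 0                /* plays the role of current->processor */

    struct domain {
        int shadow_locker;            /* -1 when the shadow lock is free */
    };

    static int shadow_locked_by_me(struct domain *d)
    {
        return d->shadow_locker == THIS_CPU;
    }

    static void shadow_lock(struct domain *d)   { d->shadow_locker = THIS_CPU; }
    static void shadow_unlock(struct domain *d) { d->shadow_locker = -1; }

    /* Same shape as sh_remove_shadows(): lock only if we don't already
     * hold it, unlock only if we took it here. */
    static void remove_shadows(struct domain *d)
    {
        int do_locking = !shadow_locked_by_me(d);

        if ( do_locking ) shadow_lock(d);

        printf("working with the lock held (locker=%d)\n", d->shadow_locker);

        if ( do_locking ) shadow_unlock(d);
    }

    int main(void)
    {
        struct domain d = { -1 };

        remove_shadows(&d);           /* from outside: takes the lock        */

        shadow_lock(&d);
        remove_shadows(&d);           /* lock already held: no re-take       */
        shadow_unlock(&d);
        return 0;
    }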
diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/multi.h
--- a/xen/arch/x86/mm/shadow/multi.h    Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/arch/x86/mm/shadow/multi.h    Wed Dec 20 12:03:07 2006 +0000
@@ -61,10 +61,10 @@ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappi
     (struct vcpu *v, mfn_t sl4mfn);
 
 extern int
-SHADOW_INTERNAL_NAME(sh_remove_write_access, SHADOW_LEVELS, GUEST_LEVELS)
+SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, SHADOW_LEVELS, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn);
 extern int
-SHADOW_INTERNAL_NAME(sh_remove_all_mappings, SHADOW_LEVELS, GUEST_LEVELS)
+SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1, SHADOW_LEVELS, GUEST_LEVELS)
     (struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn);
 
 extern void
diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/arch/x86/mm/shadow/private.h  Wed Dec 20 12:03:07 2006 +0000
@@ -33,8 +33,43 @@
 
 
 /******************************************************************************
+ * Levels of self-test and paranoia
+ */
+
+#define SHADOW_AUDIT_HASH           0x01  /* Check current hash bucket */
+#define SHADOW_AUDIT_HASH_FULL      0x02  /* Check every hash bucket */
+#define SHADOW_AUDIT_ENTRIES        0x04  /* Check this walk's shadows */
+#define SHADOW_AUDIT_ENTRIES_FULL   0x08  /* Check every shadow */
+#define SHADOW_AUDIT_ENTRIES_MFNS   0x10  /* Check gfn-mfn map in shadows */
+#define SHADOW_AUDIT_P2M            0x20  /* Check the p2m table */
+
+#ifdef NDEBUG
+#define SHADOW_AUDIT                   0
+#define SHADOW_AUDIT_ENABLE            0
+#else
+#define SHADOW_AUDIT                0x15  /* Basic audit of all except p2m. */
+#define SHADOW_AUDIT_ENABLE         shadow_audit_enable
+extern int shadow_audit_enable;
+#endif
+
+/******************************************************************************
+ * Levels of optimization
+ */
+
+#define SHOPT_WRITABLE_HEURISTIC  0x01  /* Guess at RW PTEs via linear maps */
+#define SHOPT_EARLY_UNSHADOW      0x02  /* Unshadow l1s on fork or exit */
+#define SHOPT_FAST_FAULT_PATH     0x04  /* Fast-path MMIO and not-present */
+#define SHOPT_PREFETCH            0x08  /* Shadow multiple entries per fault */
+#define SHOPT_LINUX_L3_TOPLEVEL   0x10  /* Pin l3es on early 64bit linux */
+#define SHOPT_SKIP_VERIFY         0x20  /* Skip PTE v'fy when safe to do so */
+
+#define SHADOW_OPTIMIZATIONS      0x3f
+
+
+/******************************************************************************
  * Debug and error-message output
  */
+
 #define SHADOW_PRINTK(_f, _a...)                                     \
     debugtrace_printk("sh: %s(): " _f, __func__, ##_a)
 #define SHADOW_ERROR(_f, _a...)                                      \
@@ -53,6 +88,58 @@
 #define SHADOW_DEBUG_A_AND_D           1
 #define SHADOW_DEBUG_EMULATE           1
 #define SHADOW_DEBUG_LOGDIRTY          0
+
+/******************************************************************************
+ * The shadow lock.
+ *
+ * This lock is per-domain.  It is intended to allow us to make atomic
+ * updates to the software TLB that the shadow tables provide.
+ * 
+ * Specifically, it protects:
+ *   - all changes to shadow page table pages
+ *   - the shadow hash table
+ *   - the shadow page allocator 
+ *   - all changes to guest page table pages
+ *   - all changes to the page_info->tlbflush_timestamp
+ *   - the page_info->count fields on shadow pages
+ *   - the shadow dirty bit array and count
+ */
+#ifndef CONFIG_SMP
+#error shadow.h currently requires CONFIG_SMP
+#endif
+
+#define shadow_lock_init(_d)                            \
+    do {                                                \
+        spin_lock_init(&(_d)->arch.shadow.lock);        \
+        (_d)->arch.shadow.locker = -1;                  \
+        (_d)->arch.shadow.locker_function = "nobody";   \
+    } while (0)
+
+#define shadow_locked_by_me(_d)                     \
+    (current->processor == (_d)->arch.shadow.locker)
+
+#define shadow_lock(_d)                                                 \
+    do {                                                                \
+        if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
+        {                                                               \
+            printk("Error: shadow lock held by %s\n",                   \
+                   (_d)->arch.shadow.locker_function);                  \
+            BUG();                                                      \
+        }                                                               \
+        spin_lock(&(_d)->arch.shadow.lock);                             \
+        ASSERT((_d)->arch.shadow.locker == -1);                         \
+        (_d)->arch.shadow.locker = current->processor;                  \
+        (_d)->arch.shadow.locker_function = __func__;                   \
+    } while (0)
+
+#define shadow_unlock(_d)                                       \
+    do {                                                        \
+        ASSERT((_d)->arch.shadow.locker == current->processor); \
+        (_d)->arch.shadow.locker = -1;                          \
+        (_d)->arch.shadow.locker_function = "nobody";           \
+        spin_unlock(&(_d)->arch.shadow.lock);                   \
+    } while (0)
+
 
 
 /******************************************************************************
@@ -291,6 +378,21 @@ void sh_install_xen_entries_in_l2h(struc
 void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
 void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);
 
+/* Update the shadows in response to a pagetable write from Xen */
+extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
+                                   void *entry, u32 size);
+
+/* Update the shadows in response to a pagetable write from a HVM guest */
+extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 
+                                       void *entry, u32 size);
+
+/* Remove all writeable mappings of a guest frame from the shadows.
+ * Returns non-zero if we need to flush TLBs. 
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access. */
+extern int sh_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
+                                  unsigned int level,
+                                  unsigned long fault_addr);
 
 /******************************************************************************
  * Flags used in the return value of the shadow_set_lXe() functions...
@@ -325,6 +427,26 @@ void sh_install_xen_entries_in_l2(struct
 #undef mfn_valid
 #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
 
+
+static inline int
+sh_mfn_is_a_page_table(mfn_t gmfn)
+{
+    struct page_info *page = mfn_to_page(gmfn);
+    struct domain *owner;
+    unsigned long type_info;
+
+    if ( !mfn_valid(gmfn) )
+        return 0;
+
+    owner = page_get_owner(page);
+    if ( owner && shadow_mode_refcounts(owner) 
+         && (page->count_info & PGC_page_table) )
+        return 1; 
+
+    type_info = page->u.inuse.type_info & PGT_type_mask;
+    return type_info && (type_info <= PGT_l4_page_table);
+}
+
 // Provide mfn_t-aware versions of common xen functions
 static inline void *
 sh_map_domain_page(mfn_t mfn)
@@ -349,6 +471,25 @@ sh_unmap_domain_page_global(void *p)
 {
     unmap_domain_page_global(p);
 }
+
+static inline mfn_t
+pagetable_get_mfn(pagetable_t pt)
+{
+    return _mfn(pagetable_get_pfn(pt));
+}
+
+static inline pagetable_t
+pagetable_from_mfn(mfn_t mfn)
+{
+    return pagetable_from_pfn(mfn_x(mfn));
+}
+
+
+/******************************************************************************
+ * Log-dirty mode bitmap handling
+ */
+
+extern void sh_mark_dirty(struct domain *d, mfn_t gmfn);
 
 static inline int
 sh_mfn_is_dirty(struct domain *d, mfn_t gmfn)
@@ -366,25 +507,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
         return 1;
 
     return 0;
-}
-
-static inline int
-sh_mfn_is_a_page_table(mfn_t gmfn)
-{
-    struct page_info *page = mfn_to_page(gmfn);
-    struct domain *owner;
-    unsigned long type_info;
-
-    if ( !mfn_valid(gmfn) )
-        return 0;
-
-    owner = page_get_owner(page);
-    if ( owner && shadow_mode_refcounts(owner) 
-         && (page->count_info & PGC_page_table) )
-        return 1; 
-
-    type_info = page->u.inuse.type_info & PGT_type_mask;
-    return type_info && (type_info <= PGT_l4_page_table);
 }
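
For reference, the shadow_lock()/shadow_unlock() macros added to private.h above
record which CPU holds the lock and from which function it was taken, so a
re-entrant acquisition on the same CPU hits BUG() with a useful message rather
than deadlocking silently.  A minimal sketch of the intended usage inside the
shadow code follows; sh_example_op() and its body are invented purely for
illustration and are not part of this changeset.

    /* Illustrative only: a hypothetical shadow-internal helper using the
     * locking discipline defined by the macros above. */
    static void sh_example_op(struct domain *d)
    {
        shadow_lock(d);               /* records current->processor and __func__ */
        ASSERT(shadow_locked_by_me(d));
        /* ... modify shadow pagetables, the hash table or the allocator ... */
        shadow_unlock(d);             /* resets locker to -1 / "nobody" */
    }
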
 
 
diff -r b258c7587d8d -r c75d6f2aad7a xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/arch/x86/mm/shadow/types.h    Wed Dec 20 12:03:07 2006 +0000
@@ -477,8 +477,8 @@ struct shadow_walk_t
 #define sh_gva_to_gpa              INTERNAL_NAME(sh_gva_to_gpa)
 #define sh_gva_to_gfn              INTERNAL_NAME(sh_gva_to_gfn)
 #define sh_update_cr3              INTERNAL_NAME(sh_update_cr3)
-#define sh_remove_write_access     INTERNAL_NAME(sh_remove_write_access)
-#define sh_remove_all_mappings     INTERNAL_NAME(sh_remove_all_mappings)
+#define sh_rm_write_access_from_l1 INTERNAL_NAME(sh_rm_write_access_from_l1)
+#define sh_rm_mappings_from_l1     INTERNAL_NAME(sh_rm_mappings_from_l1)
 #define sh_remove_l1_shadow        INTERNAL_NAME(sh_remove_l1_shadow)
 #define sh_remove_l2_shadow        INTERNAL_NAME(sh_remove_l2_shadow)
 #define sh_remove_l3_shadow        INTERNAL_NAME(sh_remove_l3_shadow)
diff -r b258c7587d8d -r c75d6f2aad7a xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/include/asm-x86/mm.h  Wed Dec 20 12:03:07 2006 +0000
@@ -307,7 +307,7 @@ void audit_domains(void);
 
 int new_guest_cr3(unsigned long pfn);
 void make_cr3(struct vcpu *v, unsigned long mfn);
-
+void update_cr3(struct vcpu *v);
 void propagate_page_fault(unsigned long addr, u16 error_code);
 
 int __sync_lazy_execstate(void);
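
The mm.h hunk above turns update_cr3() into an out-of-line function.  Its new
definition lives in mm.c (those hunks appear earlier in this changeset and are
not repeated here); judging from the inline body removed from shadow.h further
down, it presumably looks roughly like the sketch below, so treat the exact
wording as an assumption rather than the committed code.

    /* Sketch of the out-of-line version, inferred from the inline body
     * being removed from shadow.h in this same changeset. */
    void update_cr3(struct vcpu *v)
    {
        unsigned long cr3_mfn = 0;

        if ( shadow_mode_enabled(v->domain) )
        {
            shadow_update_cr3(v);     /* shadow code now does its own locking */
            return;
        }

    #if CONFIG_PAGING_LEVELS == 4
        if ( !(v->arch.flags & TF_kernel_mode) )
            cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
        else
    #endif
            cr3_mfn = pagetable_get_pfn(v->arch.guest_table);

        make_cr3(v, cr3_mfn);
    }
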
diff -r b258c7587d8d -r c75d6f2aad7a xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Wed Dec 20 11:59:54 2006 +0000
+++ b/xen/include/asm-x86/shadow.h      Wed Dec 20 12:03:07 2006 +0000
@@ -29,20 +29,8 @@
 #include <xen/domain_page.h>
 #include <asm/flushtlb.h>
 
-/* How to make sure a page is not referred to in a shadow PT */
-/* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ 
-#define shadow_drop_references(_d, _p)                      \
-    shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-#define shadow_sync_and_drop_references(_d, _p)             \
-    shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-
-/* How to add and remove entries in the p2m mapping. */
-#define guest_physmap_add_page(_d, _p, _m)                  \
-    shadow_guest_physmap_add_page((_d), (_p), (_m))
-#define guest_physmap_remove_page(_d, _p, _m   )            \
-    shadow_guest_physmap_remove_page((_d), (_p), (_m))
-
-/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
+/*****************************************************************************
+ * Macros to tell which shadow paging mode a domain is in */
 
 #define SHM2_shift 10
 /* We're in one of the shadow modes */
@@ -64,107 +52,24 @@
 #define shadow_mode_external(_d)  ((_d)->arch.shadow.mode & SHM2_external)
 
 /* Xen traps & emulates all reads of all page table pages:
- * not yet supported
- */
+ * not yet supported */
 #define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; })
 
-// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
-#ifdef __x86_64__
-#define pv_32bit_guest(_v) 0 // not yet supported
-#else
-#define pv_32bit_guest(_v) !is_hvm_vcpu(v)
-#endif
-
-/* The shadow lock.
- *
- * This lock is per-domain.  It is intended to allow us to make atomic
- * updates to the software TLB that the shadow tables provide.
- * 
- * Specifically, it protects:
- *   - all changes to shadow page table pages
- *   - the shadow hash table
- *   - the shadow page allocator 
- *   - all changes to guest page table pages; if/when the notion of
- *     out-of-sync pages is added to this code, then the shadow lock is
- *     protecting all guest page table pages which are not listed as
- *     currently as both guest-writable and out-of-sync...
- *     XXX -- need to think about this relative to writable page tables.
- *   - all changes to the page_info->tlbflush_timestamp
- *   - the page_info->count fields on shadow pages
- *   - the shadow dirty bit array and count
- *   - XXX
- */
-#ifndef CONFIG_SMP
-#error shadow.h currently requires CONFIG_SMP
-#endif
-
-#define shadow_lock_init(_d)                            \
-    do {                                                \
-        spin_lock_init(&(_d)->arch.shadow.lock);        \
-        (_d)->arch.shadow.locker = -1;                  \
-        (_d)->arch.shadow.locker_function = "nobody";   \
-    } while (0)
-
-#define shadow_locked_by_me(_d)                     \
-    (current->processor == (_d)->arch.shadow.locker)
-
-#define shadow_lock(_d)                                                 \
-    do {                                                                \
-        if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
-        {                                                               \
-            printk("Error: shadow lock held by %s\n",                   \
-                   (_d)->arch.shadow.locker_function);                  \
-            BUG();                                                      \
-        }                                                               \
-        spin_lock(&(_d)->arch.shadow.lock);                             \
-        ASSERT((_d)->arch.shadow.locker == -1);                         \
-        (_d)->arch.shadow.locker = current->processor;                  \
-        (_d)->arch.shadow.locker_function = __func__;                   \
-    } while (0)
-
-#define shadow_unlock(_d)                                       \
-    do {                                                        \
-        ASSERT((_d)->arch.shadow.locker == current->processor); \
-        (_d)->arch.shadow.locker = -1;                          \
-        (_d)->arch.shadow.locker_function = "nobody";           \
-        spin_unlock(&(_d)->arch.shadow.lock);                   \
-    } while (0)
-
-/* 
- * Levels of self-test and paranoia
- * XXX should go in config files somewhere?  
- */
-#define SHADOW_AUDIT_HASH           0x01  /* Check current hash bucket */
-#define SHADOW_AUDIT_HASH_FULL      0x02  /* Check every hash bucket */
-#define SHADOW_AUDIT_ENTRIES        0x04  /* Check this walk's shadows */
-#define SHADOW_AUDIT_ENTRIES_FULL   0x08  /* Check every shadow */
-#define SHADOW_AUDIT_ENTRIES_MFNS   0x10  /* Check gfn-mfn map in shadows */
-#define SHADOW_AUDIT_P2M            0x20  /* Check the p2m table */
-
-#ifdef NDEBUG
-#define SHADOW_AUDIT                   0
-#define SHADOW_AUDIT_ENABLE            0
-#else
-#define SHADOW_AUDIT                0x15  /* Basic audit of all except p2m. */
-#define SHADOW_AUDIT_ENABLE         shadow_audit_enable
-extern int shadow_audit_enable;
-#endif
-
-/* 
- * Levels of optimization
- * XXX should go in config files somewhere?  
- */
-#define SHOPT_WRITABLE_HEURISTIC  0x01  /* Guess at RW PTEs via linear maps */
-#define SHOPT_EARLY_UNSHADOW      0x02  /* Unshadow l1s on fork or exit */
-#define SHOPT_FAST_FAULT_PATH     0x04  /* Fast-path MMIO and not-present */
-#define SHOPT_PREFETCH            0x08  /* Shadow multiple entries per fault */
-#define SHOPT_LINUX_L3_TOPLEVEL   0x10  /* Pin l3es on early 64bit linux */
-#define SHOPT_SKIP_VERIFY         0x20  /* Skip PTE v'fy when safe to do so */
-
-#define SHADOW_OPTIMIZATIONS      0x3f
-
-
-/* With shadow pagetables, the different kinds of address start 
+
+/******************************************************************************
+ * The equivalent for a particular vcpu of a shadowed domain. */
+
+/* Is this vcpu using the P2M table to translate between GFNs and MFNs?
+ *
+ * This is true of translated HVM domains on a vcpu which has paging
+ * enabled.  (HVM vcpus with paging disabled are using the p2m table as
+ * their paging table, so no translation occurs in this case.)
+ * It is also true for all vcpus of translated PV domains. */
+#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled)
+
+
+/******************************************************************************
+ * With shadow pagetables, the different kinds of address start 
  * to get confusing.
  * 
  * Virtual addresses are what they usually are: the addresses that are used 
@@ -214,38 +119,16 @@ static inline _type _name##_x(_name##_t 
 #endif
 
 TYPE_SAFE(unsigned long,mfn)
+
+/* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */
 #define SH_PRI_mfn "05lx"
 
-static inline mfn_t
-pagetable_get_mfn(pagetable_t pt)
-{
-    return _mfn(pagetable_get_pfn(pt));
-}
-
-static inline pagetable_t
-pagetable_from_mfn(mfn_t mfn)
-{
-    return pagetable_from_pfn(mfn_x(mfn));
-}
-
-static inline int
-shadow_vcpu_mode_translate(struct vcpu *v)
-{
-    // Returns true if this VCPU needs to be using the P2M table to translate
-    // between GFNs and MFNs.
-    //
-    // This is true of translated HVM domains on a vcpu which has paging
-    // enabled.  (HVM vcpu's with paging disabled are using the p2m table as
-    // its paging table, so no translation occurs in this case.)
-    //
-    // It is also true for translated PV domains.
-    //
-    return v->arch.shadow.translate_enabled;
-}
-
-
-/**************************************************************************/
-/* Mode-specific entry points into the shadow code */
+
+/*****************************************************************************
+ * Mode-specific entry points into the shadow code.  
+ *
+ * These shouldn't be used directly by callers; rather use the functions
+ * below which will indirect through this table as appropriate. */
 
 struct sh_emulate_ctxt;
 struct shadow_paging_mode {
@@ -254,7 +137,7 @@ struct shadow_paging_mode {
     int           (*invlpg                )(struct vcpu *v, unsigned long va);
     paddr_t       (*gva_to_gpa            )(struct vcpu *v, unsigned long va);
     unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
-    void          (*update_cr3            )(struct vcpu *v);
+    void          (*update_cr3            )(struct vcpu *v, int do_locking);
     int           (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn,
                                             void *new_guest_entry, u32 size);
     int           (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn,
@@ -286,35 +169,30 @@ struct shadow_paging_mode {
                                             unsigned long *gl1mfn);
     void          (*guest_get_eff_l1e     )(struct vcpu *v, unsigned long va,
                                             void *eff_l1e);
-#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
     int           (*guess_wrmap           )(struct vcpu *v, 
                                             unsigned long vaddr, mfn_t gmfn);
-#endif
     /* For outsiders to tell what mode we're in */
     unsigned int shadow_levels;
     unsigned int guest_levels;
 };
 
-static inline int shadow_guest_paging_levels(struct vcpu *v)
-{
-    ASSERT(v->arch.shadow.mode != NULL);
-    return v->arch.shadow.mode->guest_levels;
-}
-
-/**************************************************************************/
-/* Entry points into the shadow code */
-
-/* Enable arbitrary shadow mode. */
+
+/*****************************************************************************
+ * Entry points into the shadow code */
+
+/* Set up the shadow-specific parts of a domain struct at start of day.
+ * Called for every domain from arch_domain_create() */
+void shadow_domain_init(struct domain *d);
+
+/* Enable an arbitrary shadow mode.  Call once at domain creation. */
 int shadow_enable(struct domain *d, u32 mode);
 
-/* Turning on shadow test mode */
-int shadow_test_enable(struct domain *d);
-
-/* Handler for shadow control ops: enabling and disabling shadow modes, 
- * and log-dirty bitmap ops all happen through here. */
+/* Handler for shadow control ops: operations from user-space to enable
+ * and disable ephemeral shadow modes (test mode and log-dirty mode) and
+ * manipulate the log-dirty bitmap. */
 int shadow_domctl(struct domain *d, 
-                   xen_domctl_shadow_op_t *sc,
-                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
+                  xen_domctl_shadow_op_t *sc,
+                  XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
 
 /* Call when destroying a domain */
 void shadow_teardown(struct domain *d);
@@ -322,164 +200,96 @@ void shadow_teardown(struct domain *d);
 /* Call once all of the references to the domain have gone away */
 void shadow_final_teardown(struct domain *d);
 
-
-/* Mark a page as dirty in the bitmap */
-void sh_do_mark_dirty(struct domain *d, mfn_t gmfn);
+/* Mark a page as dirty in the log-dirty bitmap: called when Xen 
+ * makes changes to guest memory on its behalf. */
+void shadow_mark_dirty(struct domain *d, mfn_t gmfn);
+/* Cleaner version so we don't pepper shadow_mode tests all over the place */
 static inline void mark_dirty(struct domain *d, unsigned long gmfn)
 {
-    if ( likely(!shadow_mode_log_dirty(d)) )
-        return;
-
-    shadow_lock(d);
-    sh_do_mark_dirty(d, _mfn(gmfn));
-    shadow_unlock(d);
-}
-
-/* Internal version, for when the shadow lock is already held */
-static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn)
-{
-    ASSERT(shadow_locked_by_me(d));
     if ( unlikely(shadow_mode_log_dirty(d)) )
-        sh_do_mark_dirty(d, gmfn);
-}
-
-static inline int
-shadow_fault(unsigned long va, struct cpu_user_regs *regs)
-/* Called from pagefault handler in Xen, and from the HVM trap handlers
+        shadow_mark_dirty(d, _mfn(gmfn));
+}
+
+/* Handle page-faults caused by the shadow pagetable mechanisms.
+ * Called from pagefault handler in Xen, and from the HVM trap handlers
  * for pagefaults.  Returns 1 if this fault was an artefact of the
  * shadow code (and the guest should retry) or 0 if it is not (and the
  * fault should be handled elsewhere or passed to the guest). */
+static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     perfc_incrc(shadow_fault);
     return v->arch.shadow.mode->page_fault(v, va, regs);
 }
 
-static inline int
-shadow_invlpg(struct vcpu *v, unsigned long va)
-/* Called when the guest requests an invlpg.  Returns 1 if the invlpg
- * instruction should be issued on the hardware, or 0 if it's safe not
- * to do so. */
+/* Handle invlpg requests on shadowed vcpus. 
+ * Returns 1 if the invlpg instruction should be issued on the hardware, 
+ * or 0 if it's safe not to do so. */
+static inline int shadow_invlpg(struct vcpu *v, unsigned long va)
 {
     return v->arch.shadow.mode->invlpg(v, va);
 }
 
-static inline paddr_t
-shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
-/* Called to translate a guest virtual address to what the *guest*
- * pagetables would map it to. */
+/* Translate a guest virtual address to the physical address that the
+ * *guest* pagetables would map it to. */
+static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
 {
     if ( unlikely(!shadow_vcpu_mode_translate(v)) )
         return (paddr_t) va;
     return v->arch.shadow.mode->gva_to_gpa(v, va);
 }
 
-static inline unsigned long
-shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
-/* Called to translate a guest virtual address to what the *guest*
- * pagetables would map it to. */
+/* Translate a guest virtual address to the frame number that the
+ * *guest* pagetables would map it to. */
+static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
 {
     if ( unlikely(!shadow_vcpu_mode_translate(v)) )
         return va >> PAGE_SHIFT;
     return v->arch.shadow.mode->gva_to_gfn(v, va);
 }
 
-static inline void
-shadow_update_cr3(struct vcpu *v)
-/* Updates all the things that are derived from the guest's CR3. 
- * Called when the guest changes CR3. */
-{
-    shadow_lock(v->domain);
-    v->arch.shadow.mode->update_cr3(v);
-    shadow_unlock(v->domain);
-}
-
-
-/* Should be called after CR3 is updated.
- * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
- * 
- * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
- * shadow_vtable, etc).
- *
- * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
- * for HVM guests, arch.monitor_table and hvm's guest CR3.
- *
- * Update ref counts to shadow tables appropriately.
- */
-static inline void update_cr3(struct vcpu *v)
-{
-    unsigned long cr3_mfn=0;
-
-    if ( shadow_mode_enabled(v->domain) )
-    {
-        shadow_update_cr3(v);
-        return;
-    }
-
-#if CONFIG_PAGING_LEVELS == 4
-    if ( !(v->arch.flags & TF_kernel_mode) )
-        cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
-    else
-#endif
-        cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
-
-    make_cr3(v, cr3_mfn);
-}
-
-extern void sh_update_paging_modes(struct vcpu *v);
-
-/* Should be called to initialise paging structures if the paging mode
+/* Update all the things that are derived from the guest's CR3. 
+ * Called when the guest changes CR3; the caller can then use 
+ * v->arch.cr3 as the value to load into the host CR3 to schedule this vcpu
+ * and v->arch.hvm_vcpu.hw_cr3 as the value to put in the vmcb/vmcs when 
+ * entering the HVM guest. */
+static inline void shadow_update_cr3(struct vcpu *v)
+{
+    v->arch.shadow.mode->update_cr3(v, 1);
+}
+
+/* Update all the things that are derived from the guest's CR0/CR3/CR4.
+ * Called to initialize paging structures if the paging mode
  * has changed, and when bringing up a VCPU for the first time. */
-static inline void shadow_update_paging_modes(struct vcpu *v)
-{
-    ASSERT(shadow_mode_enabled(v->domain));
-    shadow_lock(v->domain);
-    sh_update_paging_modes(v);
-    shadow_unlock(v->domain);
-}
-
-static inline void
-shadow_detach_old_tables(struct vcpu *v)
-{
-    if ( v->arch.shadow.mode )
-        v->arch.shadow.mode->detach_old_tables(v);
-}
-
-static inline mfn_t
-shadow_make_monitor_table(struct vcpu *v)
-{
-    return v->arch.shadow.mode->make_monitor_table(v);
-}
-
-static inline void
-shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
-{
-    v->arch.shadow.mode->destroy_monitor_table(v, mmfn);
-}
-
+void shadow_update_paging_modes(struct vcpu *v);
+
+
+/*****************************************************************************
+ * Access to the guest pagetables */
+
+/* Get a mapping of a PV guest's l1e for this virtual address. */
 static inline void *
 guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn)
 {
-    if ( likely(!shadow_mode_translate(v->domain)) )
-    {
-        l2_pgentry_t l2e;
-        ASSERT(!shadow_mode_external(v->domain));
-        /* Find this l1e and its enclosing l1mfn in the linear map */
-        if ( __copy_from_user(&l2e, 
-                              &__linear_l2_table[l2_linear_offset(addr)],
-                              sizeof(l2_pgentry_t)) != 0 )
-            return NULL;
-        /* Check flags that it will be safe to read the l1e */
-        if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 
-             != _PAGE_PRESENT )
-            return NULL;
-        *gl1mfn = l2e_get_pfn(l2e);
-        return &__linear_l1_table[l1_linear_offset(addr)];
-    }
-
-    return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn);
-}
-
+    l2_pgentry_t l2e;
+
+    if ( unlikely(shadow_mode_translate(v->domain)) )
+        return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn);
+
+    /* Find this l1e and its enclosing l1mfn in the linear map */
+    if ( __copy_from_user(&l2e, 
+                          &__linear_l2_table[l2_linear_offset(addr)],
+                          sizeof(l2_pgentry_t)) != 0 )
+        return NULL;
+    /* Check flags that it will be safe to read the l1e */
+    if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) 
+         != _PAGE_PRESENT )
+        return NULL;
+    *gl1mfn = l2e_get_pfn(l2e);
+    return &__linear_l1_table[l1_linear_offset(addr)];
+}
+
+/* Pull down the mapping we got from guest_map_l1e() */
 static inline void
 guest_unmap_l1e(struct vcpu *v, void *p)
 {
@@ -487,6 +297,7 @@ guest_unmap_l1e(struct vcpu *v, void *p)
         unmap_domain_page(p);
 }
 
+/* Read the guest's l1e that maps this address. */
 static inline void
 guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
 {
@@ -503,6 +314,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign
     v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e);
 }
 
+/* Read the guest's l1e that maps this address, from the kernel-mode
+ * pagetables. */
 static inline void
 guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
 {
@@ -518,81 +331,35 @@ guest_get_eff_kern_l1e(struct vcpu *v, u
     TOGGLE_MODE();
 }
 
-
-/* Validate a pagetable change from the guest and update the shadows. */
-extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
-                                        void *new_guest_entry);
-extern int __shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
-                                         void *entry, u32 size);
-
-/* Update the shadows in response to a pagetable write from a HVM guest */
-extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 
-                                            void *entry, u32 size);
-
-/* Remove all writeable mappings of a guest frame from the shadows.
- * Returns non-zero if we need to flush TLBs. 
- * level and fault_addr desribe how we found this to be a pagetable;
- * level==0 means we have some other reason for revoking write access. */
-extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
-                                       unsigned int level,
-                                       unsigned long fault_addr);
-
-/* Remove all mappings of the guest mfn from the shadows. 
- * Returns non-zero if we need to flush TLBs. */
-extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
-
-/* Remove all mappings from the shadows. */
-extern void shadow_blow_tables(struct domain *d);
-
-void
-shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
-/* This is a HVM page that we thing is no longer a pagetable.
- * Unshadow it, and recursively unshadow pages that reference it. */
+/* Write a new value into the guest pagetable, and update the shadows 
+ * appropriately.  Returns 0 if we page-faulted, 1 for success. */
+int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
+                             intpte_t new, mfn_t gmfn);
+
+/* Cmpxchg a new value into the guest pagetable, and update the shadows 
+ * appropriately. Returns 0 if we page-faulted, 1 if not.
+ * N.B. caller should check the value of "old" to see if the
+ * cmpxchg itself was successful. */
+int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
+                               intpte_t *old, intpte_t new, mfn_t gmfn);
+
+/* Remove all mappings of the guest page from the shadows. 
+ * This is called from common code.  It does not flush TLBs. */
+int sh_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
+static inline void 
+shadow_drop_references(struct domain *d, struct page_info *p)
+{
+    /* See the comment about locking in sh_remove_all_mappings */
+    sh_remove_all_mappings(d->vcpu[0], _mfn(page_to_mfn(p)));
+}
 
 /* Remove all shadows of the guest mfn. */
-extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
+void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all);
 static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
 {
-    int was_locked = shadow_locked_by_me(v->domain);
-    if ( !was_locked )
-        shadow_lock(v->domain);
-    sh_remove_shadows(v, gmfn, 0, 1);
-    if ( !was_locked )
-        shadow_unlock(v->domain);
-}
-
-/* Add a page to a domain */
-void
-shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
-                               unsigned long mfn);
-
-/* Remove a page from a domain */
-void
-shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
-                                  unsigned long mfn);
-
-/* 
- * Allocation of shadow pages 
- */
-
-/* Return the minumum acceptable number of shadow pages a domain needs */
-unsigned int shadow_min_acceptable_pages(struct domain *d);
-
-/* Set the pool of shadow pages to the required number of MB.
- * Input will be rounded up to at least min_acceptable_shadow_pages().
- * Returns 0 for success, 1 for failure. */
-unsigned int shadow_set_allocation(struct domain *d, 
-                                    unsigned int megabytes,
-                                    int *preempted);
-
-/* Return the size of the shadow pool, rounded up to the nearest MB */
-static inline unsigned int shadow_get_allocation(struct domain *d)
-{
-    unsigned int pg = d->arch.shadow.total_pages;
-    return ((pg >> (20 - PAGE_SHIFT))
-            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
-}
-
+    /* See the comment about locking in sh_remove_shadows */
+    sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
+}
 
 /**************************************************************************/
 /* Guest physmap (p2m) support 
@@ -602,8 +369,19 @@ static inline unsigned int shadow_get_al
  * guests, so we steal the address space that would have normally
  * been used by the read-only MPT map.
  */
-
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
+
+/* Add a page to a domain's p2m table */
+void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+                                   unsigned long mfn);
+
+/* Remove a page from a domain's p2m table */
+void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+                                      unsigned long mfn);
+
+/* Aliases, called from common code. */
+#define guest_physmap_add_page    shadow_guest_physmap_add_page
+#define guest_physmap_remove_page shadow_guest_physmap_remove_page
 
 /* Read the current domain's P2M table. */
 static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn)
@@ -627,8 +405,8 @@ static inline mfn_t sh_gfn_to_mfn_curren
     return _mfn(INVALID_MFN);
 }
 
-/* Walk another domain's P2M table, mapping pages as we go */
-extern mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+/* Read another domain's P2M table, mapping pages as we go */
+mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
 
 /* General conversion function from gfn to mfn */
 static inline mfn_t
@@ -666,6 +444,7 @@ mmio_space(paddr_t gpa)
     return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn)));
 }
 
+/* Translate the frame number held in an l1e from guest to machine */
 static inline l1_pgentry_t
 gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
 {
@@ -685,4 +464,3 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
  * indent-tabs-mode: nil
  * End:
  */
-      
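
To round off the shadow.h changes: guest_map_l1e() now goes straight to the
linear map for non-translated guests and must always be paired with
guest_unmap_l1e().  A hypothetical caller is sketched below; the helper name
and its logic are invented for illustration, and only the two guest_*_l1e()
functions and the flag test are taken from the header above.

    /* Illustrative only: read the guest l1e that maps 'addr' and report
     * whether it is present. */
    static int sh_example_l1e_present(struct vcpu *v, unsigned long addr)
    {
        unsigned long gl1mfn;
        l1_pgentry_t *pl1e, l1e;

        pl1e = guest_map_l1e(v, addr, &gl1mfn);
        if ( pl1e == NULL )
            return 0;                 /* no l1e is mapped at this address */
        l1e = *pl1e;
        guest_unmap_l1e(v, pl1e);     /* always pair the map with an unmap */
        return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
    }
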

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
