To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 4 of 5] Add support for mapping grant references into HVM guests by modifying the P2M
From: <steven.smith@xxxxxxxxxxxxx>
Date: Tue, 19 May 2009 12:27:06 +0100
In-reply-to: <patchbomb.1242732422@xxxxxxxxxxxxxxxxxxxxxxxxxx>
# HG changeset patch
# User Steven Smith <steven.smith@xxxxxxxxxxxxx>
# Date 1242731493 -3600
# Node ID c6126a51b5094529933790ba166778bfb8c3d9de
# Parent  6cba03677059ca31345cb588fb294d915b8639a8
Add support for mapping grant references into HVM guests by modifying the P2M.
This is moderately involved, because it means that you can now have
several pfns mapping onto a single mfn, which wasn't previously
allowed.  Fortunately, pretty much everything which does a P2M lookup
immediately follows it with a get_page(), which will fail because the
page is owned by the wrong domain, so this should be fairly safe.  It
does mean that guests are likely to crash if e.g. they try to put
their page tables in memory which was grant-mapped in from another
domain, but I don't consider that a particular problem.
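
For reference, the lookup-then-get_page() pattern being relied on
here looks roughly like this (a sketch of the common shape, not the
code at any particular call site):

    p2m_type_t t;
    mfn_t mfn = gfn_to_mfn(d, gfn, &t);
    struct page_info *page = mfn_to_page(mfn);
    /* A grant-mapped gfn resolves to a frame owned by the granting
       domain rather than d, so this get_page() fails and the caller
       backs out instead of treating the foreign frame as local RAM. */
    if ( !get_page(page, d) )
        return 0;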

There's a minor change to get_page_from_l1e(), which I believe is a
bug fix because it makes put_page_from_l1e() a proper inverse of
get_page_from_l1e(), but I don't understand that code well enough to
be entirely confident of it.

One slight oddity is that we allow a guest's page tables to be mapped
writably from another domain (this is also true of ordinary PV grant
mappings).  I believe this to be okay, because the worst that can
happen is that an update to the guest page tables isn't reflected into
the shadows, and the worst that can happen as a result of that is that
the guest becomes unhappy, with no damage to Xen or to other domains.

This will only work on 64-bit hypervisors, because we've run out of
P2M types in 32-bit mode.
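
Concretely, per the p2m_type_to_flags()/p2m_flags_to_type() hunks
below, the type is kept in the "available" p2m entry bits starting at
bit 9, and the two encodings look like this (sketch):

    /* 32-bit: only three available bits, so types 0-7 fit and the
       new p2m_grant_map_ro (= 8) does not. */
    flags = (t & 0x7UL) << 9;

    /* 64-bit: a wider type field, with room for the grant types. */
    flags = (unsigned long)(t & 0x3fff) << 9;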

Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
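
For illustration, a guest-side caller could drive the new hypercalls
roughly as follows.  This is a minimal sketch, not code from this
patch: HYPERVISOR_hvm_op stands in for the guest's usual hypercall
wrapper, and backend_domid, gref and free_pfn are placeholders for
values the caller already holds.

    struct xen_hvm_map_grant_ref map = {
        .domid = backend_domid,   /* domain that granted the page */
        .gref  = gref,            /* grant reference it offered */
        .pfn   = free_pfn,        /* unused guest pfn to map it at */
        .flags = GNTMAP_host_map, /* add GNTMAP_readonly for RO */
    };
    if ( HYPERVISOR_hvm_op(HVMOP_map_grant_ref, &map) == 0 )
    {
        /* The foreign frame is now visible at free_pfn in the P2M;
           use it, then tear the mapping down again. */
        struct xen_hvm_unmap_grant_ref unmap = { .handle = map.handle };
        HYPERVISOR_hvm_op(HVMOP_unmap_grant_ref, &unmap);
    }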

diff -r 6cba03677059 -r c6126a51b509 xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c        Tue May 19 12:11:33 2009 +0100
+++ b/xen/arch/x86/hvm/emulate.c        Tue May 19 12:11:33 2009 +0100
@@ -636,12 +636,12 @@
         return rc;
 
     (void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt);
-    if ( !p2m_is_ram(p2mt) )
+    if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) )
         return hvmemul_do_mmio(
             sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL);
 
     (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt);
-    if ( !p2m_is_ram(p2mt) )
+    if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) )
         return hvmemul_do_mmio(
             dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL);
 
diff -r 6cba03677059 -r c6126a51b509 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue May 19 12:11:33 2009 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Tue May 19 12:11:33 2009 +0100
@@ -1541,7 +1541,7 @@
 
         mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
 
-        if ( !p2m_is_ram(p2mt) )
+        if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) )
             return HVMCOPY_bad_gfn_to_mfn;
         ASSERT(mfn_valid(mfn));
 
@@ -1549,7 +1549,7 @@
 
         if ( flags & HVMCOPY_to_guest )
         {
-            if ( p2mt == p2m_ram_ro )
+            if ( p2mt == p2m_ram_ro || p2mt == p2m_grant_map_ro )
             {
                 static unsigned long lastpage;
                 if ( xchg(&lastpage, gfn) != gfn )
@@ -1560,7 +1560,8 @@
             else
             {
                 memcpy(p, buf, count);
-                paging_mark_dirty(curr->domain, mfn);
+                if ( !p2m_is_grant(p2mt) )
+                    paging_mark_dirty(curr->domain, mfn);
             }
         }
         else
@@ -2700,7 +2701,7 @@
         {
             p2m_type_t t;
             mfn_t mfn = gfn_to_mfn(d, pfn, &t);
-            if ( mfn_x(mfn) != INVALID_MFN )
+            if ( !p2m_is_grant(t) && mfn_x(mfn) != INVALID_MFN )
             {
                 paging_mark_dirty(d, mfn_x(mfn));
                 /* These are most probably not page tables any more */
@@ -2746,20 +2747,75 @@
             
         if ( a.hvmmem_type >= ARRAY_SIZE(memtype) )
             goto param_fail4;
-            
+
+        /* This isn't entirely correct or necessary (the check in the
+           actual retype loop is the important one), but it means the
+           caller gets some chance to find out that things have
+           failed. */
+        for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
+        {
+            p2m_type_t t;
+            (void)gfn_to_mfn(d, pfn, &t);
+            if ( p2m_is_grant(t) )
+                goto param_fail4;
+        }
+
         rc = 0;
-        
+
         for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
         {
             p2m_type_t t;
             mfn_t mfn;
             mfn = gfn_to_mfn(d, pfn, &t);
-            p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]);
+            /* Note that this will only happen if the guest has mapped
+               in a grant after the check above and before this check,
+               in which case they get what they deserve. */
+            if ( p2m_is_grant(t) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "type for pfn 0x%lx changed to grant while we were working?\n",
+                         pfn);
+            }
+            else
+            {
+                p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]);
+            }
         }
          
     param_fail4:
         rcu_unlock_domain(d);
         break;
+    }
+
+    case HVMOP_map_grant_ref: {
+        struct xen_hvm_map_grant_ref a;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = gnttab_map_grant_ref_p2m(a.domid, a.gref, a.pfn, a.flags,
+                                      &a.handle);
+        if ( rc < 0 )
+            return rc;
+        if ( copy_to_guest(arg, &a, 1) )
+            return -EFAULT;
+        else
+            return 0;
+    }
+
+    case HVMOP_unmap_grant_ref: {
+        struct xen_hvm_unmap_grant_ref a;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = gnttab_unmap_grant_ref_p2m(a.handle);
+        if ( rc < 0 )
+            return rc;
+        if ( copy_to_guest(arg, &a, 1) )
+            return -EFAULT;
+        else
+            return 0;
     }
 
     default:
diff -r 6cba03677059 -r c6126a51b509 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Tue May 19 12:11:33 2009 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Tue May 19 12:11:33 2009 +0100
@@ -909,8 +909,27 @@
     }
 
     /* Log-dirty: mark the page dirty and let the guest write it again */
-    paging_mark_dirty(current->domain, mfn_x(mfn));
-    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+    if ( p2mt == p2m_ram_logdirty )
+    {
+        paging_mark_dirty(current->domain, mfn_x(mfn));
+        p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+        return;
+    }
+
+    /* Okay, this shouldn't happen.  Maybe the guest was writing to a
+       read-only grant mapping? */
+    if ( p2mt == p2m_grant_map_ro )
+    {
+        /* Naughty... */
+        gdprintk(XENLOG_WARNING,
+                 "trying to write to read-only grant mapping\n");
+        hvm_inject_exception(TRAP_gp_fault, 0, 0);
+        return;
+    }
+
+    /* Something bad has happened; either Xen or the hardware has
+       screwed up. */
+    gdprintk(XENLOG_WARNING, "unexpected SVM nested page fault\n");
 }
 
 static void svm_fpu_dirty_intercept(void)
diff -r 6cba03677059 -r c6126a51b509 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue May 19 12:11:33 2009 +0100
+++ b/xen/arch/x86/mm.c Tue May 19 12:11:33 2009 +0100
@@ -761,7 +761,7 @@
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
     if ( (l1f & _PAGE_RW) &&
-         !(paging_mode_external(d) && (d != curr->domain)) &&
+         !(paging_mode_external(owner) && (owner != curr->domain)) &&
          !get_page_type(page, PGT_writable_page) )
         goto could_not_pin;
 
diff -r 6cba03677059 -r c6126a51b509 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Tue May 19 12:11:33 2009 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Tue May 19 12:11:33 2009 +0100
@@ -39,10 +39,12 @@
             return;
         case p2m_ram_rw:
         case p2m_mmio_direct:
+        case p2m_grant_map_rw:
              entry->r = entry->w = entry->x = 1;
             return;
         case p2m_ram_logdirty:
         case p2m_ram_ro:
+        case p2m_grant_map_ro:
              entry->r = entry->x = 1;
              entry->w = 0;
             return;
diff -r 6cba03677059 -r c6126a51b509 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Tue May 19 12:11:33 2009 +0100
+++ b/xen/arch/x86/mm/p2m.c     Tue May 19 12:11:33 2009 +0100
@@ -102,17 +102,27 @@
 
 static unsigned long p2m_type_to_flags(p2m_type_t t) 
 {
-    unsigned long flags = (t & 0x7UL) << 9;
+    unsigned long flags;
+#ifdef __x86_64__
+    flags = (unsigned long)(t & 0x3fff) << 9;
+#else
+    flags = (t & 0x7UL) << 9;
+#endif
+#ifndef HAVE_GRANT_MAP_P2M
+    BUG_ON(p2m_is_grant(t));
+#endif
     switch(t)
     {
     case p2m_invalid:
     default:
         return flags;
     case p2m_ram_rw:
+    case p2m_grant_map_rw:
         return flags | P2M_BASE_FLAGS | _PAGE_RW;
     case p2m_ram_logdirty:
         return flags | P2M_BASE_FLAGS;
     case p2m_ram_ro:
+    case p2m_grant_map_ro:
         return flags | P2M_BASE_FLAGS;
     case p2m_mmio_dm:
         return flags;
@@ -1797,7 +1807,8 @@
                         ASSERT(mfn_valid(_mfn(mfn)));
                         m2pfn = get_gpfn_from_mfn(mfn);
                         if ( m2pfn != gfn &&
-                             p2m_flags_to_type(l1e_get_flags(l1e[i1])) != p2m_mmio_direct )
+                             p2m_flags_to_type(l1e_get_flags(l1e[i1])) != p2m_mmio_direct &&
+                             !p2m_is_grant(p2m_flags_to_type(l1e_get_flags(l1e[i1]))) )
                         {
                             pmbad++;
                             printk("mismatch: gfn %#lx -> mfn %#lx"
@@ -1850,6 +1861,8 @@
                 unsigned int page_order)
 {
     unsigned long i;
+    mfn_t mfn_return;
+    p2m_type_t t;
 
     if ( !paging_mode_translate(d) )
     {
@@ -1861,9 +1874,14 @@
 
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
+    for ( i = 0; i < (1UL << page_order); i++ )
+    {
+        mfn_return = p2m_gfn_to_mfn(d, gfn + i, &t, p2m_query);
+        if ( !p2m_is_grant(t) )
+            set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
+        ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
+    }
     set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
-    for ( i = 0; i < (1UL << page_order); i++ )
-        set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
 }
 
 void
@@ -1999,7 +2017,14 @@
     for ( i = 0; i < (1UL << page_order); i++ )
     {
         omfn = gfn_to_mfn_query(d, gfn + i, &ot);
-        if ( p2m_is_ram(ot) )
+        if ( p2m_is_grant(ot) )
+        {
+            /* Really shouldn't be unmapping grant maps this way */
+            domain_crash(d);
+            p2m_unlock(d->arch.p2m);
+            return -EINVAL;
+        }
+        else if ( p2m_is_ram(ot) )
         {
             ASSERT(mfn_valid(omfn));
             set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
@@ -2014,6 +2039,8 @@
     /* Then, look for m->p mappings for this range and deal with them */
     for ( i = 0; i < (1UL << page_order); i++ )
     {
+        if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d )
+            continue;
         ogfn = mfn_to_gfn(d, _mfn(mfn+i));
         if (
 #ifdef __x86_64__
@@ -2029,6 +2056,9 @@
             P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                       mfn + i, ogfn, gfn + i);
             omfn = gfn_to_mfn_query(d, ogfn, &ot);
+            /* If we get here, we know the local domain owns the page,
+               so it can't have been grant mapped in. */
+            BUG_ON( p2m_is_grant(ot) );
             if ( p2m_is_ram(ot) )
             {
                 ASSERT(mfn_valid(omfn));
@@ -2045,8 +2075,11 @@
     {
         if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
             rc = -EINVAL;
-        for ( i = 0; i < (1UL << page_order); i++ )
-            set_gpfn_from_mfn(mfn+i, gfn+i);
+        if ( !p2m_is_grant(t) )
+        {
+            for ( i = 0; i < (1UL << page_order); i++ )
+                set_gpfn_from_mfn(mfn+i, gfn+i);
+        }
     }
     else
     {
@@ -2085,6 +2118,8 @@
     l4_pgentry_t *l4e;
     int i4;
 #endif /* CONFIG_PAGING_LEVELS == 4 */
+
+    BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
 
     if ( !paging_mode_translate(d) )
         return;
@@ -2181,6 +2216,8 @@
     p2m_type_t pt;
     mfn_t mfn;
 
+    BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
+
     p2m_lock(d->arch.p2m);
 
     mfn = gfn_to_mfn(d, gfn, &pt);
@@ -2203,7 +2240,12 @@
         return 0;
 
     omfn = gfn_to_mfn_query(d, gfn, &ot);
-    if ( p2m_is_ram(ot) )
+    if ( p2m_is_grant(ot) )
+    {
+        domain_crash(d);
+        return 0;
+    }
+    else if ( p2m_is_ram(ot) )
     {
         ASSERT(mfn_valid(omfn));
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
diff -r 6cba03677059 -r c6126a51b509 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Tue May 19 12:11:33 2009 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Tue May 19 12:11:33 2009 +0100
@@ -627,7 +627,7 @@
     }
 
     /* Read-only memory */
-    if ( p2mt == p2m_ram_ro ) 
+    if ( p2m_is_readonly(p2mt) )
         sflags &= ~_PAGE_RW;
     
     // protect guest page tables
@@ -804,8 +804,10 @@
     return ((of | (of ^ nf)) == nf);
 }
 
+/* type is only used to distinguish grant map pages from ordinary RAM
+ * i.e. non-p2m_is_grant() pages are treated as p2m_ram_rw.  */
 static int inline
-shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
 {
     int res;
     mfn_t mfn;
@@ -833,6 +835,20 @@
                        "which is owned by domain %d: %s\n",
                        d->domain_id, mfn_x(mfn), owner->domain_id,
                        res ? "success" : "failed");
+    }
+
+    /* Okay, it might still be a grant mapping PTE.  Try it. */
+    if ( unlikely(!res) &&
+         (type == p2m_grant_map_rw ||
+          (type == p2m_grant_map_ro &&
+           !(shadow_l1e_get_flags(sl1e) & _PAGE_RW))) )
+    {
+        /* It's a grant mapping.  The grant table implementation will
+           already have checked that we're supposed to have access, so
+           we can just grab a reference directly. */
+        mfn = shadow_l1e_get_mfn(sl1e);
+        if ( mfn_valid(mfn) )
+            res = get_page_from_l1e(sl1e, page_get_owner(mfn_to_page(mfn)));
     }
 
     if ( unlikely(!res) )
@@ -1128,6 +1144,7 @@
 static int shadow_set_l1e(struct vcpu *v, 
                           shadow_l1e_t *sl1e, 
                           shadow_l1e_t new_sl1e,
+                          p2m_type_t new_type,
                           mfn_t sl1mfn)
 {
     int flags = 0;
@@ -1155,7 +1172,7 @@
         /* About to install a new reference */        
         if ( shadow_mode_refcounts(d) ) {
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
-            if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 ) 
+            if ( shadow_get_page_from_l1e(new_sl1e, d, new_type) == 0 ) 
             {
                 /* Doesn't look like a pagetable. */
                 flags |= SHADOW_SET_ERROR;
@@ -1190,6 +1207,22 @@
     return flags;
 }
 
+/* Clear the writable bit on *sl1e. */
+static void shadow_downgrade_l1e(struct vcpu *v,
+                                 shadow_l1e_t *sl1e,
+                                 mfn_t sl1mfn)
+{
+    shadow_l1e_t ro_sl1e = shadow_l1e_remove_flags(*sl1e, _PAGE_RW);
+    int r;
+    /* We rely on the fact that we never need to downgrade a page
+       unless it has been or is being shadowed, and we never shadow a
+       grant page.  We can therefore assume that sl1mfn isn't grant,
+       and so treat it as ram. */
+    r = shadow_set_l1e(v, sl1e, ro_sl1e, p2m_ram_rw, sl1mfn);
+    /* Because it's not a grant map, and we've already grabbed it
+     * once. */
+    ASSERT( !(r & SHADOW_SET_ERROR) );
+}
 
 /**************************************************************************/
 /* Macros to walk pagetables.  These take the shadow of a pagetable and 
@@ -2372,7 +2405,7 @@
     gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
 
     l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
-    result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
+    result |= shadow_set_l1e(v, sl1p, new_sl1e, p2mt, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
@@ -2431,8 +2464,8 @@
             gfn = guest_l1e_get_gfn(gl1e);
             gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
             l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt);
-            rc |= shadow_set_l1e(v, sl1p, nsl1e, sl1mfn);
-            
+            rc |= shadow_set_l1e(v, sl1p, nsl1e, p2mt, sl1mfn);
+
             *snpl1p = gl1e;
         }
     });
@@ -2749,7 +2782,7 @@
 
         /* Propagate the entry.  */
         l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
-        (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
+        (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, p2mt, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         if ( snpl1p != NULL )
@@ -3202,7 +3235,7 @@
 
     /* Calculate the shadow entry and write it */
     l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt);
-    r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
+    r = shadow_set_l1e(v, ptr_sl1e, sl1e, p2mt, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     if ( mfn_valid(gw.l1mfn) 
@@ -3255,7 +3288,7 @@
     }
 
     /* Ignore attempts to write to read-only memory. */
-    if ( (p2mt == p2m_ram_ro) && (ft == ft_demand_write) )
+    if ( p2m_is_readonly(p2mt) && (ft == ft_demand_write) )
     {
         static unsigned long lastpage;
         if ( xchg(&lastpage, va & PAGE_MASK) != (va & PAGE_MASK) )
@@ -3598,7 +3631,8 @@
                 shadow_l1e_t *sl1;
                 sl1 = sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
                 /* Remove the shadow entry that maps this VA */
-                (void) shadow_set_l1e(v, sl1, shadow_l1e_empty(), sl1mfn);
+                (void) shadow_set_l1e(v, sl1, shadow_l1e_empty(),
+                                      p2m_invalid, sl1mfn);
             }
             shadow_unlock(v->domain);
             /* Need the invlpg, to pick up the disappeareance of the sl1e */
@@ -4286,7 +4320,6 @@
 int sh_rm_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn, 
                                  mfn_t smfn, unsigned long off)
 {
-    int r;
     shadow_l1e_t *sl1p, sl1e;
     struct page_info *sp;
 
@@ -4312,9 +4345,7 @@
     }
 
     /* Found it!  Need to remove its write permissions. */
-    sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
-    r = shadow_set_l1e(v, sl1p, sl1e, smfn);
-    ASSERT( !(r & SHADOW_SET_ERROR) );
+    shadow_downgrade_l1e(v, sl1p, smfn);
 
     sh_unmap_domain_page(sl1p);
     perfc_incr(shadow_writeable_h_7);
@@ -4367,8 +4398,12 @@
     /* Found it!  Need to remove its write permissions. */
     sl1mfn = shadow_l2e_get_mfn(*sl2p);
     sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
-    r = shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
-    ASSERT( !(r & SHADOW_SET_ERROR) );
+    r = shadow_set_l1e(v, sl1p, sl1e, p2m_ram_rw, sl1mfn);
+    if ( r & SHADOW_SET_ERROR ) {
+        /* Can only currently happen if we found a grant-mapped
+         * page.  Just make the guess fail. */
+        return 0;
+    }
     TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);
     return 1;
 }
@@ -4392,8 +4427,7 @@
              && (flags & _PAGE_RW) 
              && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
         {
-            shadow_l1e_t ro_sl1e = shadow_l1e_remove_flags(*sl1e, _PAGE_RW);
-            (void) shadow_set_l1e(v, sl1e, ro_sl1e, sl1mfn);
+            shadow_downgrade_l1e(v, sl1e, sl1mfn);
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 
             /* Remember the last shadow that we shot a writeable mapping in */
             v->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn);
@@ -4421,7 +4455,8 @@
         if ( (flags & _PAGE_PRESENT) 
              && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) )
         {
-            (void) shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
+            (void) shadow_set_l1e(v, sl1e, shadow_l1e_empty(),
+                                  p2m_invalid, sl1mfn);
             if ( (mfn_to_page(target_mfn)->count_info & PGC_count_mask) == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
                 done = 1;
@@ -4439,17 +4474,21 @@
     switch ( mfn_to_page(smfn)->u.sh.type )
     {
     case SH_type_l1_shadow:
-        (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
+        (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), p2m_invalid, smfn);
+        break;
     case SH_type_l2_shadow:
 #if GUEST_PAGING_LEVELS >= 3
     case SH_type_l2h_shadow:
 #endif
-        (void) shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
+        (void) shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn);
+        break;
 #if GUEST_PAGING_LEVELS >= 4
     case SH_type_l3_shadow:
-        (void) shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
+        (void) shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn);
+        break;
     case SH_type_l4_shadow:
-        (void) shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
+        (void) shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn);
+        break;
 #endif
     default: BUG(); /* Called with the wrong kind of shadow. */
     }
@@ -4557,9 +4596,9 @@
     else
         mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
         
-    if ( p2mt == p2m_ram_ro )
+    if ( p2m_is_readonly(p2mt) )
         return _mfn(READONLY_GFN);
-    if ( !p2m_is_ram(p2mt) )
+    if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) )
         return _mfn(BAD_GFN_TO_MFN);
 
     ASSERT(mfn_valid(mfn));
@@ -4961,7 +5000,7 @@
                 gfn = guest_l1e_get_gfn(*gl1e);
                 mfn = shadow_l1e_get_mfn(*sl1e);
                 gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
-                if ( mfn_x(gmfn) != mfn_x(mfn) )
+                if ( !p2m_is_grant(p2mt) && mfn_x(gmfn) != mfn_x(mfn) )
                     AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
                                " --> %" PRI_mfn " != mfn %" PRI_mfn,
                                gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
diff -r 6cba03677059 -r c6126a51b509 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Tue May 19 12:11:33 2009 +0100
+++ b/xen/common/grant_table.c  Tue May 19 12:11:33 2009 +0100
@@ -189,29 +189,18 @@
     return handle;
 }
 
-/*
- * Returns 0 if TLB flush / invalidate required by caller.
- * va will indicate the address to be invalidated.
- * 
- * addr is _either_ a host virtual address, or the address of the pte to
- * update, as indicated by the GNTMAP_contains_pte flag.
- */
-static void
-__gnttab_map_grant_ref(
-    struct gnttab_map_grant_ref *op)
+static int
+__gnttab_map_grant_ref_core(struct domain *rd, unsigned short flags,
+                            grant_ref_t ref, unsigned long *frame,
+                            unsigned *cache_flags, u32 *old_pin, u32 *act_pin)
 {
-    struct domain *ld, *rd, *owner;
+    struct domain *ld;
     struct vcpu   *led;
-    int            handle;
-    unsigned long  frame = 0, nr_gets = 0;
     int            rc = GNTST_okay;
-    u32            old_pin;
-    u32            act_pin;
-    unsigned int   cache_flags;
     struct active_grant_entry *act;
-    struct grant_mapping *mt;
     grant_entry_t *sha;
     union grant_combo scombo, prev_scombo, new_scombo;
+    p2m_type_t     type;
 
     /*
      * We bound the number of times we retry CMPXCHG on memory locations that
@@ -226,44 +215,24 @@
     led = current;
     ld = led->domain;
 
-    if ( unlikely((op->flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
+    if ( unlikely((flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
     {
-        gdprintk(XENLOG_INFO, "Bad flags in grant map op (%x).\n", op->flags);
-        op->status = GNTST_bad_gntref;
-        return;
+        gdprintk(XENLOG_INFO, "Bad flags in grant map op (%x).\n", flags);
+        return GNTST_bad_gntref;
     }
 
-    if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) )
-    {
-        gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom);
-        op->status = GNTST_bad_domain;
-        return;
-    }
-
-    rc = xsm_grant_mapref(ld, rd, op->flags);
+    rc = xsm_grant_mapref(ld, rd, flags);
     if ( rc )
-    {
-        rcu_unlock_domain(rd);
-        op->status = GNTST_permission_denied;
-        return;
-    }
-
-    if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
-    {
-        rcu_unlock_domain(rd);
-        gdprintk(XENLOG_INFO, "Failed to obtain maptrack handle.\n");
-        op->status = GNTST_no_device_space;
-        return;
-    }
+        return GNTST_permission_denied;
 
     spin_lock(&rd->grant_table->lock);
 
     /* Bounds check on the grant ref */
-    if ( unlikely(op->ref >= nr_grant_entries(rd->grant_table)))
-        PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);
+    if ( unlikely(ref >= nr_grant_entries(rd->grant_table)))
+        PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", ref);
 
-    act = &active_entry(rd->grant_table, op->ref);
-    sha = &shared_entry(rd->grant_table, op->ref);
+    act = &active_entry(rd->grant_table, ref);
+    sha = &shared_entry(rd->grant_table, ref);
 
     /* If already pinned, check the active domid and avoid refcnt overflow. */
     if ( act->pin &&
@@ -274,7 +243,7 @@
                  act->domid, ld->domain_id, act->pin);
 
     if ( !act->pin ||
-         (!(op->flags & GNTMAP_readonly) &&
+         (!(flags & GNTMAP_readonly) &&
           !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) )
     {
         scombo.word = *(u32 *)&sha->flags;
@@ -300,7 +269,7 @@
             new_scombo = scombo;
             new_scombo.shorts.flags |= GTF_reading;
 
-            if ( !(op->flags & GNTMAP_readonly) )
+            if ( !(flags & GNTMAP_readonly) )
             {
                 new_scombo.shorts.flags |= GTF_writing;
                 if ( unlikely(scombo.shorts.flags & GTF_readonly) )
@@ -324,24 +293,115 @@
         {
             act->domid = scombo.shorts.domid;
             act->gfn = sha->frame;
-            act->frame = gmfn_to_mfn(rd, sha->frame);
+            act->frame = mfn_x(gfn_to_mfn(rd, sha->frame, &type));
+            if ( p2m_is_grant(type) )
+                PIN_FAIL(unlock_out, GNTST_general_error,
+                         "implicit transitive grant through P2M mapping\n");
         }
     }
 
-    old_pin = act->pin;
-    if ( op->flags & GNTMAP_device_map )
-        act->pin += (op->flags & GNTMAP_readonly) ?
+    *old_pin = act->pin;
+    if ( flags & GNTMAP_device_map )
+        act->pin += (flags & GNTMAP_readonly) ?
             GNTPIN_devr_inc : GNTPIN_devw_inc;
-    if ( op->flags & GNTMAP_host_map )
-        act->pin += (op->flags & GNTMAP_readonly) ?
+    if ( flags & GNTMAP_host_map )
+        act->pin += (flags & GNTMAP_readonly) ?
             GNTPIN_hstr_inc : GNTPIN_hstw_inc;
 
-    frame = act->frame;
-    act_pin = act->pin;
-
-    cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
+    *frame = act->frame;
+    *act_pin = act->pin;
+    *cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
 
     spin_unlock(&rd->grant_table->lock);
+
+    return GNTST_okay;
+
+ unlock_out:
+    spin_unlock(&rd->grant_table->lock);
+
+    return rc;
+}
+
+static void
+__gnttab_map_undo_core_pin(struct domain *rd, grant_ref_t gref,
+                           unsigned flags)
+{
+    struct active_grant_entry *act;
+    grant_entry_t *sha;
+
+    spin_lock(&rd->grant_table->lock);
+
+    act = &active_entry(rd->grant_table, gref);
+    sha = &shared_entry(rd->grant_table, gref);
+
+    if ( flags & GNTMAP_device_map )
+        act->pin -= (flags & GNTMAP_readonly) ?
+            GNTPIN_devr_inc : GNTPIN_devw_inc;
+    if ( flags & GNTMAP_host_map )
+        act->pin -= (flags & GNTMAP_readonly) ?
+            GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+
+    if ( !(flags & GNTMAP_readonly) &&
+         !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
+        gnttab_clear_flag(_GTF_writing, &sha->flags);
+
+    if ( !act->pin )
+        gnttab_clear_flag(_GTF_reading, &sha->flags);
+
+    spin_unlock(&rd->grant_table->lock);
+}
+
+/*
+ * Returns 0 if TLB flush / invalidate required by caller.
+ * va will indicate the address to be invalidated.
+ * 
+ * addr is _either_ a host virtual address, or the address of the pte to
+ * update, as indicated by the GNTMAP_contains_pte flag.
+ */
+static void
+__gnttab_map_grant_ref(
+    struct gnttab_map_grant_ref *op)
+{
+    struct domain *ld = current->domain;
+    struct domain *owner;
+    struct domain *rd;
+    unsigned long frame;
+    unsigned cache_flags;
+    int rc;
+    unsigned nr_gets;
+    struct grant_mapping *mt;
+    grant_handle_t handle;
+    u32 old_pin;
+    u32 act_pin;
+
+    nr_gets = 0;
+
+    if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) )
+    {
+        gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom);
+        op->status = GNTST_bad_domain;
+        return;
+    }
+
+    handle = get_maptrack_handle(ld->grant_table);
+    if ( unlikely(handle == -1) )
+    {
+        op->status = GNTST_no_device_space;
+        rcu_unlock_domain(rd);
+        gdprintk(XENLOG_INFO, "Failed to obtain maptrack handle.\n");
+        return;
+    }
+
+    rc = __gnttab_map_grant_ref_core(rd, op->flags, op->ref, &frame,
+                                     &cache_flags, &old_pin, &act_pin);
+
+    if ( rc != GNTST_okay )
+    {
+        put_maptrack_handle(ld->grant_table, handle);
+        op->status = rc;
+        rcu_unlock_domain(rd);
+        return;
+    }
 
     if ( !mfn_valid(frame) ||
          (owner = page_get_owner_and_reference(mfn_to_page(frame))) == dom_io )
@@ -366,7 +426,7 @@
     }
     else if ( owner == rd )
     {
-        if ( gnttab_host_mapping_get_page_type(op, ld, rd) &&
+        if ( gnttab_host_mapping_get_page_type(op->flags, ld, rd) &&
              !get_page_type(mfn_to_page(frame), PGT_writable_page) )
             goto could_not_pin;
 
@@ -402,7 +462,12 @@
          !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
          (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
     {
-        if ( iommu_map_page(ld, mfn_to_gmfn(ld, frame), frame) )
+        /* Shouldn't happen, because you can't use the IOMMU in an
+         * HVM domain. */
+        BUG_ON(paging_mode_translate(ld));
+        /* We're not translated, so we know that gmfns and mfns are
+           the same things, so the IOMMU entry is always 1-to-1. */
+        if ( iommu_map_page(ld, frame, frame) )
         {
             rc = GNTST_general_error;
             goto undo_out;
@@ -432,32 +497,13 @@
     }
     if ( nr_gets > 0 )
     {
-        if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
+        if ( gnttab_host_mapping_get_page_type(op->flags, ld, rd) )
             put_page_type(mfn_to_page(frame));
         put_page(mfn_to_page(frame));
     }
 
-    spin_lock(&rd->grant_table->lock);
+    __gnttab_map_undo_core_pin(rd, op->ref, op->flags);
 
-    act = &active_entry(rd->grant_table, op->ref);
-    sha = &shared_entry(rd->grant_table, op->ref);
-
-    if ( op->flags & GNTMAP_device_map )
-        act->pin -= (op->flags & GNTMAP_readonly) ?
-            GNTPIN_devr_inc : GNTPIN_devw_inc;
-    if ( op->flags & GNTMAP_host_map )
-        act->pin -= (op->flags & GNTMAP_readonly) ?
-            GNTPIN_hstr_inc : GNTPIN_hstw_inc;
-
-    if ( !(op->flags & GNTMAP_readonly) &&
-         !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
-        gnttab_clear_flag(_GTF_writing, &sha->flags);
-
-    if ( !act->pin )
-        gnttab_clear_flag(_GTF_reading, &sha->flags);
-
- unlock_out:
-    spin_unlock(&rd->grant_table->lock);
     op->status = rc;
     put_maptrack_handle(ld->grant_table, handle);
     rcu_unlock_domain(rd);
@@ -506,9 +552,10 @@
 
     op->map = &maptrack_entry(ld->grant_table, op->handle);
 
-    if ( unlikely(!op->map->flags) )
+    if ( unlikely(!op->map->flags || (op->map->flags & MAPTRACK_P2M)) )
     {
-        gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle);
+        gdprintk(XENLOG_INFO, "Bad flags for handle %d (%d).\n", op->handle,
+                 op->map->flags);
         op->status = GNTST_bad_handle;
         return;
     }
@@ -580,7 +627,8 @@
          (old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
          !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
     {
-        if ( iommu_unmap_page(ld, mfn_to_gmfn(ld, op->frame)) )
+        BUG_ON(paging_mode_translate(ld));
+        if ( iommu_unmap_page(ld, op->frame) )
         {
             rc = GNTST_general_error;
             goto unmap_out;
@@ -657,7 +705,7 @@
 
         if ( !is_iomem_page(op->frame) ) 
         {
-            if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
+            if ( gnttab_host_mapping_get_page_type(op->flags, ld, rd) )
                 put_page_type(mfn_to_page(op->frame));
             put_page(mfn_to_page(op->frame));
         }
@@ -1289,6 +1337,7 @@
     s16 rc = GNTST_okay;
     int retries = 0;
     union grant_combo scombo, prev_scombo, new_scombo;
+    p2m_type_t type;
 
     spin_lock(&rd->grant_table->lock);
 
@@ -1349,9 +1398,12 @@
 
         if ( !act->pin )
         {
+            act->frame = mfn_x(gfn_to_mfn(rd, sha->frame, &type));
+            if ( p2m_is_grant(type) )
+                PIN_FAIL(unlock_out, GNTST_general_error,
+                         "trying to do transitive P2M grant\n");
             act->domid = scombo.shorts.domid;
             act->gfn = sha->frame;
-            act->frame = gmfn_to_mfn(rd, sha->frame);
         }
     }
 
@@ -1732,8 +1784,9 @@
             {
                 BUG_ON(!(act->pin & GNTPIN_hstr_mask));
                 act->pin -= GNTPIN_hstr_inc;
-                if ( gnttab_release_host_mappings &&
-                     !is_iomem_page(act->frame) )
+                if ( (map->flags & MAPTRACK_P2M) ||
+                     (gnttab_release_host_mappings &&
+                      !is_iomem_page(act->frame)) )
                     put_page(mfn_to_page(act->frame));
             }
         }
@@ -1751,10 +1804,11 @@
             {
                 BUG_ON(!(act->pin & GNTPIN_hstw_mask));
                 act->pin -= GNTPIN_hstw_inc;
-                if ( gnttab_release_host_mappings &&
-                     !is_iomem_page(act->frame) )
+                if ( (map->flags & MAPTRACK_P2M) ||
+                     (gnttab_release_host_mappings &&
+                      !is_iomem_page(act->frame)) )
                 {
-                    if ( gnttab_host_mapping_get_page_type(map, d, rd) )
+                    if ( gnttab_host_mapping_get_page_type(map->flags, d, rd) )
                         put_page_type(mfn_to_page(act->frame));
                     put_page(mfn_to_page(act->frame));
                 }
@@ -1802,6 +1856,214 @@
     d->grant_table = NULL;
 }
 
+#ifdef HAVE_GRANT_MAP_P2M
+int
+gnttab_map_grant_ref_p2m(domid_t domid, grant_ref_t gref, xen_pfn_t pfn,
+                         unsigned flags, grant_handle_t *handle)
+{
+    struct domain *ld = current->domain;
+    struct domain *rd;
+    struct grant_mapping *mt;
+    unsigned long frame;
+    unsigned cache_flags;
+    u32 old_pin;
+    u32 act_pin;
+    int rc;
+
+    if ( (flags & ~GNTMAP_readonly) != GNTMAP_host_map )
+    {
+        gdprintk(XENLOG_INFO, "strange flags for map_grant_ref_p2m 0x%x\n",
+                 flags);
+        domain_crash(ld);
+        return GNTST_general_error;
+    }
+
+    if ( domid == ld->domain_id )
+    {
+        gdprintk(XENLOG_INFO, "trying to map own grant reference\n");
+        return GNTST_general_error;
+    }
+
+    rd = rcu_lock_domain_by_id(domid);
+    if ( !rd )
+    {
+        gdprintk(XENLOG_INFO, "Could not find domain %d\n", domid);
+        return GNTST_bad_domain;
+    }
+
+    maptrack_lock(ld->grant_table);
+
+    *handle = get_maptrack_handle(ld->grant_table);
+    if ( unlikely(*handle == -1) )
+    {
+        gdprintk(XENLOG_INFO, "Failed to obtain maptrack handle.\n");
+        maptrack_unlock(ld->grant_table);
+        rcu_unlock_domain(rd);
+        return GNTST_no_device_space;
+    }
+
+    rc = __gnttab_map_grant_ref_core(rd, flags, gref, &frame,
+                                     &cache_flags, &old_pin, &act_pin);
+    if ( rc != GNTST_okay )
+    {
+        put_maptrack_handle(ld->grant_table, *handle);
+        maptrack_unlock(ld->grant_table);
+        rcu_unlock_domain(rd);
+        return rc;
+    }
+
+    if ( !mfn_valid(frame) || is_iomem_page(frame) || cache_flags != 0 )
+    {
+        gdprintk(XENLOG_WARNING,
+                 "Can't map IO memory into P2M table (%lx, domain %d), or strange cache flags (%x)\n",
+                 frame, rd->domain_id, cache_flags);
+        rc = GNTST_general_error;
+        goto undo_out;
+    }
+
+    if ( unlikely(!(gnttab_host_mapping_get_page_type(flags, ld, rd) ?
+                    get_page_and_type(mfn_to_page(frame), rd,
+                                      PGT_writable_page) :
+                    get_page(mfn_to_page(frame), rd))) )
+    {
+        if ( !rd->is_dying )
+            gdprintk(XENLOG_WARNING, "Could not pin grant frame %lx\n",
+                     frame);
+        rc = GNTST_general_error;
+        goto undo_out;
+    }
+
+    rc = guest_physmap_add_entry(ld, pfn, frame, 0,
+                                 (flags & GNTMAP_readonly) ?
+                                 p2m_grant_map_ro :
+                                 p2m_grant_map_rw);
+    if ( rc )
+    {
+        rc = GNTST_general_error;
+        goto put_and_out;
+    }
+
+    mt = &maptrack_entry(ld->grant_table, *handle);
+    mt->domid = domid;
+    mt->ref   = gref;
+    mt->pfn   = pfn;
+    mt->flags = flags | MAPTRACK_P2M;
+    maptrack_unlock(ld->grant_table);
+
+    return GNTST_okay;
+
+put_and_out:
+    if ( gnttab_host_mapping_get_page_type(flags, ld, rd) )
+        put_page_and_type(mfn_to_page(frame));
+    else
+        put_page(mfn_to_page(frame));
+
+undo_out:
+    __gnttab_map_undo_core_pin(rd, gref, flags);
+    put_maptrack_handle(ld->grant_table, *handle);
+
+    maptrack_unlock(ld->grant_table);
+    rcu_unlock_domain(rd);
+    return rc;
+}
+
+int
+gnttab_unmap_grant_ref_p2m(grant_handle_t handle)
+{
+    struct domain *ld = current->domain;
+    struct domain *rd;
+    struct grant_mapping *mt;
+    struct active_grant_entry *act;
+    struct grant_entry   *sha;
+
+    if ( unlikely(handle >= ld->grant_table->maptrack_limit) )
+    {
+        gdprintk(XENLOG_INFO, "Bad handle (%d).\n", handle);
+        domain_crash(ld);
+        return GNTST_bad_handle;
+    }
+
+    maptrack_lock(ld->grant_table);
+    mt = &maptrack_entry(ld->grant_table, handle);
+    if ( !(mt->flags & MAPTRACK_P2M) )
+    {
+        gdprintk(XENLOG_INFO, "Bad flags for handle %d (%x).\n",
+                 handle, mt->flags);
+        maptrack_unlock(ld->grant_table);
+        domain_crash(ld);
+        return GNTST_bad_handle;
+    }
+
+    rd = rcu_lock_domain_by_id(mt->domid);
+    if ( unlikely(!rd) )
+    {
+        /* This really shouldn't happen: we hold a reference to one of
+           the remote domain's pages, which should be enough to keep
+           it around.  This should arguably be a BUG(), but be
+           generous and just use domain_crash(). */
+        gdprintk(XENLOG_INFO, "Could not find domain %d\n", mt->domid);
+        maptrack_unlock(ld->grant_table);
+        domain_crash(ld);
+        return GNTST_general_error;
+    }
+
+    spin_lock(&rd->grant_table->lock);
+
+    act = &active_entry(rd->grant_table, mt->ref);
+    sha = &shared_entry(rd->grant_table, mt->ref);
+
+    guest_physmap_remove_page(current->domain, mt->pfn, act->frame, 0);
+
+    if ( gnttab_host_mapping_get_page_type(mt->flags, ld, rd) )
+        put_page_and_type(mfn_to_page(act->frame));
+    else
+        put_page(mfn_to_page(act->frame));
+
+    ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
+    if ( mt->flags & GNTMAP_readonly )
+        act->pin -= GNTPIN_hstr_inc;
+    else
+        act->pin -= GNTPIN_hstw_inc;
+
+    /* If just unmapped a writable mapping, mark as dirtied */
+    if ( !(mt->flags & GNTMAP_readonly) )
+         gnttab_mark_dirty(rd, act->frame);
+
+    if ( ((act->pin & GNTPIN_hstw_mask) == 0) &&
+         !(mt->flags & GNTMAP_readonly) )
+        gnttab_clear_flag(_GTF_writing, &sha->flags);
+
+    if ( act->pin == 0 )
+        gnttab_clear_flag(_GTF_reading, &sha->flags);
+
+    mt->flags = 0;
+
+    spin_unlock(&rd->grant_table->lock);
+
+    put_maptrack_handle(ld->grant_table, handle);
+
+    maptrack_unlock(ld->grant_table);
+    rcu_unlock_domain(rd);
+
+    return GNTST_okay;
+}
+#else /* !HAVE_GRANT_MAP_P2M */
+int
+gnttab_map_grant_ref_p2m(domid_t domid, grant_ref_t gref, xen_pfn_t pfn,
+                         unsigned flags, grant_handle_t *handle)
+{
+    return GNTST_general_error;
+}
+int
+gnttab_unmap_grant_ref_p2m(grant_handle_t handle)
+{
+    /* gnttab_map_grant_ref_p2m() can never succeed, so this should
+       never be called. */
+    domain_crash(current->domain);
+    return GNTST_general_error;
+}
+#endif
+
 /*
  * Local variables:
  * mode: C
diff -r 6cba03677059 -r c6126a51b509 xen/include/asm-x86/grant_table.h
--- a/xen/include/asm-x86/grant_table.h Tue May 19 12:11:33 2009 +0100
+++ b/xen/include/asm-x86/grant_table.h Tue May 19 12:11:33 2009 +0100
@@ -39,8 +39,8 @@
 }
 
 /* Foreign mappings of HVM-guest pages do not modify the type count. */
-#define gnttab_host_mapping_get_page_type(op, ld, rd)   \
-    (!((op)->flags & GNTMAP_readonly) &&                \
+#define gnttab_host_mapping_get_page_type(flags, ld, rd)   \
+    (!((flags) & GNTMAP_readonly) &&                       \
      (((ld) == (rd)) || !paging_mode_external(rd)))
 
 /* Done implicitly when page tables are destroyed. */
diff -r 6cba03677059 -r c6126a51b509 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h  Tue May 19 12:11:33 2009 +0100
+++ b/xen/include/asm-x86/hvm/domain.h  Tue May 19 12:11:33 2009 +0100
@@ -31,6 +31,7 @@
 #include <asm/hvm/viridian.h>
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/svm/vmcb.h>
+#include <public/grant_table.h>
 #include <public/hvm/params.h>
 #include <public/hvm/save.h>
 
diff -r 6cba03677059 -r c6126a51b509 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Tue May 19 12:11:33 2009 +0100
+++ b/xen/include/asm-x86/p2m.h Tue May 19 12:11:33 2009 +0100
@@ -45,6 +45,10 @@
  */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
+#ifdef __x86_64__
+#define HAVE_GRANT_MAP_P2M
+#endif
+
 /*
  * The upper levels of the p2m pagetable always contain full rights; all 
  * variation in the access control bits is made in the level-1 PTEs.
@@ -65,6 +69,12 @@
     p2m_mmio_dm = 4,            /* Reads and write go to the device model */
     p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
     p2m_populate_on_demand = 6, /* Place-holder for empty memory */
+
+    /* Note that these can only be used if HAVE_GRANT_MAP_P2M is
+       defined.  They get defined anyway so as to avoid lots of
+       #ifdef's everywhere else. */
+    p2m_grant_map_rw = 7,       /* Read/write grant mapping */
+    p2m_grant_map_ro = 8,       /* Read-only grant mapping */
 } p2m_type_t;
 
 typedef enum {
@@ -81,13 +91,19 @@
                        | p2m_to_mask(p2m_ram_logdirty)  \
                        | p2m_to_mask(p2m_ram_ro))
 
+/* Grant mapping types, which map to a real machine frame in another
+ * VM */
+#define P2M_GRANT_TYPES (p2m_to_mask(p2m_grant_map_rw)  \
+                         | p2m_to_mask(p2m_grant_map_ro) )
+
 /* MMIO types, which don't have to map to anything in the frametable */
 #define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
                         | p2m_to_mask(p2m_mmio_direct))
 
 /* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
 #define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
-                      | p2m_to_mask(p2m_ram_ro))
+                      | p2m_to_mask(p2m_ram_ro)         \
+                      | p2m_to_mask(p2m_grant_map_ro) )
 
 #define P2M_MAGIC_TYPES (p2m_to_mask(p2m_populate_on_demand))
 
@@ -96,7 +112,8 @@
 #define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
 #define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
 #define p2m_is_magic(_t) (p2m_to_mask(_t) & P2M_MAGIC_TYPES)
-#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+#define p2m_is_grant(_t) (p2m_to_mask(_t) & P2M_GRANT_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES | P2M_GRANT_TYPES))
 
 /* Populate-on-demand */
 #define POPULATE_ON_DEMAND_MFN  (1<<9)
@@ -161,8 +178,12 @@
 /* Extract the type from the PTE flags that store it */
 static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
 {
-    /* Type is stored in the "available" bits, 9, 10 and 11 */
+    /* Type is stored in the "available" bits */
+#ifdef __x86_64__
+    return (flags >> 9) & 0x3fff;
+#else
     return (flags >> 9) & 0x7;
+#endif
 }
 
 /* Read the current domain's p2m table.  Do not populate PoD pages. */
@@ -225,17 +246,6 @@
     else
         return mfn_x(mfn);
 }
-
-/* Translate the frame number held in an l1e from guest to machine */
-static inline l1_pgentry_t
-gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
-{
-    if ( unlikely(paging_mode_translate(d)) )
-        l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)),
-                           l1e_get_flags(l1e));
-    return l1e;
-}
-
 
 /* Init the datastructures for later use by the p2m code */
 int p2m_init(struct domain *d);
diff -r 6cba03677059 -r c6126a51b509 xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h   Tue May 19 12:11:33 2009 +0100
+++ b/xen/include/public/hvm/hvm_op.h   Tue May 19 12:11:33 2009 +0100
@@ -128,4 +128,30 @@
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
+/* Map a grant reference exposed by a given remote domain into a given
+   slot in the local P2M table. */
+#define HVMOP_map_grant_ref 9
+struct xen_hvm_map_grant_ref {
+    /* IN */
+    domid_t domid;
+    uint16_t pad0;
+    grant_ref_t gref;
+    uint64_t pfn;
+    uint32_t flags;
+    /* OUT */
+    grant_handle_t handle;
+    uint32_t pad1;
+};
+typedef struct xen_hvm_map_grant_ref xen_hvm_map_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_grant_ref_t);
+
+/* Unmap a grant reference previously mapped by HVMOP_map_grant_ref */
+#define HVMOP_unmap_grant_ref 10
+struct xen_hvm_unmap_grant_ref {
+    /* IN */
+    grant_handle_t handle;
+};
+typedef struct xen_hvm_unmap_grant_ref xen_hvm_unmap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_unmap_grant_ref_t);
+
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff -r 6cba03677059 -r c6126a51b509 xen/include/xen/grant_table.h
--- a/xen/include/xen/grant_table.h     Tue May 19 12:11:33 2009 +0100
+++ b/xen/include/xen/grant_table.h     Tue May 19 12:11:33 2009 +0100
@@ -71,10 +71,13 @@
  * table of these, indexes into which are returned as a 'mapping handle'.
  */
 struct grant_mapping {
-    u32      ref;           /* grant ref */
-    u16      flags;         /* 0-4: GNTMAP_* ; 5-15: unused */
-    domid_t  domid;         /* granting domain */
+    u32       ref;          /* grant ref */
+    u16       flags;        /* 0-4: GNTMAP_* ; 5: MAPTRACK_P2M; 6-15: unused */
+    domid_t   domid;        /* granting domain */
+    xen_pfn_t pfn;          /* Only for P2M mappings */
 };
+
+#define MAPTRACK_P2M 0x20
 
 /* Fairly arbitrary. [POLICY] */
 #define MAPTRACK_MAX_ENTRIES 16384
@@ -148,4 +151,8 @@
     return num_act_frames_from_sha_frames(nr_grant_frames(gt));
 }
 
+int gnttab_map_grant_ref_p2m(domid_t domid, grant_ref_t gref, xen_pfn_t pfn,
+                             unsigned flags, grant_handle_t *handle);
+int gnttab_unmap_grant_ref_p2m(grant_handle_t handle);
+
 #endif /* __XEN_GRANT_TABLE_H__ */