
[Xen-devel] [PATCH v5 4/4] xen: introduce XENMEM_exchange_and_pin and XENMEM_unpin



Introduce two new hypercalls XENMEM_exchange_and_pin and XENMEM_unpin.

XENMEM_exchange_and_pin is like XENMEM_exchange, but it also pins the
new pages: their p2m mappings are guaranteed not to change until
XENMEM_unpin is called.  XENMEM_exchange_and_pin returns the DMA frame
numbers of the new pages to the caller, even if the caller is an
autotranslated guest.
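
For illustration only (not part of this patch), a guest kernel might wrap
the call roughly as follows, assuming the usual HYPERVISOR_memory_op() and
set_xen_guest_handle() helpers and the public memory.h definitions:

    /* Hypothetical sketch: swap the page currently mapped at gpfn for a
     * fresh, pinned one mapped at the same gpfn, and return its DMA frame. */
    static int exchange_and_pin_one(xen_pfn_t gpfn, xen_pfn_t *dma_frame)
    {
        xen_pfn_t frame = gpfn;   /* in: GMFN/GPFN; out: DMA frame number */
        struct xen_memory_exchange exch = {
            .in  = { .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF },
            .out = { .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF },
        };
        int rc;

        set_xen_guest_handle(exch.in.extent_start, &frame);
        set_xen_guest_handle(exch.out.extent_start, &frame);

        rc = HYPERVISOR_memory_op(XENMEM_exchange_and_pin, &exch);
        if ( rc == 0 )
            *dma_frame = frame;   /* p2m entry for the new page is pinned */
        return rc;
    }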

The only effect of XENMEM_unpin is to "unpin" the previously
pinned pages. Afterwards the p2m mappings can again be changed
transparently by the hypervisor, as usual. The memory remains accessible
to the guest.

Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>


Changes in v5:
- memory_exchange: handle guest_physmap_pin_range failures;
- make i an unsigned long in unpin;
- add nr_unpinned to xen_unpin to report partial success.

Changes in v4:
- rename XENMEM_get_dma_buf to XENMEM_exchange_and_pin;
- rename XENMEM_put_dma_buf to XENMEM_unpin;
- move the pinning before we copy back the mfn to the guest;
- propagate errors returned by guest_physmap_pin_range;
- use xen_memory_exchange_t as parameter for XENMEM_exchange_and_pin;
- use an unsigned iterator in unpin;
- improve the documentation of the new hypercalls;
- add a note about out.address_bits for XENMEM_exchange.
---
 xen/common/memory.c         |  141 +++++++++++++++++++++++++++++++++----------
 xen/include/public/memory.h |   47 ++++++++++++++
 2 files changed, 156 insertions(+), 32 deletions(-)

diff --git a/xen/common/memory.c b/xen/common/memory.c
index 4b2f311..34169c1 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -364,14 +364,14 @@ static void decrease_reservation(struct memop_args *a)
     a->nr_done = i;
 }
 
-static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
+static long memory_exchange(int op, XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
 {
     struct xen_memory_exchange exch;
     PAGE_LIST_HEAD(in_chunk_list);
     PAGE_LIST_HEAD(out_chunk_list);
     unsigned long in_chunk_order, out_chunk_order;
     xen_pfn_t     gpfn, gmfn, mfn;
-    unsigned long i, j, k = 0; /* gcc ... */
+    unsigned long i = 0, j = 0, k = 0; /* gcc ... */
     unsigned int  memflags = 0;
     long          rc = 0;
     struct domain *d;
@@ -542,54 +542,72 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
         /* Assign each output page to the domain. */
         for ( j = 0; (page = page_list_remove_head(&out_chunk_list)); ++j )
         {
-            if ( assign_pages(d, page, exch.out.extent_order,
-                              MEMF_no_refcount) )
-            {
-                unsigned long dec_count;
-                bool_t drop_dom_ref;
-
-                /*
-                 * Pages in in_chunk_list is stolen without
-                 * decreasing the tot_pages. If the domain is dying when
-                 * assign pages, we need decrease the count. For those pages
-                 * that has been assigned, it should be covered by
-                 * domain_relinquish_resources().
-                 */
-                dec_count = (((1UL << exch.in.extent_order) *
-                              (1UL << in_chunk_order)) -
-                             (j * (1UL << exch.out.extent_order)));
-
-                spin_lock(&d->page_alloc_lock);
-                domain_adjust_tot_pages(d, -dec_count);
-                drop_dom_ref = (dec_count && !d->tot_pages);
-                spin_unlock(&d->page_alloc_lock);
-
-                if ( drop_dom_ref )
-                    put_domain(d);
-
-                free_domheap_pages(page, exch.out.extent_order);
-                goto dying;
-            }
+            unsigned long dec_count;
+            bool_t drop_dom_ref;
 
             if ( __copy_from_guest_offset(&gpfn, exch.out.extent_start,
                                           (i << out_chunk_order) + j, 1) )
             {
                 rc = -EFAULT;
-                continue;
+                goto extent_error;
             }
 
             mfn = page_to_mfn(page);
             guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
 
+            if ( op == XENMEM_exchange_and_pin )
+            {
+                if ( guest_physmap_pin_range(d, gpfn, exch.out.extent_order) )
+                {
+                    rc = -EFAULT;
+                    goto extent_error_physmap;
+                }
+            }
+
             if ( !paging_mode_translate(d) )
             {
                 for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                     set_gpfn_from_mfn(mfn + k, gpfn + k);
+            }
+
+            rc = assign_pages(d, page, exch.out.extent_order, MEMF_no_refcount);
+            if ( rc )
+                goto extent_error_physmap;
+
+            if ( op == XENMEM_exchange_and_pin || !paging_mode_translate(d) )
+            {
                 if ( __copy_to_guest_offset(exch.out.extent_start,
                                             (i << out_chunk_order) + j,
                                             &mfn, 1) )
                     rc = -EFAULT;
             }
+
+            continue;
+
+extent_error_physmap:
+            guest_physmap_remove_page(d, gpfn, mfn, exch.out.extent_order);
+extent_error:
+            /*
+             * Pages in in_chunk_list are stolen without decreasing
+             * tot_pages. If the domain is dying when we assign pages,
+             * we need to decrease the count here. Pages that have
+             * already been assigned are covered by
+             * domain_relinquish_resources().
+             */
+            dec_count = (((1UL << exch.in.extent_order) *
+                        (1UL << in_chunk_order)) -
+                    (j * (1UL << exch.out.extent_order)));
+
+            spin_lock(&d->page_alloc_lock);
+            domain_adjust_tot_pages(d, -dec_count);
+            drop_dom_ref = (dec_count && !d->tot_pages);
+            spin_unlock(&d->page_alloc_lock);
+
+            if ( drop_dom_ref )
+                put_domain(d);
+
+            free_domheap_pages(page, exch.out.extent_order);
+            goto dying;
         }
         BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) );
     }
@@ -627,6 +645,60 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
     return rc;
 }
 
+static long unpin(XEN_GUEST_HANDLE_PARAM(xen_unpin_t) arg)
+{
+    int rc;
+    unsigned long i;
+    struct xen_unpin unpin;
+    xen_pfn_t gpfn;
+    struct domain *d;
+
+    if ( copy_from_guest(&unpin, arg, 1) )
+        return -EFAULT;
+
+    /* Various sanity checks. */
+    if ( /* Extent orders are sensible? */
+         (unpin.in.extent_order > MAX_ORDER) ||
+         /* Sizes of input list do not overflow a long? */
+         ((~0UL >> unpin.in.extent_order) < unpin.in.nr_extents) )
+        return -EFAULT;
+
+    if ( !guest_handle_okay(unpin.in.extent_start, unpin.in.nr_extents) )
+        return -EFAULT;
+
+    d = rcu_lock_domain_by_any_id(unpin.in.domid);
+    if ( d == NULL )
+    {
+        rc = -ESRCH;
+        goto fail;
+    }
+
+    for ( i = 0; i < unpin.in.nr_extents; i++ )
+    {
+        if ( unlikely(__copy_from_guest_offset(
+                      &gpfn, unpin.in.extent_start, i, 1)) )
+        {
+            rc = -EFAULT;
+            goto partial;
+        }
+
+        rc = guest_physmap_unpin_range(d, gpfn, unpin.in.extent_order);
+        if ( rc )
+            goto partial;
+    }
+
+    rc = 0;
+
+partial:
+    unpin.nr_unpinned = i;
+    if ( __copy_field_to_guest(arg, &unpin, nr_unpinned) )
+        rc = -EFAULT;
+
+ fail:
+    rcu_unlock_domain(d);
+    return rc;
+}
+
 long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
     struct domain *d;
@@ -715,8 +787,13 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
         break;
 
+    case XENMEM_exchange_and_pin:
     case XENMEM_exchange:
-        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
+        rc = memory_exchange(op, guest_handle_cast(arg, xen_memory_exchange_t));
+        break;
+
+    case XENMEM_unpin:
+        rc = unpin(guest_handle_cast(arg, xen_unpin_t));
         break;
 
     case XENMEM_maximum_ram_page:
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 7a26dee..73fdc31 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -105,6 +105,7 @@ struct xen_memory_exchange {
     /*
      * [IN] Details of memory extents to be exchanged (GMFN bases).
      * Note that @in.address_bits is ignored and unused.
+     * @out.address_bits should contain the address mask for the new pages.
      */
     struct xen_memory_reservation in;
 
@@ -459,6 +460,52 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
  * The zero value is appropiate.
  */
 
+#define XENMEM_exchange_and_pin             26
+/*
+ * This hypercall is similar to XENMEM_exchange: it takes the same
+ * struct as an argument and it exchanges the pages passed in with a new
+ * set of pages. The new pages are going to be "pinned": it's guaranteed
+ * that their p2m mapping won't be changed until explicitly "unpinned".
+ * The content of the exchanged pages is lost.
+ * Only normal guest r/w memory can be pinned: no granted pages or
+ * ballooned pages.
+ * If the return code is zero then @out.extent_start provides the DMA frame
+ * numbers of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code:
+ *   -ENOSYS if not implemented
+ *   -EINVAL if the page is already pinned
+ *   -EFAULT if an internal error occurs
+ * On complete success then always @nr_exchanged == @in.nr_extents.  On
+ * partial success @nr_exchanged indicates how much work was done and a
+ * negative error code is returned.
+ */
+
+#define XENMEM_unpin             27
+/*
+ * XENMEM_unpin unpins a set of pages, previously pinned by
+ * XENMEM_exchange_and_pin. After this call the p2m mapping of the pages can
+ * be transparently changed by the hypervisor, as usual. The pages are
+ * still accessible from the guest.
+ */
+struct xen_unpin {
+    /*
+     * [IN] Details of memory extents to be unpinned (GMFN bases).
+     * Note that @in.address_bits is ignored and unused.
+     */
+    struct xen_memory_reservation in;
+    /*
+     * [OUT] Number of input extents that were successfully unpinned.
+     *  1. The first @nr_unpinned input extents were successfully
+     *     unpinned.
+     *  2. All other input extents are untouched.
+     *  3. If not all input extents are unpinned then the return code of this
+     *     command will be non-zero.
+     */
+    xen_ulong_t nr_unpinned;
+};
+typedef struct xen_unpin xen_unpin_t;
+DEFINE_XEN_GUEST_HANDLE(xen_unpin_t);
+
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
-- 
1.7.2.5

