[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH RFC v2 22/23] xen/mem_paging: add a populate_evicted paging op



From: Joshua Otto <jtotto@xxxxxxxxxxxx>

The paging API presently permits only individual, populated pages to be
evicted, and even then only after a previous nomination op on the
candidate page.  This works well at steady-state, but is somewhat
awkward and inefficient for pagers attempting to implement startup
demand-paging for guests: in this case it is necessary to populate all
of the holes in the physmap to be demand-paged, only to then nominate
and immediately evict each page one-by-one.

To permit more efficient startup demand-paging, introduce a new
populate_evicted paging op.  Given a batch of gfns, it:
- marks gfns corresponding to physmap holes as paged-out directly
- frees the backing frames of previously-populated gfns, and then marks
  them as paged-out directly (skipping the nomination step)

The latter behaviour is needed to fully support postcopy live migration:
a page may be populated only to have its contents subsequently
invalidated by a write at the sender, requiring it to ultimately be
demand-paged anyway.

I measured a reduction in time required to evict a batch of 512k
previously-unpopulated pfns from 8.535s to 1.590s (~5.4x speedup).

Note: as a long-running batching memory op, populate_evicted takes
advantage of the existing pre-emption/continuation hack (encoding the
starting offset into the batch in bits [6:] of the op argument).  To
make this work, plumb the cmd argument all the way down through
do_memory_op() -> arch_memory_op() -> subarch_memory_op() ->
mem_paging_memop(), fixing up each switch statement along the way to
use only the MEMOP_CMD bits.

Signed-off-by: Joshua Otto <jtotto@xxxxxxxxxxxx>
---
 tools/libxc/include/xenctrl.h    |   2 +
 tools/libxc/xc_mem_paging.c      |  31 ++++++++++++
 xen/arch/x86/mm.c                |   5 +-
 xen/arch/x86/mm/mem_paging.c     |  34 ++++++++++++-
 xen/arch/x86/mm/p2m.c            | 101 +++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/x86_64/compat/mm.c  |   6 ++-
 xen/arch/x86/x86_64/mm.c         |   6 ++-
 xen/include/asm-x86/mem_paging.h |   3 +-
 xen/include/asm-x86/p2m.h        |   2 +
 xen/include/public/memory.h      |  13 +++--
 10 files changed, 190 insertions(+), 13 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 1629f41..22992b9 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1945,6 +1945,8 @@ int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id);
 int xc_mem_paging_nominate(xc_interface *xch, domid_t domain_id,
                            uint64_t gfn);
 int xc_mem_paging_evict(xc_interface *xch, domid_t domain_id, uint64_t gfn);
+int xc_mem_paging_populate_evicted(xc_interface *xch, domid_t domain_id,
+                                   xen_pfn_t *gfns, uint32_t nr);
 int xc_mem_paging_prep(xc_interface *xch, domid_t domain_id, uint64_t gfn);
 int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
                        uint64_t gfn, void *buffer);
diff --git a/tools/libxc/xc_mem_paging.c b/tools/libxc/xc_mem_paging.c
index f314b08..b0416b6 100644
--- a/tools/libxc/xc_mem_paging.c
+++ b/tools/libxc/xc_mem_paging.c
@@ -116,6 +116,37 @@ int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
     return rc;
 }
 
+int xc_mem_paging_populate_evicted(xc_interface *xch,
+                                   domid_t domain_id,
+                                   xen_pfn_t *gfns,
+                                   uint32_t nr)
+{
+    DECLARE_HYPERCALL_BOUNCE(gfns, nr * sizeof(*gfns),
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    int rc;
+
+    xen_mem_paging_op_t mpo =
+    {
+        .op       = XENMEM_paging_op_populate_evicted,
+        .domain   = domain_id,
+        .u        = { .batch = { .nr = nr } }
+    };
+
+    if ( xc_hypercall_bounce_pre(xch, gfns) )
+    {
+        PERROR("Could not bounce memory for XENMEM_paging_op_populate_evicted");
+        return -1;
+    }
+
+    set_xen_guest_handle(mpo.u.batch.gfns, gfns);
+
+    rc = do_memory_op(xch, XENMEM_paging_op, &mpo, sizeof(mpo));
+
+    xc_hypercall_bounce_post(xch, gfns);
+
+    return rc;
+}
+
 
 /*
  * Local variables:
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 77b0af1..bc41bde 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4955,9 +4955,10 @@ int xenmem_add_to_physmap_one(
 
 long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
-    int rc;
+    long rc;
+    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( cmd )
+    switch ( op )
     {
     case XENMEM_set_memory_map:
     {
diff --git a/xen/arch/x86/mm/mem_paging.c b/xen/arch/x86/mm/mem_paging.c
index e23e26c..8f62f58 100644
--- a/xen/arch/x86/mm/mem_paging.c
+++ b/xen/arch/x86/mm/mem_paging.c
@@ -21,12 +21,17 @@
 
 
 #include <asm/p2m.h>
+#include <xen/event.h>
 #include <xen/guest_access.h>
+#include <xen/hypercall.h>
 #include <xsm/xsm.h>
 
-int mem_paging_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg)
+long mem_paging_memop(unsigned long cmd,
+                      XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg)
 {
-    int rc;
+    long rc;
+    unsigned long start_gfn = cmd >> MEMOP_EXTENT_SHIFT;
+    xen_pfn_t gfn;
     xen_mem_paging_op_t mpo;
     struct domain *d;
     bool_t copyback = 0;
@@ -56,6 +61,31 @@ int mem_paging_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg)
         rc = p2m_mem_paging_evict(d, mpo.u.single.gfn);
         break;
 
+    case XENMEM_paging_op_populate_evicted:
+        while ( start_gfn < mpo.u.batch.nr )
+        {
+            if ( copy_from_guest_offset(&gfn, mpo.u.batch.gfns, start_gfn, 1) )
+            {
+                rc = -EFAULT;
+                goto out;
+            }
+
+            rc = p2m_mem_paging_populate_evicted(d, gfn);
+            if ( rc )
+                goto out;
+
+            if ( mpo.u.batch.nr > ++start_gfn && hypercall_preempt_check() )
+            {
+                cmd = XENMEM_paging_op | (start_gfn << MEMOP_EXTENT_SHIFT);
+                rc = hypercall_create_continuation(__HYPERVISOR_memory_op, "lh",
+                                                   cmd, arg);
+                goto out;
+            }
+        }
+
+        rc = 0;
+        break;
+
     case XENMEM_paging_op_prep:
         rc = p2m_mem_paging_prep(d, mpo.u.single.gfn, mpo.u.single.buffer);
         if ( !rc )
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 9eb6dc8..2ad46f6 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1449,6 +1449,107 @@ int p2m_mem_paging_evict(struct domain *d, unsigned long gfn)
 }
 
 /**
+ * p2m_mem_paging_populate_evicted - 'populate' a guest page as paged-out
+ * @d: guest domain
+ * @gfn: guest page to populate
+ *
+ * Returns 0 for success or negative errno values if eviction is not possible.
+ *
+ * p2m_mem_paging_populate_evicted() is most commonly called by a pager
+ * during guest restoration to mark a page as evicted so that the guest can be
+ * resumed before memory restoration is complete.
+ *
+ * Ideally, the page has never previously been populated, and it is only
+ * necessary to mark the existing hole in the physmap as an evicted page.
+ * However, to accomodate the common live migration scenario in which a page is
+ * populated but subsequently has its contents invalidated by a write at the
+ * sender, permit @gfn to have already been populated and free its current
+ * backing frame if so.
+ */
+int p2m_mem_paging_populate_evicted(struct domain *d, unsigned long gfn)
+{
+    struct page_info *page = NULL;
+    p2m_type_t p2mt;
+    p2m_access_t a;
+    mfn_t mfn;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    int rc = -EBUSY;
+
+    gfn_lock(p2m, gfn, 0);
+
+    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
+
+    if ( mfn_valid(mfn) )
+    {
+        /*
+         * This is the first case we know how to deal with: the page has
+         * previously been populated, but the caller wants it in the evicted
+         * state anyway (e.g. because it was dirtied during live migration and
+         * is now being postcopy migrated).
+         *
+         * Double-check that it's pageable according to the union of the
+         * normal nominate() and evict() criteria, and free its backing page if
+         * so.
+         */
+
+        if ( !p2m_is_pageable(p2mt) )
+            goto out;
+
+        page = mfn_to_page(mfn);
+        if ( !get_page(page, d) )
+            goto out;
+
+        if ( is_iomem_page(mfn) )
+            goto err_put;
+
+        if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
+             (2 | PGC_allocated) )
+            goto err_put;
+
+        if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
+            goto err_put;
+
+        /* Decrement guest domain's ref count of the page. */
+        if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
+            put_page(page);
+
+        /* Clear content before returning the page to Xen. */
+        scrub_one_page(page);
+
+        /* Finally, drop the ref _we_ took on the page, freeing it fully. */
+        put_page(page);
+    }
+    else if ( p2m_is_hole(p2mt) && !p2m_is_paging(p2mt) )
+    {
+        /*
+         * This is the second case we know how to deal with: the pfn isn't
+         * currently populated, and can transition directly to paged_out.  All
+         * we need to do is adjust its p2m entry, which we share with the first
+         * case, so there's nothing further to do along this branch.
+         */
+    }
+    else
+    {
+        /* We can't handle this - error out. */
+        goto out;
+    }
+
+    rc = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K, p2m_ram_paged, a);
+    if ( !rc )
+        atomic_inc(&d->paged_pages);
+
+    /* Hop over the inapplicable put_page(). */
+    goto out;
+
+ err_put:
+    put_page(page);
+
+ out:
+    gfn_unlock(p2m, gfn, 0);
+    return rc;
+}
+
+/**
  * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
  * @d: guest domain
  * @gfn: guest page to drop
diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
index b737af1..f4aff90 100644
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -53,8 +53,9 @@ int compat_arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     compat_pfn_t mfn;
     unsigned int i;
     int rc = 0;
+    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( cmd )
+    switch ( op )
     {
     case XENMEM_set_memory_map:
     {
@@ -187,7 +188,8 @@ int compat_arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         return mem_sharing_get_nr_shared_mfns();
 
     case XENMEM_paging_op:
-        return mem_paging_memop(guest_handle_cast(arg, xen_mem_paging_op_t));
+        return mem_paging_memop(cmd,
+                                guest_handle_cast(arg, xen_mem_paging_op_t));
 
     case XENMEM_sharing_op:
         return mem_sharing_memop(guest_handle_cast(arg, xen_mem_sharing_op_t));
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index aa1b94f..7394d92 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -926,8 +926,9 @@ long subarch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     xen_pfn_t mfn, last_mfn;
     unsigned int i;
     long rc = 0;
+    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( cmd )
+    switch ( op )
     {
     case XENMEM_machphys_mfn_list:
         if ( copy_from_guest(&xmml, arg, 1) )
@@ -1004,7 +1005,8 @@ long subarch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         return mem_sharing_get_nr_shared_mfns();
 
     case XENMEM_paging_op:
-        return mem_paging_memop(guest_handle_cast(arg, xen_mem_paging_op_t));
+        return mem_paging_memop(cmd,
+                                guest_handle_cast(arg, xen_mem_paging_op_t));
 
     case XENMEM_sharing_op:
         return mem_sharing_memop(guest_handle_cast(arg, xen_mem_sharing_op_t));
diff --git a/xen/include/asm-x86/mem_paging.h b/xen/include/asm-x86/mem_paging.h
index 176acaf..7b9a4f6 100644
--- a/xen/include/asm-x86/mem_paging.h
+++ b/xen/include/asm-x86/mem_paging.h
@@ -22,7 +22,8 @@
 #ifndef __ASM_X86_MEM_PAGING_H__
 #define __ASM_X86_MEM_PAGING_H__
 
-int mem_paging_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg);
+long mem_paging_memop(unsigned long cmd,
+                      XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg);
 
 #endif /*__ASM_X86_MEM_PAGING_H__ */
 
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 408f7da..653d413 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -676,6 +676,8 @@ int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn);
 /* Evict a frame */
 int p2m_mem_paging_evict(struct domain *d, unsigned long gfn);
+/* If @gfn is populated, evict it.  If not, mark it as paged-out directly. */
+int p2m_mem_paging_populate_evicted(struct domain *d, unsigned long gfn);
 /* Tell xenpaging to drop a paged out frame */
 void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn, 
                                 p2m_type_t p2mt);
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 49ef162..5196803 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -385,10 +385,11 @@ typedef struct xen_pod_target xen_pod_target_t;
 #define XENMEM_get_sharing_freed_pages    18
 #define XENMEM_get_sharing_shared_pages   19
 
-#define XENMEM_paging_op                    20
-#define XENMEM_paging_op_nominate           0
-#define XENMEM_paging_op_evict              1
-#define XENMEM_paging_op_prep               2
+#define XENMEM_paging_op                     20
+#define XENMEM_paging_op_nominate            0
+#define XENMEM_paging_op_evict               1
+#define XENMEM_paging_op_prep                2
+#define XENMEM_paging_op_populate_evicted    3
 
 struct xen_mem_paging_op {
     uint8_t     op;         /* XENMEM_paging_op_* */
@@ -401,6 +402,10 @@ struct xen_mem_paging_op {
             /* Other OPs */
             uint64_aligned_t    gfn;   /* IN:  gfn of page being operated on */
         } single;
+        struct {
+            XEN_GUEST_HANDLE(xen_pfn_t) gfns;
+            uint32_t                    nr;
+        } batch;
     } u;
 };
 typedef struct xen_mem_paging_op xen_mem_paging_op_t;
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.