
[Xen-changelog] [xen-unstable] PoD memory 7/9: Xen interface

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] PoD memory 7/9: Xen interface
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 08 Jan 2009 06:57:25 -0800
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1231152348 0
# Node ID 2090917489c52af6d8604ffeb7db00cbbb0a51da
# Parent  ebe11a4523931adb616e07f7ac0c0c89e526ca95
PoD memory 7/9: Xen interface

Implement the Xen interface to PoD functionality.
* Increase the number of MEMOP bits from 4 to 6 (increasing the number
  of available memory operations from 16 to 64).
* Introduce XENMEMF_populate_on_demand, which causes populate_physmap()
  to fill a range with PoD entries rather than backing it with RAM.
* Introduce the XENMEM_[sg]et_pod_target operations in the memory
  hypercall, to get and set the PoD cache size.  set_pod_target() should
  be called during domain creation, as well as after modifying the memory
  target of any domain which may have outstanding PoD entries; a usage
  sketch follows below.
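
For illustration, a minimal sketch of how a privileged toolstack might drive
the new call.  hypercall_memory_op() is a hypothetical stand-in for the
environment's memory_op entry point (e.g. HYPERVISOR_memory_op in guest
kernel code, or an equivalent userspace wrapper); the struct mirrors
xen_pod_target from xen/include/public/memory.h below.

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

#define XENMEM_set_pod_target 16
#define XENMEM_get_pod_target 17

typedef uint16_t domid_t;

struct xen_pod_target {
    uint64_t target_pages;     /* IN:  desired domain size, in pages */
    uint64_t tot_pages;        /* OUT: pages currently allocated */
    uint64_t pod_cache_pages;  /* OUT: pages held in the PoD cache */
    uint64_t pod_entries;      /* OUT: outstanding PoD entries */
    domid_t  domid;            /* IN:  domain to operate on */
};

/* Hypothetical wrapper around the XENMEM hypercall. */
extern long hypercall_memory_op(int op, void *arg);

/* Call at domain creation, and again after changing the memory target
 * of any domain which may have outstanding PoD entries. */
static long set_pod_target(domid_t domid, uint64_t target_pages)
{
    struct xen_pod_target t = {
        .target_pages = target_pages,
        .domid        = domid,
    };
    long rc = hypercall_memory_op(XENMEM_set_pod_target, &t);

    if ( rc == 0 )
        printf("dom%" PRIu16 ": tot=%" PRIu64 " cache=%" PRIu64
               " entries=%" PRIu64 "\n",
               t.domid, t.tot_pages, t.pod_cache_pages, t.pod_entries);
    return rc;
}

XENMEM_get_pod_target takes the same structure but skips the target update,
only filling in the three OUT fields.  Note that target_pages must not
exceed the domain's max_pages, or the hypercall fails with -EINVAL.  On the
populate_physmap side, the caller instead sets XENMEMF_populate_on_demand
in reservation.mem_flags.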

Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
 xen/arch/x86/mm.c               |   43 +++++++++++
 xen/arch/x86/mm/p2m.c           |  153 +++++++++++++++++++++++++++++++++++++++-
 xen/arch/x86/x86_64/compat/mm.c |   23 ++++++
 xen/common/memory.c             |   59 +++++++++------
 xen/include/asm-x86/p2m.h       |    4 +
 xen/include/public/memory.h     |   15 +++
 xen/include/xen/hypercall.h     |    2 
 xen/include/xen/mm.h            |    2 
 xen/include/xlat.lst            |    1 
 9 files changed, 277 insertions(+), 25 deletions(-)

diff -r ebe11a452393 -r 2090917489c5 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/arch/x86/mm.c Mon Jan 05 10:45:48 2009 +0000
@@ -3976,6 +3976,49 @@ long arch_memory_op(int op, XEN_GUEST_HA
         return 0;
     }
 
+    case XENMEM_set_pod_target:
+    case XENMEM_get_pod_target:
+    {
+        xen_pod_target_t target;
+        struct domain *d;
+
+        /* Support DOMID_SELF? */
+        if ( !IS_PRIV(current->domain) )
+            return -EINVAL;
+
+        if ( copy_from_guest(&target, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_target_domain_by_id(target.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        if ( op == XENMEM_set_pod_target )
+        {
+            if ( target.target_pages > d->max_pages )
+            {
+                rc = -EINVAL;
+                goto pod_target_out_unlock;
+            }
+            
+            rc = p2m_pod_set_mem_target(d, target.target_pages);
+        }
+
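+        /* Both get and set report the resulting state back to the caller. */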
+        target.tot_pages       = d->tot_pages;
+        target.pod_cache_pages = d->arch.p2m->pod.count;
+        target.pod_entries     = d->arch.p2m->pod.entry_count;
+
+        if ( copy_to_guest(arg, &target, 1) )
+        {
+            rc = -EFAULT;
+            goto pod_target_out_unlock;
+        }
+        
+    pod_target_out_unlock:
+        rcu_unlock_domain(d);
+        return rc;
+    }
+
     default:
         return subarch_memory_op(op, arg);
     }
diff -r ebe11a452393 -r 2090917489c5 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/arch/x86/mm/p2m.c     Mon Jan 05 10:45:48 2009 +0000
@@ -387,6 +387,150 @@ static struct page_info * p2m_pod_cache_
     return p;
 }
 
+/* Set the size of the cache, allocating or freeing as necessary. */
+static int
+p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
+{
+    struct p2m_domain *p2md = d->arch.p2m;
+    int ret = 0;
+
+    /* Increasing the target */
+    while ( pod_target > p2md->pod.count )
+    {
+        struct page_info * page;
+        int order;
+
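+        /* Use a 2MB superpage (order 9) while at least that much remains
+         * to be added; otherwise fall back to single pages. */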
+        if ( (pod_target - p2md->pod.count) >= (1 << 9) )
+            order = 9;
+        else
+            order = 0;
+
+        page = alloc_domheap_pages(d, order, 0);
+        if ( unlikely(page == NULL) )
+            goto out;
+
+        p2m_pod_cache_add(d, page, order);
+    }
+
+    /* Decreasing the target */
+    /* We hold the p2m lock here, so we don't need to worry about
+     * cache disappearing under our feet. */
+    while ( pod_target < p2md->pod.count )
+    {
+        struct page_info * page;
+        int order, i;
+
+        /* Grab the lock before checking whether pod.super is empty, or the
+         * last entries may disappear before we grab the lock. */
+        spin_lock(&d->page_alloc_lock);
+
+        if ( (p2md->pod.count - pod_target) > (1 << 9)
+             && !list_empty(&p2md->pod.super) )
+            order = 9;
+        else
+            order = 0;
+
+        page = p2m_pod_cache_get(d, order);
+
+        ASSERT(page != NULL);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        /* Then free them */
+        for ( i = 0 ; i < (1 << order) ; i++ )
+        {
+            /* Copied from common/memory.c:guest_remove_page() */
+            if ( unlikely(!get_page(page+i, d)) )
+            {
+                gdprintk(XENLOG_INFO, "Bad page free for domain %u\n",
+                         d->domain_id);
+                ret = -EINVAL;
+                goto out;
+            }
+
+            if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
+                put_page_and_type(page+i);
+            
+            if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
+                put_page(page+i);
+
+            put_page(page+i);
+        }
+    }
+
+out:
+    return ret;
+}
+
+/*
+ * The "right behavior" here requires some careful thought.  First, some
+ * definitions:
+ * + M: static_max
+ * + B: number of pages the balloon driver has ballooned down to
+ * + P: number of populated pages
+ * + T: old target
+ * + T': new target
+ *
+ * The following equations should hold:
+ *  0 <= P <= T <= B <= M
+ *  d->arch.p2m->pod.entry_count == B - P
+ *  d->tot_pages == P + d->arch.p2m->pod.count
+ *
+ * Now we have the following potential cases to cover:
+ *  B < T'     : Set the PoD cache size equal to the number of outstanding
+ *   PoD entries.  The balloon driver will deflate the balloon to give the
+ *   remainder of the RAM back to the guest OS.
+ *  T < T' < B : Increase the PoD cache size.
+ *  T' < T <= B: Here we have a choice.  We could decrease the size of the
+ *   cache and get the memory back right away.  However, that would mean
+ *   that every time we reduce the memory target, we risk the guest
+ *   attempting to populate the memory before the balloon driver has reached
+ *   its new target.  It is safer never to reduce the cache size here, and
+ *   to do so only when the balloon driver frees PoD ranges.
+ *
+ * If there are many zero pages, we could also reach the target by doing
+ * zero sweeps and marking the ranges PoD; but the balloon driver will have
+ * to free this memory eventually anyway, so we don't actually gain that
+ * much by doing so.
+ *
+ * NB that the (B < T') case may require adjustment to the cache size as
+ * PoD pages are freed as well; i.e., freeing a PoD-backed entry when
+ * pod.entry_count == pod.count requires us to reduce both pod.entry_count
+ * and pod.count.
+ */
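+/* Worked example (illustrative numbers, in pages): M=1024, B=768, P=256,
+ * T=512, so entry_count = B - P = 512, pod.count = T - P = 256, and
+ * tot_pages = 512.  Then:
+ *  T'=448 (T' < T <= B): target < tot_pages, so nothing is done here; the
+ *    balloon driver gives back the difference.
+ *  T'=640 (T < T' < B):  pod_target = T' - P = 384; the cache grows by 128.
+ *  T'=900 (B < T'):      T' - P = 644 exceeds entry_count, so the cache is
+ *    clamped to 512 and the balloon driver deflates to cover the rest. */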
+int
+p2m_pod_set_mem_target(struct domain *d, unsigned long target)
+{
+    unsigned long pod_target;
+    struct p2m_domain *p2md = d->arch.p2m;
+    int ret = 0;
+    unsigned long populated;
+
+    /* P == B: Nothing to do. */
+    if ( p2md->pod.entry_count == 0 )
+        goto out;
+
+    /* T' < B: Don't reduce the cache size; let the balloon driver
+     * take care of it. */
+    if ( target < d->tot_pages )
+        goto out;
+
+    populated  = d->tot_pages - p2md->pod.count;
+
+    pod_target = target - populated;
+
+    /* B < T': Set the cache size equal to # of outstanding entries,
+     * let the balloon driver fill in the rest. */
+    if ( pod_target > p2md->pod.entry_count )
+        pod_target = p2md->pod.entry_count;
+
+    ASSERT( pod_target >= p2md->pod.count );
+
+    ret = p2m_pod_set_cache_target(d, pod_target);
+
+out:
+    return ret;
+}
+
 void
 p2m_pod_empty_cache(struct domain *d)
 {
@@ -537,6 +681,13 @@ p2m_pod_decrease_reservation(struct doma
             ram--;
         }
     }    
+
+    /* If we've reduced our "liabilities" below our "assets", free some. */
+    if ( p2md->pod.entry_count < p2md->pod.count )
+    {
+        printk("b %d\n", p2md->pod.entry_count);
+        p2m_pod_set_cache_target(d, p2md->pod.entry_count);
+    }
 
     /* If there are no more non-PoD entries, tell decrease_reservation() that
      * there's nothing left to do. */
@@ -786,7 +937,7 @@ p2m_pod_emergency_sweep_super(struct dom
         /* Stop if we're past our limit and we have found *something*.
          *
          * NB that this is a zero-sum game; we're increasing our cache size
-         * by re-increasing our 'debt'.  Since we hold the p2m lock,
+         * by increasing our 'debt'.  Since we hold the p2m lock,
          * (entry_count - count) must remain the same. */
         if ( !list_empty(&p2md->pod.super) &&  i < limit )
             break;
diff -r ebe11a452393 -r 2090917489c5 xen/arch/x86/x86_64/compat/mm.c
--- a/xen/arch/x86/x86_64/compat/mm.c   Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/arch/x86/x86_64/compat/mm.c   Mon Jan 05 10:45:48 2009 +0000
@@ -122,6 +122,29 @@ int compat_arch_memory_op(int op, XEN_GU
 #define XLAT_memory_map_HNDL_buffer(_d_, _s_) ((void)0)
         XLAT_memory_map(&cmp, nat);
 #undef XLAT_memory_map_HNDL_buffer
+        if ( copy_to_guest(arg, &cmp, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+
+    case XENMEM_set_pod_target:
+    case XENMEM_get_pod_target:
+    {
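+        /* Translate the 32-bit layout to the native one, forward the call,
+         * then translate the result back for the compat guest. */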
+        struct compat_pod_target cmp;
+        struct xen_pod_target *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+
+        if ( copy_from_guest(&cmp, arg, 1) )
+            return -EFAULT;
+
+        XLAT_pod_target(nat, &cmp);
+
+        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+        if ( rc < 0 )
+            break;
+
+        XLAT_pod_target(&cmp, nat);
+
         if ( copy_to_guest(arg, &cmp, 1) )
             rc = -EFAULT;
 
diff -r ebe11a452393 -r 2090917489c5 xen/common/memory.c
--- a/xen/common/memory.c       Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/common/memory.c       Mon Jan 05 10:45:48 2009 +0000
@@ -111,31 +111,40 @@ static void populate_physmap(struct memo
         if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
             goto out;
 
-        page = alloc_domheap_pages(d, a->extent_order, a->memflags);
-        if ( unlikely(page == NULL) ) 
-        {
-            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
-                     "id=%d memflags=%x (%ld of %d)\n",
-                     a->extent_order, d->domain_id, a->memflags,
-                     i, a->nr_extents);
-            goto out;
-        }
-
-        mfn = page_to_mfn(page);
-        guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
-
-        if ( !paging_mode_translate(d) )
-        {
-            for ( j = 0; j < (1 << a->extent_order); j++ )
-                set_gpfn_from_mfn(mfn + j, gpfn + j);
-
-            /* Inform the domain of the new page's machine address. */ 
-            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
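+        /* Mark the range populate-on-demand instead of backing it with RAM
+         * now; pages will be supplied from the PoD cache on first access. */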
+        if ( a->memflags & MEMF_populate_on_demand )
+        {
+            if ( guest_physmap_mark_populate_on_demand(d, gpfn,
+                                                       a->extent_order) < 0 )
                 goto out;
         }
-    }
-
- out:
+        else
+        {
+            page = alloc_domheap_pages(d, a->extent_order, a->memflags);
+            if ( unlikely(page == NULL) ) 
+            {
+                gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
+                         "id=%d memflags=%x (%ld of %d)\n",
+                         a->extent_order, d->domain_id, a->memflags,
+                         i, a->nr_extents);
+                goto out;
+            }
+
+            mfn = page_to_mfn(page);
+            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+
+            if ( !paging_mode_translate(d) )
+            {
+                for ( j = 0; j < (1 << a->extent_order); j++ )
+                    set_gpfn_from_mfn(mfn + j, gpfn + j);
+
+                /* Inform the domain of the new page's machine address. */ 
+                if ( unlikely(__copy_to_guest_offset(a->extent_list, i,
+                                                     &mfn, 1)) )
+                    goto out;
+            }
+        }
+    }
+
+out:
     a->nr_done = i;
 }
 
@@ -527,6 +536,10 @@ long do_memory_op(unsigned long cmd, XEN
 
         args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
 
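+        /* Only XENMEM_populate_physmap honours the PoD flag. */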
+        if ( op == XENMEM_populate_physmap
+             && (reservation.mem_flags & XENMEMF_populate_on_demand) )
+            args.memflags |= MEMF_populate_on_demand;
+
         if ( likely(reservation.domid == DOMID_SELF) )
         {
             d = rcu_lock_current_domain();
diff -r ebe11a452393 -r 2090917489c5 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/asm-x86/p2m.h Mon Jan 05 10:45:48 2009 +0000
@@ -261,6 +261,10 @@ void p2m_pod_dump_data(struct domain *d)
  * (usually in preparation for domain destruction) */
 void p2m_pod_empty_cache(struct domain *d);
 
+/* Set populate-on-demand cache size so that the total memory allocated to a
+ * domain matches target */
+int p2m_pod_set_mem_target(struct domain *d, unsigned long target);
+
 /* Call when decreasing memory reservation to handle PoD entries properly.
  * Will return '1' if all entries were handled and nothing more need be done.*/
 int
diff -r ebe11a452393 -r 2090917489c5 xen/include/public/memory.h
--- a/xen/include/public/memory.h       Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/public/memory.h       Mon Jan 05 10:45:48 2009 +0000
@@ -48,6 +48,8 @@
 /* NUMA node to allocate from. */
 #define XENMEMF_node(x)     (((x) + 1) << 8)
 #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
 #endif
 
 struct xen_memory_reservation {
@@ -299,6 +301,19 @@ typedef struct xen_foreign_memory_map xe
 typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
 DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
 
+#define XENMEM_set_pod_target       16
+#define XENMEM_get_pod_target       17
+struct xen_pod_target {
+    /* IN */
+    uint64_t target_pages;
+    /* OUT */
+    uint64_t tot_pages;
+    uint64_t pod_cache_pages;
+    uint64_t pod_entries;
+    /* IN */
+    domid_t domid;
+};
+typedef struct xen_pod_target xen_pod_target_t;
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
 /*
diff -r ebe11a452393 -r 2090917489c5 xen/include/xen/hypercall.h
--- a/xen/include/xen/hypercall.h       Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/xen/hypercall.h       Mon Jan 05 10:45:48 2009 +0000
@@ -48,7 +48,7 @@ do_platform_op(
  * at what point in the page list to resume. For this purpose I steal the
  * high-order bits of the @cmd parameter, which are otherwise unused and zero.
  */
-#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
+#define MEMOP_EXTENT_SHIFT 6 /* cmd[:6] == start_extent */
 #define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)
 
 extern long
diff -r ebe11a452393 -r 2090917489c5 xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/xen/mm.h      Mon Jan 05 10:45:48 2009 +0000
@@ -72,6 +72,8 @@ int assign_pages(
 /* memflags: */
 #define _MEMF_no_refcount 0
 #define  MEMF_no_refcount (1U<<_MEMF_no_refcount)
+#define _MEMF_populate_on_demand 1
+#define  MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand)
 #define _MEMF_node        8
 #define  MEMF_node(n)     ((((n)+1)&0xff)<<_MEMF_node)
 #define _MEMF_bits        24
diff -r ebe11a452393 -r 2090917489c5 xen/include/xlat.lst
--- a/xen/include/xlat.lst      Mon Jan 05 10:45:09 2009 +0000
+++ b/xen/include/xlat.lst      Mon Jan 05 10:45:48 2009 +0000
@@ -38,6 +38,7 @@
 !      memory_exchange                 memory.h
 !      memory_map                      memory.h
 !      memory_reservation              memory.h
+!      pod_target                      memory.h
 !      translate_gpfn_list             memory.h
 !      sched_poll                      sched.h
 ?      sched_remote_shutdown           sched.h
