WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH xen.git] Add hugepage support to balloon driver

To: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Subject: [Xen-devel] [PATCH xen.git] Add hugepage support to balloon driver
From: Dave McCracken <dcm@xxxxxxxx>
Date: Wed, 04 Nov 2009 07:48:33 -0600
Cc: Xen Developers List <xen-devel@xxxxxxxxxxxxxxxxxxx>, Keir Fraser <Keir.Fraser@xxxxxxxxxxxxx>
Delivery-date: Wed, 04 Nov 2009 05:49:05 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This patch adds hugepage support to the balloon driver.  It is activated
by specifying "balloon_hugepages" on the kernel command line.  Once activated,
the balloon driver will work entirely in hugepage sized chunks.

If, when returning pages, it finds a hugepage that is not contiguous
at the machine level, it will return each underlying page separately.
When this page is later repopulated it will be contiguous.

Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>

--------

 balloon.c |  171 +++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 125 insertions(+), 46 deletions(-)

--- 2.6-xen/drivers/xen/balloon.c       2009-10-29 17:48:30.000000000 -0500
+++ 2.6-xen-balloon/drivers/xen/balloon.c       2009-10-29 19:14:33.000000000 -0500
@@ -59,7 +59,7 @@
 #include <xen/features.h>
 #include <xen/page.h>
 
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
 
 #define BALLOON_CLASS_NAME "xen_memory"
 
@@ -85,6 +85,14 @@ static int register_balloon(struct sys_d
 
 static struct balloon_stats balloon_stats;
 
+/*
+ * Work in pages of this order.  Can be either 0 for normal pages
+ * or 9 for hugepages.
+ */
+static int balloon_order;
+static unsigned long balloon_npages;
+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
@@ -113,10 +121,41 @@ static struct timer_list balloon_timer;
 static void scrub_page(struct page *page)
 {
 #ifdef CONFIG_XEN_SCRUB_PAGES
-       clear_highpage(page);
+       int i;
+
+       for (i = 0; i < balloon_npages; i++)
+               clear_highpage(page++);
 #endif
 }
 
+static void free_discontig_frame(void)
+{
+       int rc;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .domid        = DOMID_SELF,
+               .nr_extents   = balloon_npages,
+               .extent_order = 0
+       };
+
+       set_xen_guest_handle(reservation.extent_start, discontig_frame_list);
+       rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(rc != balloon_npages);
+}
+
+static unsigned long shrink_frame(unsigned long nr_pages)
+{
+       unsigned long i, j;
+
+       for (i = 0, j = 0; i < nr_pages; i++, j++) {
+               if (frame_list[i] == 0)
+                       j++;
+               if (i != j)
+                       frame_list[i] = frame_list[j];
+       }
+       return i;
+}
+
 /* balloon_append: add the given page to the balloon. */
 static void balloon_append(struct page *page)
 {
@@ -190,12 +229,11 @@ static unsigned long current_target(void
 
 static int increase_reservation(unsigned long nr_pages)
 {
-       unsigned long  pfn, i, flags;
+       unsigned long  pfn, mfn, i, j, flags;
        struct page   *page;
        long           rc;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
-               .extent_order = 0,
                .domid        = DOMID_SELF
        };
 
@@ -207,12 +245,14 @@ static int increase_reservation(unsigned
        page = balloon_first_page();
        for (i = 0; i < nr_pages; i++) {
                BUG_ON(page == NULL);
-               frame_list[i] = page_to_pfn(page);;
+               frame_list[i] = page_to_pfn(page);
                page = balloon_next_page(page);
        }
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
+       reservation.extent_order = balloon_order;
+
        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
        if (rc < 0)
                goto out;
@@ -222,19 +262,22 @@ static int increase_reservation(unsigned
                BUG_ON(page == NULL);
 
                pfn = page_to_pfn(page);
+               mfn = frame_list[i];
                BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
                       phys_to_machine_mapping_valid(pfn));
 
-               set_phys_to_machine(pfn, frame_list[i]);
+               for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
+                       set_phys_to_machine(pfn, mfn);
 
-               /* Link back into the page tables if not highmem. */
-               if (pfn < max_low_pfn) {
-                       int ret;
-                       ret = HYPERVISOR_update_va_mapping(
-                               (unsigned long)__va(pfn << PAGE_SHIFT),
-                               mfn_pte(frame_list[i], PAGE_KERNEL),
-                               0);
-                       BUG_ON(ret);
+                       /* Link back into the page tables if not highmem. */
+                       if (pfn < max_low_pfn) {
+                               int ret;
+                               ret = HYPERVISOR_update_va_mapping(
+                                       (unsigned long)__va(pfn << PAGE_SHIFT),
+                                       mfn_pte(mfn, PAGE_KERNEL),
+                                       0);
+                               BUG_ON(ret);
+                       }
                }
 
                /* Relinquish the page back to the allocator. */
@@ -253,13 +296,13 @@ static int increase_reservation(unsigned
 
 static int decrease_reservation(unsigned long nr_pages)
 {
-       unsigned long  pfn, i, flags;
+       unsigned long  pfn, lpfn, mfn, i, j, flags;
        struct page   *page;
        int            need_sleep = 0;
-       int ret;
+       int             discontig, discontig_free;
+       int             ret;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
-               .extent_order = 0,
                .domid        = DOMID_SELF
        };
 
@@ -267,7 +310,7 @@ static int decrease_reservation(unsigned
                nr_pages = ARRAY_SIZE(frame_list);
 
        for (i = 0; i < nr_pages; i++) {
-               if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+               if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) {
                        nr_pages = i;
                        need_sleep = 1;
                        break;
@@ -277,14 +320,6 @@ static int decrease_reservation(unsigned
                frame_list[i] = pfn_to_mfn(pfn);
 
                scrub_page(page);
-
-               if (!PageHighMem(page)) {
-                       ret = HYPERVISOR_update_va_mapping(
-                               (unsigned long)__va(pfn << PAGE_SHIFT),
-                               __pte_ma(0), 0);
-                       BUG_ON(ret);
-                }
-
        }
 
        /* Ensure that ballooned highmem pages don't have kmaps. */
@@ -295,18 +330,39 @@ static int decrease_reservation(unsigned
 
        /* No more mappings: invalidate P2M and add to balloon. */
        for (i = 0; i < nr_pages; i++) {
-               pfn = mfn_to_pfn(frame_list[i]);
-               set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+               mfn = frame_list[i];
+               lpfn = pfn = mfn_to_pfn(mfn);
                balloon_append(pfn_to_page(pfn));
+               discontig_free = 0;
+               for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) {
+                       if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn)
+                               discontig_free = 1;
+
+                       set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
+                       if (!PageHighMem(page)) {
+                               ret = HYPERVISOR_update_va_mapping(
+                                       (unsigned long)__va(lpfn << PAGE_SHIFT),
+                                       __pte_ma(0), 0);
+                               BUG_ON(ret);
+                       }
+               }
+               if (discontig_free) {
+                       free_discontig_frame();
+                       frame_list[i] = 0;
+                       discontig = 1;
+               }
        }
+       balloon_stats.current_pages -= nr_pages;
+
+       if (discontig)
+               nr_pages = shrink_frame(nr_pages);
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
+       reservation.extent_order = balloon_order;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
        BUG_ON(ret != nr_pages);
 
-       balloon_stats.current_pages -= nr_pages;
-
        spin_unlock_irqrestore(&xen_reservation_lock, flags);
 
        return need_sleep;
@@ -374,7 +430,7 @@ static void watch_target(struct xenbus_w
        /* The given memory/target value is in KiB, so it needs converting to
         * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
         */
-       balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+       balloon_set_new_target(new_target >> ((PAGE_SHIFT - 10) + balloon_order));
 }
 
 static int balloon_init_watcher(struct notifier_block *notifier,
@@ -399,9 +455,12 @@ static int __init balloon_init(void)
        if (!xen_pv_domain())
                return -ENODEV;
 
-       pr_info("xen_balloon: Initialising balloon driver.\n");
+       pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+               balloon_order);
+
+       balloon_npages = 1 << balloon_order;
 
-       balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+       balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
        balloon_stats.target_pages  = balloon_stats.current_pages;
        balloon_stats.balloon_low   = 0;
        balloon_stats.balloon_high  = 0;
@@ -414,7 +473,7 @@ static int __init balloon_init(void)
        register_balloon(&balloon_sysdev);
 
        /* Initialise the balloon with excess memory space. */
-       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn += balloon_npages) {
                if (page_is_ram(pfn)) {
                        struct page *page = pfn_to_page(pfn);
                        if (!PageReserved(page))
@@ -464,16 +523,20 @@ static int dealloc_pte_fn(pte_t *pte, st
 struct page **alloc_empty_pages_and_pagevec(int nr_pages)
 {
        struct page *page, **pagevec;
-       int i, ret;
+       int npages;
+       int i, j, ret;
+
+       /* Round up to next number of balloon_order pages */
+       npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
 
-       pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
+       pagevec = kmalloc(sizeof(page) * nr_pages << balloon_order, GFP_KERNEL);
        if (pagevec == NULL)
                return NULL;
 
        for (i = 0; i < nr_pages; i++) {
                void *v;
 
-               page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
+               page = alloc_pages(GFP_KERNEL|__GFP_COLD, balloon_order);
                if (page == NULL)
                        goto err;
 
@@ -484,8 +547,8 @@ struct page **alloc_empty_pages_and_page
                v = page_address(page);
 
                ret = apply_to_page_range(&init_mm, (unsigned long)v,
-                                         PAGE_SIZE, dealloc_pte_fn,
-                                         NULL);
+                                         PAGE_SIZE << balloon_order,
+                                         dealloc_pte_fn, NULL);
 
                if (ret != 0) {
                        mutex_unlock(&balloon_mutex);
@@ -493,8 +556,10 @@ struct page **alloc_empty_pages_and_page
                        __free_page(page);
                        goto err;
                }
+               for (j = 0; j < balloon_npages; j++)
+                       pagevec[(i<<balloon_order)+j] = page++;
 
-               totalram_pages = --balloon_stats.current_pages;
+               totalram_pages = balloon_stats.current_pages -= balloon_npages;
 
                mutex_unlock(&balloon_mutex);
        }
@@ -507,7 +572,7 @@ struct page **alloc_empty_pages_and_page
  err:
        mutex_lock(&balloon_mutex);
        while (--i >= 0)
-               balloon_append(pagevec[i]);
+               balloon_append(pagevec[i << balloon_order]);
        mutex_unlock(&balloon_mutex);
        kfree(pagevec);
        pagevec = NULL;
@@ -517,15 +582,21 @@ EXPORT_SYMBOL_GPL(alloc_empty_pages_and_
 
 void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
 {
+       struct page *page;
        int i;
+       int npages;
 
        if (pagevec == NULL)
                return;
 
+       /* Round up to next number of balloon_order pages */
+       npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
+
        mutex_lock(&balloon_mutex);
        for (i = 0; i < nr_pages; i++) {
-               BUG_ON(page_count(pagevec[i]) != 1);
-               balloon_append(pagevec[i]);
+               page = pagevec[i << balloon_order];
+               BUG_ON(page_count(page) != 1);
+               balloon_append(page);
        }
        mutex_unlock(&balloon_mutex);
 
@@ -535,6 +606,14 @@ void free_empty_pages_and_pagevec(struct
 }
 EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
 
+static int __init balloon_parse_huge(char *s)
+{
+       balloon_order = 9;
+       return 1;
+}
+
+__setup("balloon_hugepages", balloon_parse_huge);
+
 #define BALLOON_SHOW(name, format, args...)                            \
        static ssize_t show_##name(struct sys_device *dev,              \
                                   struct sysdev_attribute *attr,       \
@@ -568,7 +647,7 @@ static ssize_t store_target_kb(struct sy
 
        target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 
-       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+       balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
        return count;
 }
@@ -582,7 +661,7 @@ static ssize_t show_target(struct sys_de
 {
        return sprintf(buf, "%llu\n",
                       (unsigned long long)balloon_stats.target_pages
-                      << PAGE_SHIFT);
+                      << (PAGE_SHIFT + balloon_order));
 }
 
 static ssize_t store_target(struct sys_device *dev,
@@ -598,7 +677,7 @@ static ssize_t store_target(struct sys_d
 
        target_bytes = memparse(buf, &endchar);
 
-       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+       balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
        return count;
 }

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

[Prev in Thread] Current Thread [Next in Thread]
  • [Xen-devel] [PATCH xen.git] Add hugepage support to balloon driver, Dave McCracken <=