[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] GSoC 2010 - Memory hotplug support for Xen guests - third fully working version



Hi,

On Fri, Aug 13, 2010 at 09:49:52AM +0400, Vasiliy G Tolstov wrote:
[...]
> Thank You for try.
> link ftp://ftp.suse.com/pub/projects/kernel/kotd/SLE11-SP1/src/

Here is the patch, which applies to Linux kernel version 2.6.32.19.
(The SLES kernel is based on 2.6.32, and the patch applies with one
warning.) It compiles; however, I did not have time to test it.

Daniel

Signed-off-by: Daniel Kiper <dkiper@xxxxxxxxxxxx>
---
diff -Npru linux-2.6.32.19.orig/drivers/xen/Kconfig 
linux-2.6.32.19/drivers/xen/Kconfig
--- linux-2.6.32.19.orig/drivers/xen/Kconfig    2010-08-13 22:24:37.000000000 
+0200
+++ linux-2.6.32.19/drivers/xen/Kconfig 2010-08-16 02:01:35.000000000 +0200
@@ -7,6 +7,16 @@ config XEN_BALLOON
          the system to expand the domain's memory allocation, or alternatively
          return unneeded memory to the system.
 
+config XEN_BALLOON_MEMORY_HOTPLUG
+       bool "Xen memory balloon driver with memory hotplug support"
+       default n
+       depends on XEN_BALLOON && MEMORY_HOTPLUG
+       help
+         Xen memory balloon driver with memory hotplug support allows expanding
+         memory available for the system above limit declared at system startup.
+         It is very useful on critical systems which require long run without
+         rebooting.
+
 config XEN_SCRUB_PAGES
        bool "Scrub pages before returning them to system"
        depends on XEN_BALLOON
diff -Npru linux-2.6.32.19.orig/drivers/xen/balloon.c 
linux-2.6.32.19/drivers/xen/balloon.c
--- linux-2.6.32.19.orig/drivers/xen/balloon.c  2010-08-13 22:24:37.000000000 
+0200
+++ linux-2.6.32.19/drivers/xen/balloon.c       2010-08-16 02:13:12.000000000 
+0200
@@ -6,6 +6,7 @@
  * Copyright (c) 2003, B Dragovic
  * Copyright (c) 2003-2004, M Williamson, K Fraser
  * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * Copyright (c) 2010 Daniel Kiper
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
@@ -43,6 +44,7 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/sysdev.h>
+#include <linux/memory.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -74,6 +76,11 @@ struct balloon_stats {
        /* Number of pages in high- and low-memory balloons. */
        unsigned long balloon_low;
        unsigned long balloon_high;
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+       unsigned long boot_max_pfn;
+       u64 hotplug_start_paddr;
+       u64 hotplug_size;
+#endif
 };
 
 static DEFINE_MUTEX(balloon_mutex);
@@ -181,17 +188,173 @@ static void balloon_alarm(unsigned long 
        schedule_work(&balloon_worker);
 }
 
-static unsigned long current_target(void)
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+static inline u64 is_memory_resource_reserved(void)
+{
+       return balloon_stats.hotplug_start_paddr;
+}
+
+static int allocate_additional_memory(unsigned long nr_pages)
+{
+       long rc;
+       resource_size_t r_min, r_size;
+       struct resource *r;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+       unsigned long flags, i, pfn;
+
+       if (nr_pages > ARRAY_SIZE(frame_list))
+               nr_pages = ARRAY_SIZE(frame_list);
+
+       if (!is_memory_resource_reserved()) {
+
+               /*
+                * Look for first unused memory region starting at page
+                * boundary. Skip last memory section created at boot time
+                * because it may contain unused memory pages with PG_reserved
+                * bit not set (online_pages requires PG_reserved bit set).
+                */
+
+               r = kzalloc(sizeof(struct resource), GFP_KERNEL);
+
+               if (!r) {
+                       rc = -ENOMEM;
+                       goto out_0;
+               }
+
+               r->name = "System RAM";
+               r->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               r_min = 
PFN_PHYS(section_nr_to_pfn(pfn_to_section_nr(balloon_stats.boot_max_pfn) + 1));
+               r_size = (balloon_stats.target_pages - 
balloon_stats.current_pages) << PAGE_SHIFT;
+
+               rc = allocate_resource(&iomem_resource, r, r_size, r_min,
+                                       ULONG_MAX, PAGE_SIZE, NULL, NULL);
+
+               if (rc < 0) {
+                       kfree(r);
+                       goto out_0;
+               }
+
+               balloon_stats.hotplug_start_paddr = r->start;
+       }
+
+       spin_lock_irqsave(&balloon_lock, flags);
+
+       pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr + 
balloon_stats.hotplug_size);
+
+       for (i = 0; i < nr_pages; ++i, ++pfn)
+               frame_list[i] = pfn;
+
+       set_xen_guest_handle(reservation.extent_start, frame_list);
+       reservation.nr_extents = nr_pages;
+
+       rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+
+       if (rc < 0)
+               goto out_1;
+
+       pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr + 
balloon_stats.hotplug_size);
+
+       for (i = 0; i < rc; ++i, ++pfn) {
+               BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
+                      phys_to_machine_mapping_valid(pfn));
+               set_phys_to_machine(pfn, frame_list[i]);
+       }
+
+       balloon_stats.hotplug_size += rc << PAGE_SHIFT;
+       balloon_stats.current_pages += rc;
+
+out_1:
+       spin_unlock_irqrestore(&balloon_lock, flags);
+
+out_0:
+       return rc < 0 ? rc : rc != nr_pages;
+}
+
+static void hotplug_allocated_memory(void)
+{
+       int nid, ret;
+       struct memory_block *mem;
+       unsigned long pfn, pfn_limit;
+
+       nid = memory_add_physaddr_to_nid(balloon_stats.hotplug_start_paddr);
+
+       ret = add_registered_memory(nid, balloon_stats.hotplug_start_paddr,
+                                               balloon_stats.hotplug_size);
+
+       if (ret) {
+               pr_err("%s: add_registered_memory: Memory hotplug failed: %i\n",
+                       __func__, ret);
+               goto error;
+       }
+
+       if (xen_pv_domain()) {
+               pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr);
+               pfn_limit = pfn + (balloon_stats.hotplug_size >> PAGE_SHIFT);
+
+               for (; pfn < pfn_limit; ++pfn)
+                       if (!PageHighMem(pfn_to_page(pfn)))
+                               BUG_ON(HYPERVISOR_update_va_mapping(
+                                       (unsigned long)__va(pfn << PAGE_SHIFT),
+                                       mfn_pte(pfn_to_mfn(pfn), PAGE_KERNEL), 
0));
+       }
+
+       ret = online_pages(PFN_DOWN(balloon_stats.hotplug_start_paddr),
+                               balloon_stats.hotplug_size >> PAGE_SHIFT);
+
+       if (ret) {
+               pr_err("%s: online_pages: Failed: %i\n", __func__, ret);
+               goto error;
+       }
+
+       pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr);
+       pfn_limit = pfn + (balloon_stats.hotplug_size >> PAGE_SHIFT);
+
+       for (; pfn < pfn_limit; pfn += PAGES_PER_SECTION) {
+               mem = find_memory_block(__pfn_to_section(pfn));
+               BUG_ON(!mem);
+               BUG_ON(!present_section_nr(mem->phys_index));
+               mutex_lock(&mem->state_mutex);
+               mem->state = MEM_ONLINE;
+               mutex_unlock(&mem->state_mutex);
+       }
+
+       goto out;
+
+error:
+       balloon_stats.current_pages -= balloon_stats.hotplug_size >> PAGE_SHIFT;
+       balloon_stats.target_pages -= balloon_stats.hotplug_size >> PAGE_SHIFT;
+
+out:
+       balloon_stats.hotplug_start_paddr = 0;
+       balloon_stats.hotplug_size = 0;
+}
+#else
+static inline u64 is_memory_resource_reserved(void)
+{
+       return 0;
+}
+
+static inline int allocate_additional_memory(unsigned long nr_pages)
 {
-       unsigned long target = balloon_stats.target_pages;
+       /*
+        * CONFIG_XEN_BALLOON_MEMORY_HOTPLUG is not set.
+        * balloon_stats.target_pages cannot be bigger
+        * than balloon_stats.current_pages because additional
+        * memory allocation is not possible.
+        */
+       balloon_stats.target_pages = balloon_stats.current_pages;
 
-       target = min(target,
-                    balloon_stats.current_pages +
-                    balloon_stats.balloon_low +
-                    balloon_stats.balloon_high);
+       return 0;
+}
 
-       return target;
+static inline void hotplug_allocated_memory(void)
+{
 }
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
 
 static int increase_reservation(unsigned long nr_pages)
 {
@@ -233,7 +396,7 @@ static int increase_reservation(unsigned
                set_phys_to_machine(pfn, frame_list[i]);
 
                /* Link back into the page tables if not highmem. */
-               if (pfn < max_low_pfn) {
+               if (xen_pv_domain() && !PageHighMem(page)) {
                        int ret;
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
@@ -283,7 +446,7 @@ static int decrease_reservation(unsigned
 
                scrub_page(page);
 
-               if (!PageHighMem(page)) {
+               if (xen_pv_domain() && !PageHighMem(page)) {
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
                                __pte_ma(0), 0);
@@ -331,9 +494,15 @@ static void balloon_process(struct work_
        mutex_lock(&balloon_mutex);
 
        do {
-               credit = current_target() - balloon_stats.current_pages;
-               if (credit > 0)
-                       need_sleep = (increase_reservation(credit) != 0);
+               credit = balloon_stats.target_pages - 
balloon_stats.current_pages;
+
+               if (credit > 0) {
+                       if (balloon_stats.balloon_low || 
balloon_stats.balloon_high)
+                               need_sleep = (increase_reservation(credit) != 
0);
+                       else
+                               need_sleep = 
(allocate_additional_memory(credit) != 0);
+               }
+
                if (credit < 0)
                        need_sleep = (decrease_reservation(-credit) != 0);
 
@@ -344,8 +513,10 @@ static void balloon_process(struct work_
        } while ((credit != 0) && !need_sleep);
 
        /* Schedule more work if there is some still to be done. */
-       if (current_target() != balloon_stats.current_pages)
+       if (balloon_stats.target_pages != balloon_stats.current_pages)
                mod_timer(&balloon_timer, jiffies + HZ);
+       else if (is_memory_resource_reserved())
+               hotplug_allocated_memory();
 
        mutex_unlock(&balloon_mutex);
 }
@@ -402,17 +573,27 @@ static int __init balloon_init(void)
        unsigned long pfn;
        struct page *page;
 
-       if (!xen_pv_domain())
+       if (!xen_domain())
                return -ENODEV;
 
        pr_info("xen_balloon: Initialising balloon driver.\n");
 
-       balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+       if (xen_pv_domain())
+               balloon_stats.current_pages = min(xen_start_info->nr_pages, 
max_pfn);
+       else
+               balloon_stats.current_pages = max_pfn;
+
        balloon_stats.target_pages  = balloon_stats.current_pages;
        balloon_stats.balloon_low   = 0;
        balloon_stats.balloon_high  = 0;
        balloon_stats.driver_pages  = 0UL;
 
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+       balloon_stats.boot_max_pfn = max_pfn;
+       balloon_stats.hotplug_start_paddr = 0;
+       balloon_stats.hotplug_size = 0;
+#endif
+
        init_timer(&balloon_timer);
        balloon_timer.data = 0;
        balloon_timer.function = balloon_alarm;
@@ -420,11 +601,12 @@ static int __init balloon_init(void)
        register_balloon(&balloon_sysdev);
 
        /* Initialise the balloon with excess memory space. */
-       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
-               page = pfn_to_page(pfn);
-               if (!PageReserved(page))
-                       balloon_append(page);
-       }
+       if (xen_pv_domain())
+               for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+                       page = pfn_to_page(pfn);
+                       if (!PageReserved(page))
+                               balloon_append(page);
+               }
 
        target_watch.callback = watch_target;
        xenstore_notifier.notifier_call = balloon_init_watcher;
diff -Npru linux-2.6.32.19.orig/include/linux/memory_hotplug.h 
linux-2.6.32.19/include/linux/memory_hotplug.h
--- linux-2.6.32.19.orig/include/linux/memory_hotplug.h 2010-08-13 
22:24:37.000000000 +0200
+++ linux-2.6.32.19/include/linux/memory_hotplug.h      2010-08-16 
02:01:35.000000000 +0200
@@ -203,6 +203,7 @@ static inline int is_mem_section_removab
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
+extern int add_registered_memory(int nid, u64 start, u64 size);
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int remove_memory(u64 start, u64 size);
diff -Npru linux-2.6.32.19.orig/mm/memory_hotplug.c 
linux-2.6.32.19/mm/memory_hotplug.c
--- linux-2.6.32.19.orig/mm/memory_hotplug.c    2010-08-13 22:24:37.000000000 
+0200
+++ linux-2.6.32.19/mm/memory_hotplug.c 2010-08-16 02:20:13.000000000 +0200
@@ -477,22 +477,13 @@ static void rollback_node_hotadd(int nid
        return;
 }
 
-
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
-int __ref add_memory(int nid, u64 start, u64 size)
+static int __ref __add_memory(int nid, u64 start, u64 size)
 {
        pg_data_t *pgdat = NULL;
        int new_pgdat = 0;
-       struct resource *res;
        int ret;
 
-       lock_system_sleep();
-
-       res = register_memory_resource(start, size);
-       ret = -EEXIST;
-       if (!res)
-               goto out;
-
        if (!node_online(nid)) {
                pgdat = hotadd_new_pgdat(nid, start);
                ret = -ENOMEM;
@@ -523,14 +514,48 @@ int __ref add_memory(int nid, u64 start,
        goto out;
 
 error:
-       /* rollback pgdat allocation and others */
+       /* rollback pgdat allocation */
        if (new_pgdat)
                rollback_node_hotadd(nid, pgdat);
-       if (res)
-               release_memory_resource(res);
 
 out:
+       return ret;
+}
+
+int add_registered_memory(int nid, u64 start, u64 size)
+{
+       int ret;
+
+       lock_system_sleep();
+       ret = __add_memory(nid, start, size);
        unlock_system_sleep();
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(add_registered_memory);
+
+int add_memory(int nid, u64 start, u64 size)
+{
+       int ret = -EEXIST;
+       struct resource *res;
+
+       lock_system_sleep();
+
+       res = register_memory_resource(start, size);
+
+       if (!res)
+               goto out;
+
+       ret = __add_memory(nid, start, size);
+
+       if (!ret)
+               goto out;
+
+       release_memory_resource(res);
+
+out:
+       unlock_system_sleep();
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.