xen/balloon: optionally balloon in order-9 (hugepage) extents

Booting with "balloon_hugepages" sets balloon_order to 9, so the balloon
allocates, releases and accounts memory in units of
balloon_npages = 1 << balloon_order base pages.  Without the option the
order stays 0.

Notes on this copy of the patch:
 - Line structure and whitespace were rebuilt from a whitespace-collapsed
   copy, so context may not match byte for byte; apply with "patch -l".
 - The two #include names in the first hunk were missing from that copy and
   are assumed to be <xen/features.h> and <xen/page.h> -- TODO confirm.
 - shrink_frame() now really compacts frame_list and returns the number of
   entries kept (it used to return nr_pages unchanged and could read past
   the end of the batch).
 - decrease_reservation() tests PageHighMem() on the page being unmapped
   rather than on whatever the allocation loop last stored in "page".
 - scrub_page() and balloon_init() use unsigned long arithmetic to match
   the type of balloon_npages.
 - NOTE(review): the kernel mappings are now removed after the "Ensure that
   ballooned highmem pages don't have kmaps" step instead of before it;
   confirm that no flush done there is still needed after the unmap.
 - NOTE(review): the balloon_init() loop steps by balloon_npages but only
   checks pfn < max_pfn, so the last chunk may extend past max_pfn when
   balloon_order > 0 -- verify.

--- 2.6.31/./drivers/xen/balloon.c	2009-09-09 17:13:59.000000000 -0500
+++ 2.6.31-balloon/./drivers/xen/balloon.c	2009-10-28 07:21:01.000000000 -0500
@@ -58,7 +58,7 @@
 #include <xen/features.h>
 #include <xen/page.h>
 
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
 
 #define BALLOON_CLASS_NAME "xen_memory"
 
@@ -93,6 +93,14 @@
 static DEFINE_SPINLOCK(balloon_lock);
 
 static struct balloon_stats balloon_stats;
 
+/*
+ * Work in pages of this order.  Can be either 0 for normal pages
+ * or 9 for hugepages.
+ */
+static int balloon_order;
+static unsigned long balloon_npages;
+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
@@ -124,10 +132,48 @@ static struct timer_list balloon_timer;
 static void scrub_page(struct page *page)
 {
 #ifdef CONFIG_XEN_SCRUB_PAGES
-	clear_highpage(page);
+	unsigned long i;
+
+	for (i = 0; i < balloon_npages; i++)
+		clear_highpage(page++);
 #endif
 }
 
+/*
+ * Hand the balloon_npages frames collected in discontig_frame_list back to
+ * Xen as individual order-0 extents.
+ */
+static void free_discontig_frame(void)
+{
+	int rc;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.domid        = DOMID_SELF,
+		.nr_extents   = balloon_npages,
+		.extent_order = 0
+	};
+
+	set_xen_guest_handle(reservation.extent_start, discontig_frame_list);
+	rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	BUG_ON(rc != balloon_npages);
+}
+
+/*
+ * Compact frame_list in place by dropping the zero entries left behind for
+ * extents that were already released page by page.  Returns the number of
+ * entries that remain at the front of frame_list.
+ */
+static unsigned long shrink_frame(unsigned long nr_pages)
+{
+	unsigned long i, j;
+
+	for (i = 0, j = 0; i < nr_pages; i++) {
+		if (frame_list[i] != 0)
+			frame_list[j++] = frame_list[i];
+	}
+	return j;
+}
+
 /* balloon_append: add the given page to the balloon. */
 static void balloon_append(struct page *page)
 {
@@ -197,12 +243,11 @@ static unsigned long current_target(void
 
 static int increase_reservation(unsigned long nr_pages)
 {
-	unsigned long  pfn, i, flags;
+	unsigned long  pfn, mfn, i, j, flags;
 	struct page   *page;
 	long           rc;
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
-		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
 
@@ -214,12 +259,14 @@ static int increase_reservation(unsigned
 	page   = balloon_first_page();
 	for (i = 0; i < nr_pages; i++) {
 		BUG_ON(page == NULL);
-		frame_list[i] = page_to_pfn(page);;
+		frame_list[i] = page_to_pfn(page);
 		page = balloon_next_page(page);
 	}
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents = nr_pages;
+	reservation.extent_order = balloon_order;
+
 	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
 	if (rc < nr_pages) {
 		if (rc > 0) {
@@ -242,19 +289,22 @@ static int increase_reservation(unsigned
 		BUG_ON(page == NULL);
 
 		pfn = page_to_pfn(page);
+		mfn = frame_list[i];
 		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
 		       phys_to_machine_mapping_valid(pfn));
 
-		set_phys_to_machine(pfn, frame_list[i]);
+		for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
+			set_phys_to_machine(pfn, mfn);
 
-		/* Link back into the page tables if not highmem. */
-		if (pfn < max_low_pfn) {
-			int ret;
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)__va(pfn << PAGE_SHIFT),
-				mfn_pte(frame_list[i], PAGE_KERNEL),
-				0);
-			BUG_ON(ret);
+			/* Link back into the page tables if not highmem. */
+			if (pfn < max_low_pfn) {
+				int ret;
+				ret = HYPERVISOR_update_va_mapping(
+					(unsigned long)__va(pfn << PAGE_SHIFT),
+					mfn_pte(mfn, PAGE_KERNEL),
+					0);
+				BUG_ON(ret);
+			}
 		}
 
 		/* Relinquish the page back to the allocator. */
@@ -264,7 +314,7 @@ static int increase_reservation(unsigned
 	}
 
 	balloon_stats.current_pages += nr_pages;
-	totalram_pages = balloon_stats.current_pages;
+	totalram_pages = balloon_stats.current_pages << balloon_order;
 
  out:
 	spin_unlock_irqrestore(&balloon_lock, flags);
@@ -274,13 +324,13 @@ static int increase_reservation(unsigned
 
 static int decrease_reservation(unsigned long nr_pages)
 {
-	unsigned long  pfn, i, flags;
+	unsigned long  pfn, lpfn, mfn, i, j, flags;
 	struct page   *page;
 	int            need_sleep = 0;
-	int ret;
+	int            discontig, discontig_free;
+	int            ret;
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
-		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
 
@@ -288,7 +338,7 @@ static int decrease_reservation(unsigned
 		nr_pages = ARRAY_SIZE(frame_list);
 
 	for (i = 0; i < nr_pages; i++) {
-		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+		if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) {
 			nr_pages = i;
 			need_sleep = 1;
 			break;
@@ -298,14 +348,6 @@ static int decrease_reservation(unsigned
 		frame_list[i] = pfn_to_mfn(pfn);
 
 		scrub_page(page);
-
-		if (!PageHighMem(page)) {
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)__va(pfn << PAGE_SHIFT),
-				__pte_ma(0), 0);
-			BUG_ON(ret);
-		}
-
 	}
 
 	/* Ensure that ballooned highmem pages don't have kmaps. */
@@ -315,20 +357,42 @@ static int decrease_reservation(unsigned
 	spin_lock_irqsave(&balloon_lock, flags);
 
 	/* No more mappings: invalidate P2M and add to balloon. */
+	discontig = 0;
 	for (i = 0; i < nr_pages; i++) {
-		pfn = mfn_to_pfn(frame_list[i]);
-		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		mfn = frame_list[i];
+		lpfn = pfn = mfn_to_pfn(mfn);
 		balloon_append(pfn_to_page(pfn));
+		discontig_free = 0;
+		for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) {
+			if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn)
+				discontig_free = 1;
+
+			set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
+			if (!PageHighMem(pfn_to_page(lpfn))) {
+				ret = HYPERVISOR_update_va_mapping(
+					(unsigned long)__va(lpfn << PAGE_SHIFT),
+					__pte_ma(0), 0);
+				BUG_ON(ret);
+			}
+		}
+		if (discontig_free) {
+			free_discontig_frame();
+			frame_list[i] = 0;
+			discontig = 1;
+		}
 	}
+	balloon_stats.current_pages -= nr_pages;
+	totalram_pages = balloon_stats.current_pages << balloon_order;
+
+	if (discontig)
+		nr_pages = shrink_frame(nr_pages);
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents   = nr_pages;
+	reservation.extent_order = balloon_order;
 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
 	BUG_ON(ret != nr_pages);
 
-	balloon_stats.current_pages -= nr_pages;
-	totalram_pages = balloon_stats.current_pages;
-
 	spin_unlock_irqrestore(&balloon_lock, flags);
 
 	return need_sleep;
@@ -397,7 +461,7 @@ static void watch_target(struct xenbus_w
 	/* The given memory/target value is in KiB, so it needs converting to
 	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
 	 */
-	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+	balloon_set_new_target(new_target >> ((PAGE_SHIFT - 10) + balloon_order));
 }
 
 static int balloon_init_watcher(struct notifier_block *notifier,
@@ -423,10 +487,13 @@ static int __init balloon_init(void)
 	if (!xen_pv_domain())
 		return -ENODEV;
 
-	pr_info("xen_balloon: Initialising balloon driver.\n");
+	pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+		balloon_order);
 
-	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
-	totalram_pages   = balloon_stats.current_pages;
+	balloon_npages = 1UL << balloon_order;
+
+	balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
+	totalram_pages   = balloon_stats.current_pages << balloon_order;
 	balloon_stats.target_pages  = balloon_stats.current_pages;
 	balloon_stats.balloon_low   = 0;
 	balloon_stats.balloon_high  = 0;
@@ -440,10 +507,12 @@ static int __init balloon_init(void)
 	register_balloon(&balloon_sysdev);
 
 	/* Initialise the balloon with excess memory space. */
-	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
-		page = pfn_to_page(pfn);
-		if (!PageReserved(page))
-			balloon_append(page);
+	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn += balloon_npages) {
+		if (pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			if (!PageReserved(page))
+				balloon_append(page);
+		}
 	}
 
 	target_watch.callback = watch_target;
@@ -464,6 +533,14 @@ static void balloon_exit(void)
 
 module_exit(balloon_exit);
 
+static int __init balloon_parse_huge(char *s)
+{
+	balloon_order = 9;
+	return 1;
+}
+
+__setup("balloon_hugepages", balloon_parse_huge);
+
 #define BALLOON_SHOW(name, format, args...)				\
 	static ssize_t show_##name(struct sys_device *dev,		\
 				   struct sysdev_attribute *attr,	\
@@ -500,7 +577,7 @@ static ssize_t store_target_kb(struct sy
 
 	target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 
-	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+	balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
 	return count;
 }
@@ -514,7 +591,7 @@ static ssize_t show_target(struct sys_de
 {
 	return sprintf(buf, "%llu\n",
 		       (unsigned long long)balloon_stats.target_pages
-		       << PAGE_SHIFT);
+		       << (PAGE_SHIFT + balloon_order));
 }
 
 static ssize_t store_target(struct sys_device *dev,
@@ -530,7 +607,7 @@ static ssize_t store_target(struct sys_d
 
 	target_bytes = memparse(buf, &endchar);
 
-	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+	balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
 	return count;
 }