# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1155023705 -32400
# Node ID 519d868316914cb894ba38f34dc7e64be0a032f2
# Parent 86d4ea768120579994cf79da9e5ece9c688f52f3
The second prototype of deferred page freeing. More refinement is needed.
This patch focuses on vnif and the balloon driver.

When a page whose TLB insertions aren't tracked is unmapped/zapped from a
domain, a full vTLB flush is necessary again. The balloon driver and grant
table page transfer are such cases, and this patch focuses on them.
It batches the freeing/zapping of pages from a domain in order to reduce
full vTLB flushes.

PATCHNAME: deferred_page_freeing

Signed-off-by: Isaku Yamahata

diff -r 86d4ea768120 -r 519d86831691 xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/arch/ia64/Rules.mk	Tue Aug 08 16:55:05 2006 +0900
@@ -42,6 +42,9 @@ ifeq ($(xen_ia64_tlb_track),y)
 ifeq ($(xen_ia64_tlb_track),y)
 CFLAGS	+= -DCONFIG_XEN_IA64_TLB_TRACK
 endif
+ifeq ($(xen_ia64_deferred_free),y)
+CFLAGS	+= -DCONFIG_XEN_IA64_DEFERRED_FREE
+endif
 ifeq ($(no_warns),y)
 CFLAGS	+= -Wa,--fatal-warnings -Werror -Wno-uninitialized
 endif
diff -r 86d4ea768120 -r 519d86831691 xen/arch/ia64/xen/Makefile
--- a/xen/arch/ia64/xen/Makefile	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/arch/ia64/xen/Makefile	Tue Aug 08 16:55:05 2006 +0900
@@ -28,3 +28,4 @@ obj-y += privop_stat.o
 
 obj-$(crash_debug) += gdbstub.o
 obj-$(xen_ia64_tlb_track) += tlb_track.o
+obj-$(xen_ia64_deferred_free) += deferred_free.o
diff -r 86d4ea768120 -r 519d86831691 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/arch/ia64/xen/domain.c	Tue Aug 08 16:55:05 2006 +0900
@@ -277,6 +277,9 @@ struct vcpu *alloc_vcpu_struct(struct do
 	        return NULL;
 	    }
 	}
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+	dfree_init(v, &v->arch.dfree);
+#endif
 
 	/* Create privregs page only if not VTi. */
 	order = get_order_from_shift(XMAPPEDREGS_SHIFT);
@@ -315,6 +318,9 @@ struct vcpu *alloc_vcpu_struct(struct do
 
 void relinquish_vcpu_resources(struct vcpu *v)
 {
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+	dfree_relinquish(&v->arch.dfree);
+#endif
 	if (HAS_PERVCPU_VHPT(v->domain))
 	    pervcpu_vhpt_free(v);
 	if (v->arch.privregs != NULL) {
diff -r 86d4ea768120 -r 519d86831691 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/arch/ia64/xen/mm.c	Tue Aug 08 16:55:05 2006 +0900
@@ -36,7 +36,7 @@
  *
  * operations on this structure:
  * - global tlb purge
- *     vcpu_ptc_g(), vcpu_ptc_ga() and domain_page_flush()
+ *     vcpu_ptc_g(), vcpu_ptc_ga() and domain_page_flush_and_put()
 *     I.e. callers of domain_flush_vtlb_range() and domain_flush_vtlb_all()
 *     These functions invalidate VHPT entry and vcpu->arch.{i, d}tlb
 *
@@ -177,8 +177,20 @@
 #include
 #endif
 
-static void domain_page_flush(struct domain* d, unsigned long mpaddr,
-                              volatile pte_t* ptep, pte_t old_pte);
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+static void __domain_page_flush_and_put(struct domain* d, unsigned long mpaddr,
+                                        volatile pte_t* ptep, pte_t old_pte,
+                                        struct page_info* page,
+                                        int do_defer);
+#define domain_page_flush_and_put(d, mpaddr, ptep, old_pte, page)          \
+    __domain_page_flush_and_put((d), (mpaddr), (ptep), (old_pte), (page), 1)
+#define domain_page_flush_and_put_nodefer(d, mpaddr, ptep, old_pte, page)  \
+    __domain_page_flush_and_put((d), (mpaddr), (ptep), (old_pte), (page), 0)
+#else
+static void domain_page_flush_and_put(struct domain* d, unsigned long mpaddr,
+                                      volatile pte_t* ptep, pte_t old_pte,
+                                      struct page_info* page);
+#endif
 
 extern unsigned long ia64_iobase;
@@ -1033,10 +1045,8 @@ assign_domain_page_replace(struct domain
             set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
         }
 
-        domain_page_flush(d, mpaddr, pte, old_pte);
-
         try_to_clear_PGC_allocate(d, old_page);
-        put_page(old_page);
+        domain_page_flush_and_put(d, mpaddr, pte, old_pte, old_page);
     }
 
     perfc_incrc(assign_domain_page_replace);
@@ -1098,8 +1108,11 @@ assign_domain_page_cmpxchg_rel(struct do
 
     set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
 
-    domain_page_flush(d, mpaddr, pte, old_pte);
-    put_page(old_page);
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    domain_page_flush_and_put_nodefer(d, mpaddr, pte, old_pte, old_page);
+#else
+    domain_page_flush_and_put(d, mpaddr, pte, old_pte, old_page);
+#endif
     perfc_incrc(assign_domain_pge_cmpxchg_rel);
     return 0;
 }
@@ -1167,12 +1180,10 @@ zap_domain_page_one(struct domain *d, un
         set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
     }
 
-    domain_page_flush(d, mpaddr, pte, old_pte);
-
     if (page_get_owner(page) != NULL) {
         try_to_clear_PGC_allocate(d, page);
    }
-    put_page(page);
+    domain_page_flush_and_put(d, mpaddr, pte, old_pte, page);
     perfc_incrc(zap_dcomain_page_one);
 }
@@ -1186,6 +1197,9 @@ dom0vp_zap_physmap(struct domain *d, uns
     }
 
     zap_domain_page_one(d, gpfn << PAGE_SHIFT, INVALID_MFN);
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
     perfc_incrc(dom0vp_zap_physmap);
     return 0;
 }
@@ -1232,7 +1246,11 @@ dom0vp_add_physmap(struct domain* d, uns
            get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
     assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, flags);
     //don't update p2m table because this page belongs to rd, not d.
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
     perfc_incrc(dom0vp_add_physmap);
+
 out1:
     put_domain(rd);
     return error;
@@ -1268,6 +1286,9 @@ create_grant_host_mapping(unsigned long
 #endif
                ((flags & GNTMAP_readonly) ? ASSIGN_readonly : ASSIGN_writable));
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
     perfc_incrc(create_grant_host_mapping);
     return GNTST_okay;
 }
@@ -1322,11 +1343,9 @@ destroy_grant_host_mapping(unsigned long
     }
     BUG_ON(pte_pfn(old_pte) != mfn);
 
-    domain_page_flush(d, gpaddr, pte, old_pte);
-
     page = mfn_to_page(mfn);
     BUG_ON(page_get_owner(page) == d); //try_to_clear_PGC_allocate(d, page) is not needed.
-    put_page(page);
+    domain_page_flush_and_put(d, gpaddr, pte, old_pte, page);
 
     perfc_incrc(destroy_grant_host_mapping);
     return GNTST_okay;
@@ -1408,9 +1427,17 @@ steal_page(struct domain *d, struct page
         //  page->u.inused._domain = 0;
         _nd = x >> 32;
 
-        if (unlikely(!(memflags & MEMF_no_refcount) &&
+        if (
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+            // 1 is for the deferred page freeing reference.
+            unlikely(!(memflags & MEMF_no_refcount) &&
+                     ((x & (PGC_count_mask | PGC_allocated)) !=
+                      (2 | PGC_allocated))) ||
+#else
+            unlikely(!(memflags & MEMF_no_refcount) &&
                      ((x & (PGC_count_mask | PGC_allocated)) !=
                       (1 | PGC_allocated))) ||
+#endif
 
             // when MEMF_no_refcount, page isn't de-assigned from
             // this domain yet. So count_info = 2
@@ -1475,6 +1502,9 @@ guest_physmap_add_page(struct domain *d,
     set_gpfn_from_mfn(mfn, gpfn);
     smp_mb();
     assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable);
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
 
     //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT));
@@ -1492,9 +1522,18 @@ guest_physmap_remove_page(struct domain
 
 //XXX sledgehammer.
 //    flush finer range.
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
 static void
-domain_page_flush(struct domain* d, unsigned long mpaddr,
-                  volatile pte_t* ptep, pte_t old_pte)
+__domain_page_flush_and_put(struct domain* d, unsigned long mpaddr,
+                            volatile pte_t* ptep, pte_t old_pte,
+                            struct page_info* page,
+                            int do_defer)
+#else
+static void
+domain_page_flush_and_put(struct domain* d, unsigned long mpaddr,
+                          volatile pte_t* ptep, pte_t old_pte,
+                          struct page_info* page)
+#endif
 {
 #ifdef CONFIG_XEN_IA64_TLB_TRACK
     struct tlb_track_entry* entry;
 #endif
@@ -1505,25 +1544,65 @@ domain_page_flush(struct domain* d, unsi
 
 #ifndef CONFIG_XEN_IA64_TLB_TRACK
     domain_flush_vtlb_all();
+    put_page(page);
 #else
     switch (tlb_track_search_and_remove(d->arch.tlb_track,
                                         ptep, old_pte, &entry)) {
     case TLB_TRACK_NOT_TRACKED:
         //DPRINTK("%s TLB_TRACK_NOT_TRACKED\n", __func__);
+        // This page was zapped from this domain by a memory decrease,
+        // a memory exchange, or dom0vp_zap_physmap.
+        // I.e. either the page was zapped so that it can be returned
+        // to Xen (balloon driver or DMA page allocation), or a page
+        // mapped from a foreign domain was unmapped from this domain.
+        // In the former case the page is about to be freed, so freeing
+        // it can be deferred and batched.
+        // In the latter case the page was unmapped, so the vTLB must
+        // be flushed. To optimize this, queue the page and flush the
+        // vTLB only once.
+        // I.e. the caller must call dfree_flush() explicitly.
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+        if (do_defer) {
+            dfree_queue(&current->arch.dfree, page);
+        } else {
+            dfree_queue_array(&current->arch.dfree, page);
+        }
+#else
         domain_flush_vtlb_all();
+        put_page(page);
+#endif
         break;
     case TLB_TRACK_NOT_FOUND:
+        //DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__);
+        // This page was zapped from this domain by a grant table
+        // page unmap.
+        // Luckily the domain that mapped this page never accessed it,
+        // so the vTLB doesn't need to be flushed.
+        // Probably the domain used the page only for DMA.
+        //
         // do nothing
-        //DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__);
+        put_page(page);
         break;
     case TLB_TRACK_FOUND:
         //DPRINTK("%s TLB_TRACK_FOUND\n", __func__);
+        // This page was zapped from this domain by a grant table
+        // page unmap.
+        // Fortunately the page was accessed via only one virtual
+        // address, so it is easy to flush.
         domain_flush_vtlb_track_entry(d, entry);
         tlb_track_free_entry(d->arch.tlb_track, entry);
+        put_page(page);
         break;
     case TLB_TRACK_MANY:
         DPRINTK("%s TLB_TRACK_MANY\n", __func__);
+        // This page was zapped from this domain by a grant table
+        // page unmap.
+        // Unfortunately the page was accessed via many virtual
+        // addresses (or too many times via a single virtual address),
+        // so tracking its virtual addresses was abandoned.
+        // A full vTLB flush is necessary.
         domain_flush_vtlb_all();
+        put_page(page);
         break;
     case TLB_TRACK_AGAIN:
         DPRINTK("%s TLB_TRACK_AGAIN\n", __func__);
@@ -1531,7 +1610,7 @@ domain_page_flush(struct domain* d, unsi
         break;
     }
 #endif
-    perfc_incrc(domain_page_flush);
+    perfc_incrc(domain_page_flush_and_put);
 }
 
 int
diff -r 86d4ea768120 -r 519d86831691 xen/common/grant_table.c
--- a/xen/common/grant_table.c	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/common/grant_table.c	Tue Aug 08 16:55:05 2006 +0900
@@ -31,6 +31,9 @@
 #include
 #include
 #include
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+#include
+#endif
 
 #define PIN_FAIL(_lbl, _rc, _f, _a...)          \
     do {                                        \
@@ -310,10 +313,17 @@ gnttab_map_grant_ref(
         if ( unlikely(__copy_from_guest_offset(&op, uop, i, 1)) )
             return -EFAULT;
         __gnttab_map_grant_ref(&op);
-        if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) )
+        if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) ) {
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+            dfree_flush(&current->arch.dfree);
+#endif
             return -EFAULT;
-    }
-
+        }
+    }
+
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
     return 0;
 }
@@ -450,11 +460,19 @@ gnttab_unmap_grant_ref(
             goto fault;
     }
 
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#else
     flush_tlb_mask(current->domain->domain_dirty_cpumask);
+#endif
     return 0;
 
 fault:
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#else
     flush_tlb_mask(current->domain->domain_dirty_cpumask);
+#endif
     return -EFAULT;
 }
@@ -609,6 +627,9 @@ gnttab_transfer(
         if ( unlikely(__copy_from_guest_offset(&gop, uop, i, 1)) )
         {
             DPRINTK("gnttab_transfer: error reading req %d/%d\n", i, count);
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+            dfree_flush(&current->arch.dfree);
+#endif
             return -EFAULT;
         }
@@ -696,11 +717,17 @@ gnttab_transfer(
 
     copyback:
         if ( unlikely(__copy_to_guest_offset(uop, i, &gop, 1)) )
         {
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+            dfree_flush(&current->arch.dfree);
+#endif
             DPRINTK("gnttab_transfer: error writing resp %d/%d\n", i, count);
             return -EFAULT;
         }
     }
 
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
     return 0;
 }
diff -r 86d4ea768120 -r 519d86831691 xen/common/memory.c
--- a/xen/common/memory.c	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/common/memory.c	Tue Aug 08 16:55:05 2006 +0900
@@ -20,6 +20,9 @@
 #include
 #include
 #include
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+#include
+#endif
 
 /*
  * To allow safe resume of do_memory_op() after preemption, we need to know
@@ -218,6 +221,9 @@ decrease_reservation(
         }
     }
 
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
     return nr_extents;
 }
@@ -475,6 +481,9 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem
         exch.nr_exchanged += exch.in.nr_extents;
         if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
             rc = -EFAULT;
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+        dfree_flush(&current->arch.dfree);
+#endif
         return rc;
 
         /*
@@ -504,6 +513,9 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem
  fail_early:
     if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
         rc = -EFAULT;
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
     return rc;
 }
diff -r 86d4ea768120 -r 519d86831691 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/include/asm-ia64/domain.h	Tue Aug 08 16:55:05 2006 +0900
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <asm/deferred_free.h>
 
 struct p2m_entry;
 #ifdef CONFIG_XEN_IA64_TLB_TRACK
@@ -194,6 +195,10 @@ struct arch_vcpu {
     unsigned long vhpt_entries;
 #endif
 
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    struct deferred_free dfree;
+#endif
+
 #define INVALID_PROCESSOR   INT_MAX
     int last_processor;
 };
diff -r 86d4ea768120 -r 519d86831691 xen/include/asm-ia64/perfc_defn.h
--- a/xen/include/asm-ia64/perfc_defn.h	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/include/asm-ia64/perfc_defn.h	Tue Aug 08 16:55:05 2006 +0900
@@ -65,7 +65,7 @@ PERFCOUNTER_CPU(steal_page, "steal_pag
 PERFCOUNTER_CPU(steal_page,                 "steal_page")
 PERFCOUNTER_CPU(guest_physmap_add_page,     "guest_physmap_add_page")
 PERFCOUNTER_CPU(guest_physmap_remove_page,  "guest_physmap_remove_page")
-PERFCOUNTER_CPU(domain_page_flush,          "domain_page_flush")
+PERFCOUNTER_CPU(domain_page_flush_and_put,  "domain_page_flush_and_put")
 
 // dom0vp
 PERFCOUNTER_CPU(dom0vp_phystomach,          "dom0vp_phystomach")
@@ -97,3 +97,17 @@ PERFCOUNTER_CPU(tlb_track_use_rr7, "tlb
 PERFCOUNTER_CPU(tlb_track_use_rr7,          "tlb_track_use_rr7")
 PERFCOUNTER_CPU(tlb_track_swap_rr0,         "tlb_track_swap_rr0")
 #endif
+
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+// deferred_free.c
+PERFCOUNTER_CPU(dfree_timer_fn,              "dfree_timer_fn")
+PERFCOUNTER_CPU(dfree_put_pages_list,        "dfree_put_pages_list")
+PERFCOUNTER_CPU(dfree_put_pages_array,       "dfree_put_pages_array")
+PERFCOUNTER_CPU(dfree_flush_no_flush,        "dfree_flush_no_flush")
+PERFCOUNTER_CPU(dfree_flush_flush_now,       "dfree_flush_flush_now")
+PERFCOUNTER_CPU(dfree_flush_set_timer,       "dfree_flush_set_timer")
+PERFCOUNTER_CPU(dfree_queue_flush_now,       "dfree_queue_flush_now")
+PERFCOUNTER_CPU(dfree_queue,                 "dfree_queue")
+PERFCOUNTER_CPU(dfree_queue_array_flush_now, "dfree_queue_array_flush_now")
+PERFCOUNTER_CPU(dfree_queue_array,           "dfree_queue_array")
+#endif
diff -r 86d4ea768120 -r 519d86831691 xen/include/asm-ia64/vhpt.h
--- a/xen/include/asm-ia64/vhpt.h	Wed Aug 09 20:01:25 2006 +0900
+++ b/xen/include/asm-ia64/vhpt.h	Tue Aug 08 16:55:05 2006 +0900
@@ -80,5 +80,45 @@ vcpu_pta(struct vcpu* v)
     return VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED;
 }
 
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+void __dfree_flush(struct deferred_free* dfree);
+// This is here to avoid include hell.
+// This should be in deferred_free.h.
+static inline void
+dfree_flush(struct deferred_free* dfree)
+{
+    int no_flush = 0;
+    int flush_now = 0;
+
+    spin_lock(&dfree->lock);
+    if (dfree->nr_list_pages == 0 && dfree->nr_array_pages == 0) {
+        no_flush = 1;
+    } else if (dfree->nr_array_pages > 0) {
+        flush_now = 1;
+    }
+    spin_unlock(&dfree->lock);
+
+    if (no_flush) {
+        // This is the most common case;
+        // the function is inlined to optimize for it.
+        perfc_incrc(dfree_flush_no_flush);
+        return;
+    }
+
+#if 1
+    // XXX The guest domain must flush the virtual address.
+    flush_now = 1;
+#endif
+
+    if (flush_now) {
+        __dfree_flush(dfree);
+        perfc_incrc(dfree_flush_flush_now);
+    } else {
+        migrate_timer(&dfree->timer, smp_processor_id()); //XXX
+        set_timer(&dfree->timer, NOW() + DEFERRED_FREE_DELAY);
+        perfc_incrc(dfree_flush_set_timer);
+    }
+}
+#endif
+
 #endif /* !__ASSEMBLY */
 #endif
diff -r 86d4ea768120 -r 519d86831691 xen/arch/ia64/xen/deferred_free.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/xen/deferred_free.c	Tue Aug 08 16:55:05 2006 +0900
@@ -0,0 +1,221 @@
+/******************************************************************************
+ * deferred_free.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include
+#include
+#include
+#include <asm/vhpt.h>   // for __dfree_flush()
+
+//XXX When a vcpu is brought down, does the timer need to be killed?
+//    Or does the timer need to take a reference on the domain?
+
+static void
+dfree_timer_fn(void* v)
+{
+    struct vcpu* vcpu = (struct vcpu*)v;
+    __dfree_flush(&vcpu->arch.dfree);
+    perfc_incrc(dfree_timer_fn);
+}
+
+void
+dfree_init(struct vcpu* vcpu, struct deferred_free* dfree)
+{
+    int i;
+
+    dfree->vcpu = vcpu;
+    spin_lock_init(&dfree->lock);
+    dfree->nr_list_pages = 0;
+    INIT_LIST_HEAD(&dfree->pages_list);
+    dfree->nr_array_pages = 0;
+    for (i = 0; i < DEFERED_FREE_ARRAY_MAX; i++) {
+        dfree->pages[i] = NULL;
+    }
+    init_timer(&dfree->timer, &dfree_timer_fn, vcpu, vcpu->processor);
+}
+
+// We don't need to flush the vTLB because this domain is dying.
+void
+dfree_relinquish(struct deferred_free* dfree)
+{
+    struct page_info* page;
+    struct page_info* next;
+    int i;
+
+    kill_timer(&dfree->timer);
+
+    spin_lock(&dfree->lock);
+    list_for_each_entry_safe(page, next, &dfree->pages_list, list) {
+        BUG_ON((page->count_info & PGC_count_mask) != 1);
+        put_page(page);
+    }
+    for (i = 0; i < dfree->nr_array_pages; i++) {
+        put_page(dfree->pages[i]);
+    }
+    spin_unlock(&dfree->lock);
+}
+
+static void
+list_transfer_tail(struct list_head* src, struct list_head* dst)
+{
+    if (list_empty(src)) {
+        INIT_LIST_HEAD(dst);
+        return;
+    }
+
+    dst->next = src->next;
+    dst->prev = src->prev;
+
+    src->next->prev = dst;
+    src->prev->next = dst;
+
+    INIT_LIST_HEAD(src);
+}
+
+// The caller must hold dfree->lock.
+static void
+__dfree_drain_pages(struct deferred_free* dfree,
+                    struct list_head* list,
+                    struct page_info* pages[DEFERED_FREE_ARRAY_MAX],
+                    int* nr_array_pages)
+{
+    int i;
+
+    list_transfer_tail(&dfree->pages_list, list);
+    dfree->nr_list_pages = 0;
+
+    *nr_array_pages = dfree->nr_array_pages;
+    for (i = 0; i < dfree->nr_array_pages; i++) {
+        pages[i] = dfree->pages[i];
+    }
+    dfree->nr_array_pages = 0;
+}
+
+static void
+__dfree_put_pages(struct list_head* list,
+                  struct page_info* pages[DEFERED_FREE_ARRAY_MAX],
+                  int nr_array_pages)
+{
+    struct page_info* page;
+    struct page_info* next;
+    int i;
+
+    list_for_each_entry_safe(page, next, list, list) {
+        list_del(&page->list);
+        put_page(page);
+        perfc_incrc(dfree_put_pages_list);
+    }
+    for (i = 0; i < nr_array_pages; i++) {
+        put_page(pages[i]);
+        perfc_incrc(dfree_put_pages_array);
+    }
+}
+
+void
+__dfree_flush(struct deferred_free* dfree)
+{
+    struct list_head list;
+    int nr_array_pages;
+    struct page_info* pages[DEFERED_FREE_ARRAY_MAX]; //XXX too large for the stack.
+
+    spin_lock(&dfree->lock);
+    __dfree_drain_pages(dfree, &list, pages, &nr_array_pages);
+    spin_unlock(&dfree->lock);
+
+    domain_flush_vtlb_all();
+
+    __dfree_put_pages(&list, pages, nr_array_pages);
+}
+
+static int
+__dfree_queue_array(struct deferred_free* dfree, struct page_info* page)
+{
+    BUG_ON(dfree->nr_array_pages >= DEFERED_FREE_ARRAY_MAX);
+    dfree->pages[dfree->nr_array_pages] = page;
+    dfree->nr_array_pages++;
+
+    if (dfree->nr_array_pages >= DEFERED_FREE_ARRAY_MAX) {
+        return 1;
+    }
+    return 0;
+}
+
+void
+dfree_queue(struct deferred_free* dfree, struct page_info* page)
+{
+    struct domain* d = dfree->vcpu->domain;
+    int flush_now = 0;
+
+    // balloon case
+    // XXX consider the steal_page(page, 0) case
+    spin_lock(&dfree->lock);
+    if (page_get_owner(page) == d) {
+        BUG_ON(test_bit(_PGC_allocated, &page->count_info));
+        // The caller might have called get_page().
+        //BUG_ON((page->count_info & PGC_count_mask) != 1);
+        spin_lock(&d->page_alloc_lock);
+        page_set_owner(page, NULL);
+        list_del(&page->list);
+        dfree->nr_list_pages++;
+        list_add_tail(&page->list, &dfree->pages_list);
+        d->tot_pages--;
+        spin_unlock(&d->page_alloc_lock);
+
+        if (dfree->nr_list_pages > DEFERRED_FREE_LIST_PAGES_MAX) {
+            flush_now = 1;
+        }
+    } else {
+        flush_now = __dfree_queue_array(dfree, page);
+    }
+    spin_unlock(&dfree->lock);
+
+    if (flush_now) {
+        __dfree_flush(dfree);
+        perfc_incrc(dfree_queue_flush_now);
+    }
+    perfc_incrc(dfree_queue);
+}
+
+void
+dfree_queue_array(struct deferred_free* dfree, struct page_info* page)
+{
+    int flush_now;
+
+    spin_lock(&dfree->lock);
+    flush_now = __dfree_queue_array(dfree, page);
+    spin_unlock(&dfree->lock);
+
+    if (flush_now) {
+        __dfree_flush(dfree);
+        perfc_incrc(dfree_queue_array_flush_now);
+    }
+    perfc_incrc(dfree_queue_array);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 86d4ea768120 -r 519d86831691 xen/include/asm-ia64/deferred_free.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/deferred_free.h	Tue Aug 08 16:55:05 2006 +0900
@@ -0,0 +1,85 @@
+/******************************************************************************
+ * deferred_free.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __ASM_IA64_DEFERRED_FREE_H__
+#define __ASM_IA64_DEFERRED_FREE_H__
+
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+#include
+#include
+
+struct deferred_free {
+    struct vcpu* vcpu;
+
+    // lock order:
+    // vcpu->deferred_free->lock => d->page_alloc_lock
+    spinlock_t lock;
+
+    // Sized for decrease_reservation() and
+    // balloon_alloc_empty_page_range().
+    // decrease_reservation():
+    //     The balloon driver uses a globally allocated array
+    //         unsigned long frame[PAGE_SIZE / sizeof(unsigned long)];
+    //     with PAGE_SIZE = 16KB, its array size is 2048.
+    // balloon_alloc_empty_page_range():
+    //     blkback: blkif_req * BLKIF_MAX_SEGMENTS_PER_REQUEST
+    //              (boot parameter: default is 64) * 11 = 704
+    //     blktap:  MAX_PENDING_REQS(64) * BLKIF_MAX_SEGMENTS_PER_REQUEST
+    //              note: blktap isn't supported on xen/ia64 yet.
+    //     netback: MAX_PENDING_REQS = 256
+#define DEFERRED_FREE_LIST_PAGES_MAX    4096
+    int nr_list_pages;
+    struct list_head pages_list;
+
+    // netback allows at most 256 requests (MAX_PENDING_REQS = 256).
+    // If that value is changed, this value should be revised,
+    // or the array should be allocated dynamically.
+#define DEFERED_FREE_ARRAY_MAX          512
+    int nr_array_pages;
+    struct page_info* pages[DEFERED_FREE_ARRAY_MAX];
+
+#define DEFERRED_FREE_DELAY     ((500UL * 1000 * 1000) + 50 * 1000)
+    // 500ms + TIMER_SLOP
+    // The balloon driver uses a 1 sec timer;
+    // see how balloon_timer is set in balloon_process().
+    struct timer timer;
+};
+
+void dfree_init(struct vcpu* vcpu, struct deferred_free* dfree);
+void dfree_relinquish(struct deferred_free* dfree);
+
+void dfree_queue(struct deferred_free* dfree, struct page_info* page);
+void dfree_queue_array(struct deferred_free* dfree, struct page_info* page);
+// void dfree_flush() is defined in vhpt.h to avoid include hell.
+
+#endif
+
+#endif // __ASM_IA64_DEFERRED_FREE_H__
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
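
---

For reference, here is a minimal, self-contained C sketch of the pattern the
patch implements: defer each page free, then pay for one full vTLB flush per
batch instead of one flush per page. Everything in it is an illustrative
stand-in, not Xen code: struct page, BATCH_MAX, batch_queue() and
batch_flush() are hypothetical names playing the roles of page_info,
DEFERED_FREE_ARRAY_MAX, dfree_queue() and dfree_flush(), and the printf
calls stand in for domain_flush_vtlb_all() and put_page().

/* batch_free_sketch.c -- illustrative only; compile with: cc batch_free_sketch.c */
#include <stdio.h>

#define BATCH_MAX 8                       /* stand-in for DEFERED_FREE_ARRAY_MAX */

struct page { int id; };

static struct page* batch[BATCH_MAX];     /* pages queued for deferred freeing */
static int nr_batched;

static void full_vtlb_flush(void)         /* stand-in for domain_flush_vtlb_all() */
{
    printf("full vTLB flush\n");
}

static void put_page(struct page* p)      /* stand-in for Xen's put_page() */
{
    printf("free page %d\n", p->id);
}

/* stand-in for dfree_flush(): one flush covers every queued page */
static void batch_flush(void)
{
    int i;
    if (nr_batched == 0)
        return;                           /* common case: nothing queued, no flush */
    full_vtlb_flush();                    /* single flush for the whole batch */
    for (i = 0; i < nr_batched; i++)
        put_page(batch[i]);               /* only now is it safe to free the pages */
    nr_batched = 0;
}

/* stand-in for dfree_queue(): defer the free; flush early only when full */
static void batch_queue(struct page* p)
{
    batch[nr_batched++] = p;
    if (nr_batched == BATCH_MAX)
        batch_flush();
}

int main(void)
{
    struct page pages[5] = { {0}, {1}, {2}, {3}, {4} };
    int i;

    /* e.g. decrease_reservation() zapping five pages... */
    for (i = 0; i < 5; i++)
        batch_queue(&pages[i]);

    /* ...followed by one flush at hypercall exit instead of five */
    batch_flush();
    return 0;
}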