--- xen-unstable//xen/common/page_alloc.c 2010-04-19 09:23:24.000000000 -0500 +++ xen-sdev//xen/common/page_alloc.c 2010-05-13 13:02:49.000000000 -0500 @@ -1083,6 +1083,50 @@ void init_domheap_pages(paddr_t ps, padd init_heap_pages(mfn_to_page(smfn), emfn - smfn); } +static void enable_superpage( + struct page_info *pg, + unsigned int order) +{ + struct spage_info *spage; + int i; + + spage = page_to_spage(pg); + if (order < SUPERPAGE_ORDER) + { + test_and_clear_bit(_SGT_enabled, &spage->type_info); + return; + } + if (order == SUPERPAGE_ORDER) + { + test_and_set_bit(_SGT_enabled, &spage->type_info); + return; + } + order -= SUPERPAGE_ORDER; + for(i = 0; i < (1 << order); i++) + test_and_set_bit(_SGT_enabled, &spage[i].type_info); +} + +static void disable_superpage( + struct page_info *pg, + unsigned int order) +{ + struct spage_info *spage; + int i; + + spage = page_to_spage(pg); + test_and_clear_bit(_SGT_enabled, &spage->type_info); + + if (order > SUPERPAGE_ORDER) + { + order -= SUPERPAGE_ORDER; + for(i = 1; i < (1 << order); i++) + { + BUG_ON((spage[i].type_info & SGT_count_mask) != 0); + test_and_clear_bit(_SGT_enabled, &spage[i].type_info); + } + } + +} int assign_pages( struct domain *d, @@ -1128,6 +1172,9 @@ int assign_pages( page_list_add_tail(&pg[i], &d->page_list); } + if (opt_allow_superpage) + enable_superpage(pg, order); + spin_unlock(&d->page_alloc_lock); return 0; @@ -1201,6 +1248,9 @@ void free_domheap_pages(struct page_info page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list); } + if (opt_allow_superpage) + disable_superpage(pg, order); + d->tot_pages -= 1 << order; drop_dom_ref = (d->tot_pages == 0); --- xen-unstable//xen/include/asm-x86/mm.h 2010-04-28 09:31:26.000000000 -0500 +++ xen-sdev//xen/include/asm-x86/mm.h 2010-05-05 09:33:49.000000000 -0500 @@ -214,6 +214,33 @@ struct page_info #define PGC_count_width PG_shift(9) #define PGC_count_mask ((1UL<arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE))); + || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)))); } static inline int --- xen-unstable//xen/include/asm-x86/x86_32/page.h 2009-10-07 15:43:52.000000000 -0500 +++ xen-sdev//xen/include/asm-x86/x86_32/page.h 2010-05-04 08:14:07.000000000 -0500 @@ -6,6 +6,7 @@ #define L2_PAGETABLE_SHIFT 21 #define L3_PAGETABLE_SHIFT 30 #define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT #define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT #define PAGETABLE_ORDER 9 @@ -13,6 +14,7 @@ #define L2_PAGETABLE_ENTRIES (1<>(SUPERPAGE_SHIFT-PAGE_SHIFT)) +#define sdx_to_pfn(sdx) ((sdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT)) + static inline unsigned long __virt_to_maddr(unsigned long va) { ASSERT(va >= DIRECTMAP_VIRT_START && va < DIRECTMAP_VIRT_END); --- xen-unstable//xen/include/asm-x86/config.h 2010-04-06 07:44:56.000000000 -0500 +++ xen-sdev//xen/include/asm-x86/config.h 2010-05-03 09:57:00.000000000 -0500 @@ -225,6 +225,11 @@ extern unsigned int video_mode, video_fl /* Slot 261: xen text, static data and bss (1GB). */ #define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END) #define XEN_VIRT_END (XEN_VIRT_START + GB(1)) +/* Slot 261: superpage information array (40MB). */ +#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START +#define SPAGETABLE_SIZE ((DIRECTMAP_SIZE >> SUPERPAGE_SHIFT) * \ + sizeof(struct spage_info)) +#define SPAGETABLE_VIRT_START (SPAGETABLE_VIRT_END - SPAGETABLE_SIZE) /* Slot 261: page-frame information array (40GB). */ #define FRAMETABLE_VIRT_END DIRECTMAP_VIRT_START #define FRAMETABLE_SIZE ((DIRECTMAP_SIZE >> PAGE_SHIFT) * \ --- xen-unstable//xen/include/asm-x86/page.h 2009-12-18 08:35:12.000000000 -0600 +++ xen-sdev//xen/include/asm-x86/page.h 2010-05-10 11:14:21.000000000 -0500 @@ -240,6 +240,13 @@ void copy_page_sse2(void *, const void * #define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) #define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) +/* Convert between machine frame numbers and spage-info structures. */ +#define __mfn_to_spage(mfn) (spage_table + pfn_to_sdx(mfn)) +#define __spage_to_mfn(pg) sdx_to_pfn((unsigned long)((pg) - spage_table)) + +/* Convert between page-info structures and spage-info structures. */ +#define page_to_spage(page) (spage_table+(((page)-frame_table)>>(SUPERPAGE_SHIFT-PAGE_SHIFT))) + /* * We define non-underscored wrappers for above conversion functions. These are * overridden in various source files while underscored versions remain intact. @@ -251,6 +258,8 @@ void copy_page_sse2(void *, const void * #define maddr_to_virt(ma) __maddr_to_virt((unsigned long)(ma)) #define mfn_to_page(mfn) __mfn_to_page(mfn) #define page_to_mfn(pg) __page_to_mfn(pg) +#define mfn_to_spage(mfn) __mfn_to_spage(mfn) +#define spage_to_mfn(pg) __spage_to_mfn(pg) #define maddr_to_page(ma) __maddr_to_page(ma) #define page_to_maddr(pg) __page_to_maddr(pg) #define virt_to_page(va) __virt_to_page(va) --- xen-unstable//xen/include/asm-x86/x86_64/page.h 2009-10-07 15:43:52.000000000 -0500 +++ xen-sdev//xen/include/asm-x86/x86_64/page.h 2010-05-04 10:44:38.000000000 -0500 @@ -7,6 +7,7 @@ #define L3_PAGETABLE_SHIFT 30 #define L4_PAGETABLE_SHIFT 39 #define PAGE_SHIFT L1_PAGETABLE_SHIFT +#define SUPERPAGE_SHIFT L2_PAGETABLE_SHIFT #define ROOT_PAGETABLE_SHIFT L4_PAGETABLE_SHIFT #define PAGETABLE_ORDER 9 @@ -15,6 +16,7 @@ #define L3_PAGETABLE_ENTRIES (1<>(SUPERPAGE_SHIFT-PAGE_SHIFT)) - spage_table) +#define pdx_to_spage(pdx) (spage_table + ((pdx)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))) /* * Note: These are solely for the use by page_{get,set}_owner(), and * therefore don't need to handle the XEN_VIRT_{START,END} range. @@ -64,6 +68,16 @@ static inline unsigned long pdx_to_pfn(u ((pdx << pfn_pdx_hole_shift) & pfn_top_mask); } +static inline unsigned long pfn_to_sdx(unsigned long pfn) +{ + return pfn_to_pdx(pfn) >> (SUPERPAGE_SHIFT-PAGE_SHIFT); +} + +static inline unsigned long sdx_to_pfn(unsigned long sdx) +{ + return pdx_to_pfn(sdx << (SUPERPAGE_SHIFT-PAGE_SHIFT)); +} + static inline unsigned long __virt_to_maddr(unsigned long va) { ASSERT(va >= XEN_VIRT_START); --- xen-unstable//xen/arch/x86/mm.c 2010-04-28 09:31:26.000000000 -0500 +++ xen-sdev//xen/arch/x86/mm.c 2010-05-13 13:07:57.000000000 -0500 @@ -151,8 +151,15 @@ unsigned long __read_mostly pdx_group_va #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) -int opt_allow_hugepage; +int opt_allow_superpage; +static int opt_allow_hugepage; boolean_param("allowhugepage", opt_allow_hugepage); +boolean_param("allowsuperpage", opt_allow_superpage); + +static void get_spage(struct spage_info *spage); +static void put_spage(struct spage_info *spage); +static int get_spage_type(struct spage_info *spage, unsigned long type); +static void put_spage_type(struct spage_info *spage); #define l1_disallow_mask(d) \ ((d != dom_io) && \ @@ -202,6 +209,28 @@ static void __init init_frametable_chunk memset(end, -1, s - (unsigned long)end); } +static void __init init_spagetable(void) +{ + unsigned long s, start = SPAGETABLE_VIRT_START; + unsigned long end = SPAGETABLE_VIRT_END; + unsigned long step, mfn; + unsigned int max_entries; + + step = 1UL << PAGETABLE_ORDER; + max_entries = (max_pdx + ((1UL<> SUPERPAGE_ORDER; + end = start + (((max_entries * sizeof(*spage_table)) + + ((1UL< mfn ) - put_data_page(mfn_to_page(m), writeable); - return -EINVAL; - } - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); + MEM_LOG("Unaligned superpage map attempt mfn %lx", mfn); + return -EINVAL; + } + spage = mfn_to_spage(mfn); - rc = 1; + if (!(spage->type_info & SGT_enabled)) + { + MEM_LOG("Map attempt on non-contiguous superpage, mfn %lx", mfn); + return -EINVAL; } - return rc; + get_spage(spage); + + if (l2e_get_flags(l2e) & _PAGE_RW) + { + if (!get_spage_type(spage, SGT_superpage)) + { + put_spage(spage); + MEM_LOG("Superpage in use as page table mfn %lx, type %lx", + mfn, spage->type_info); + return -EINVAL; + } + } + return 0; } @@ -1101,13 +1133,11 @@ static int put_page_from_l2e(l2_pgentry_ if ( l2e_get_flags(l2e) & _PAGE_PSE ) { - unsigned long mfn = l2e_get_pfn(l2e), m = mfn; - int writeable = l2e_get_flags(l2e) & _PAGE_RW; + struct spage_info *spage = mfn_to_spage(l2e_get_pfn(l2e)); - ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1))); - do { - put_data_page(mfn_to_page(m), writeable); - } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) ); + put_spage(spage); + if (l2e_get_flags(l2e) & _PAGE_RW) + put_spage_type(spage); } else { @@ -2038,6 +2068,75 @@ int get_page(struct page_info *page, str return 0; } +static void get_spage(struct spage_info *spage) +{ + unsigned long x, y = spage->count_info; + + do { + x = y; + } + while ( (y = cmpxchg(&spage->count_info, x, x + 1)) != x ); + + return; +} + +static void put_spage(struct spage_info *spage) +{ + unsigned long x, y = spage->count_info; + + do { + ASSERT(y != 0); + x = y; + } + while ( (y = cmpxchg(&spage->count_info, x, x - 1)) != x ); + + return; +} + +static int get_spage_type(struct spage_info *spage, unsigned long type) +{ + unsigned long x, nx, y = spage->type_info; + + do { + x = y; + nx = x + 1; + if ( unlikely((nx & SGT_count_mask) == 0) ) + { + MEM_LOG("Superpage type count overflow on pfn %lx", spage_to_mfn(spage)); + return 0; + } + if ( unlikely((x & SGT_count_mask) == 0) ) + { + nx = (nx & ~SGT_type_mask) | type; + } + else + { + if ( unlikely((x & SGT_type_mask) != type) ) + return 0; + } + } + while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); + return 1; +} + +static void put_spage_type(struct spage_info *spage) +{ + unsigned long x, nx, y = spage->type_info; + + do { + x = y; + nx = x - 1; + if ((x & SGT_count_mask) == 0) + { + return; + } + if ((nx & SGT_count_mask) == 0) + nx = (nx & ~SGT_type_mask) | SGT_none; + } + while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); + return; +} + /* * Special version of get_page() to be used exclusively when * - a page is known to already have a non-zero reference count @@ -2279,6 +2378,10 @@ static int __put_page_type(struct page_i return -EINTR; } + if (opt_allow_superpage && spage_conflicts(x & PGT_type_mask)) + { + put_spage_type(page_to_spage(page)); + } return rc; } @@ -2413,6 +2516,15 @@ static int __get_page_type(struct page_i rc = alloc_page_type(page, type, preemptible); } + if (opt_allow_superpage && spage_conflicts(type)) + { + if (!get_spage_type(page_to_spage(page), SGT_nosuper)) + { + __put_page_type(page, 0); + return -EINVAL; + } + } + if ( (x & PGT_partial) && !(nx & PGT_partial) ) put_page(page);