Index: 2007-03-19/xen/arch/x86/mm.c =================================================================== --- 2007-03-19.orig/xen/arch/x86/mm.c 2007-03-27 10:32:56.000000000 +0200 +++ 2007-03-19/xen/arch/x86/mm.c 2007-04-03 14:33:46.000000000 +0200 @@ -146,6 +146,12 @@ struct page_info *frame_table; unsigned long max_page; unsigned long total_pages; +#define l1_disallow_mask(d) (!(d)->iomem_caps || \ + !rangeset_is_empty((d)->iomem_caps) || \ + !rangeset_is_empty((d)->arch.ioport_caps) ? \ + L1_DISALLOW_MASK : \ + L1_DISALLOW_MASK|_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) + #ifdef CONFIG_COMPAT l2_pgentry_t *compat_idle_pg_table_l2 = NULL; #define l3_disallow_mask(d) (!IS_COMPAT(d) ? \ @@ -256,9 +262,10 @@ void share_xen_page_with_guest( spin_lock(&d->page_alloc_lock); - /* The incremented type count pins as writable or read-only. */ page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page); - page->u.inuse.type_info |= PGT_validated | 1; + if ( readonly || d != dom_io ) + /* The incremented type count pins as writable or read-only. */ + page->u.inuse.type_info |= PGT_validated | 1; page_set_owner(page, d); wmb(); /* install valid domain ptr before updating refcnt. */ @@ -577,15 +584,16 @@ get_page_from_l1e( l1_pgentry_t l1e, struct domain *d) { unsigned long mfn = l1e_get_pfn(l1e); + unsigned int flags = l1e_get_flags(l1e); struct page_info *page = mfn_to_page(mfn); int okay; - if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) ) + if ( !(flags & _PAGE_PRESENT) ) return 1; - if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) ) + if ( unlikely(flags & l1_disallow_mask(d)) ) { - MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK); + MEM_LOG("Bad L1 flags %x", flags & l1_disallow_mask(d)); return 0; } @@ -615,10 +623,23 @@ get_page_from_l1e( * contribute to writeable mapping refcounts. (This allows the * qemu-dm helper process in dom0 to map the domain's memory without * messing up the count of "real" writable mappings.) */ - okay = (((l1e_get_flags(l1e) & _PAGE_RW) && - !(unlikely(paging_mode_external(d) && (d != current->domain)))) - ? get_page_and_type(page, d, PGT_writable_page) - : get_page(page, d)); + if ( !(flags & (_PAGE_RW|_PAGE_PWT|_PAGE_PCD|_PAGE_PAT)) || + (unlikely(paging_mode_external(d) && (d != current->domain))) ) + okay = get_page(page, d); + else + { + unsigned long type = PGT_writable_page; + + if ( flags & _PAGE_PWT ) + type |= PGT_pwt_mask; + if ( flags & _PAGE_PCD ) + type |= PGT_pcd_mask; +#ifdef CONFIG_PAT + if ( flags & _PAGE_PAT ) + type |= PGT_pat_mask; +#endif + okay = get_page_and_type(page, d, type); + } if ( !okay ) { MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte @@ -1331,10 +1352,10 @@ static int mod_l1_entry(l1_pgentry_t *pl nl1e = l1e_from_pfn(gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e)), l1e_get_flags(nl1e)); - if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) ) + if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) ) { MEM_LOG("Bad L1 flags %x", - l1e_get_flags(nl1e) & L1_DISALLOW_MASK); + l1e_get_flags(nl1e) & l1_disallow_mask(d)); return 0; } @@ -1539,10 +1560,41 @@ static int mod_l4_entry(struct domain *d #endif -int alloc_page_type(struct page_info *page, unsigned long type) +static int alloc_page_type(struct page_info *page, unsigned long type) { struct domain *owner = page_get_owner(page); + if ( type & PGT_writable_page ) + { + unsigned long mfn = page_to_mfn(page); + unsigned long flags = 0; + int ret; + + if ( owner == dom_io ) + return 1; +#ifdef __i386__ + if ( mfn >= ((DIRECTMAP_VIRT_END - DIRECTMAP_VIRT_START) >> PAGE_SHIFT) ) + return 1; +#endif + + if ( type & PGT_pwt_mask ) + flags |= _PAGE_PWT; + if ( type & PGT_pcd_mask ) + flags |= _PAGE_PCD; +#ifdef CONFIG_PAT + if ( type & PGT_pat_mask ) + flags |= _PAGE_PAT; +#endif + ASSERT(flags); + ret = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, 1, + PAGE_HYPERVISOR | flags); + if ( ret == 0 ) + return 1; + + MEM_LOG("Error %d changing cacheability of mfn %lx", ret, mfn); + return 0; + } + /* A page table is dirtied when its type count becomes non-zero. */ if ( likely(owner != NULL) ) mark_dirty(owner, page_to_mfn(page)); @@ -1576,6 +1628,27 @@ void free_page_type(struct page_info *pa struct domain *owner = page_get_owner(page); unsigned long gmfn; + if ( type & PGT_writable_page ) + { + unsigned long mfn = page_to_mfn(page); + + if ( owner == dom_io ) + return; +#ifdef __i386__ + if ( mfn >= ((DIRECTMAP_VIRT_END - DIRECTMAP_VIRT_START) >> PAGE_SHIFT) ) + return; +#endif + + if ( map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, 1, + PAGE_HYPERVISOR) ) + { + printk("Reverting cacheability for %lx failed\n", mfn); + BUG(); + } + + return; + } + if ( likely(owner != NULL) ) { /* @@ -1645,11 +1718,13 @@ void put_page_type(struct page_info *pag if ( unlikely((nx & PGT_count_mask) == 0) ) { - if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && + if ( (unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) || + unlikely((nx & PGT_type_mask) > PGT_writable_page)) && likely(nx & PGT_validated) ) { /* - * Page-table pages must be unvalidated when count is zero. The + * Page-table pages and writable pages with non-default + * cacheability must be unvalidated when count is zero. The * 'free' is safe because the refcnt is non-zero and validated * bit is clear => other ops will spin or fail. */ @@ -1718,7 +1793,7 @@ int get_page_type(struct page_info *page if ( unlikely(!cpus_empty(mask)) && /* Shadow mode: track only writable pages. */ (!shadow_mode_enabled(page_get_owner(page)) || - ((nx & PGT_type_mask) == PGT_writable_page)) ) + (nx & PGT_writable_page)) ) { perfc_incrc(need_flush_tlb_flush); flush_tlb_mask(mask); @@ -3435,6 +3510,8 @@ int map_pages_to_xen( if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) { pl1e = alloc_xen_pagetable(); + if ( pl1e == NULL ) + return -ENOMEM; clear_page(pl1e); l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e), __PAGE_HYPERVISOR)); @@ -3442,6 +3519,8 @@ int map_pages_to_xen( else if ( l2e_get_flags(*pl2e) & _PAGE_PSE ) { pl1e = alloc_xen_pagetable(); + if ( pl1e == NULL ) + return -ENOMEM; for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) l1e_write(&pl1e[i], l1e_from_pfn(l2e_get_pfn(*pl2e) + i, @@ -3460,6 +3539,28 @@ int map_pages_to_xen( virt += 1UL << L1_PAGETABLE_SHIFT; mfn += 1UL; nr_mfns -= 1UL; + + if ( !map_small_pages && + flags == PAGE_HYPERVISOR && + ( nr_mfns == 0 || + ((((virt>>PAGE_SHIFT) | mfn) & ((1<shadow_flags & (1<type)) ); /* Bad type count on guest page? */ - if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page + if ( (gpg->u.inuse.type_info & PGT_writable_page) && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) { SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" Index: 2007-03-19/xen/arch/x86/mm/shadow/multi.c =================================================================== --- 2007-03-19.orig/xen/arch/x86/mm/shadow/multi.c 2007-03-19 13:24:00.000000000 +0100 +++ 2007-03-19/xen/arch/x86/mm/shadow/multi.c 2007-04-03 11:26:31.000000000 +0200 @@ -4135,8 +4135,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g if ( !shadow_mode_translate(v->domain) ) return _mfn(gfn_x(gfn)); - if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask) - != PGT_writable_page ) + if ( !(mfn_to_page(gmfn)->u.inuse.type_info & PGT_writable_page) ) return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */ else return gfn_to_mfn(v->domain, gfn_x(gfn)); Index: 2007-03-19/xen/include/asm-x86/mm.h =================================================================== --- 2007-03-19.orig/xen/include/asm-x86/mm.h 2007-03-19 13:17:45.000000000 +0100 +++ 2007-03-19/xen/include/asm-x86/mm.h 2007-04-03 10:04:07.000000000 +0200 @@ -64,24 +64,35 @@ struct page_info }; /* The following page types are MUTUALLY EXCLUSIVE. */ -#define PGT_none (0U<<29) /* no special uses of this page */ -#define PGT_l1_page_table (1U<<29) /* using this page as an L1 page table? */ -#define PGT_l2_page_table (2U<<29) /* using this page as an L2 page table? */ -#define PGT_l3_page_table (3U<<29) /* using this page as an L3 page table? */ -#define PGT_l4_page_table (4U<<29) /* using this page as an L4 page table? */ -#define PGT_gdt_page (5U<<29) /* using this page in a GDT? */ -#define PGT_ldt_page (6U<<29) /* using this page in an LDT? */ -#define PGT_writable_page (7U<<29) /* has writable mappings of this page? */ -#define PGT_type_mask (7U<<29) /* Bits 29-31. */ +#define PGT_none (0U<<28) /* no special uses of this page */ +#define PGT_l1_page_table (1U<<28) /* using this page as an L1 page table? */ +#define PGT_l2_page_table (2U<<28) /* using this page as an L2 page table? */ +#define PGT_l3_page_table (3U<<28) /* using this page as an L3 page table? */ +#define PGT_l4_page_table (4U<<28) /* using this page as an L4 page table? */ +#define PGT_gdt_page (5U<<28) /* using this page in a GDT? */ +#define PGT_ldt_page (6U<<28) /* using this page in an LDT? */ +#define PGT_writable_page (0x8U<<28) /* has writable mappings of this page? */ +#define PGT_pwt_mask (0x1U<<28) /* (l1e & _PAGE_PWT) mirror */ +#define PGT_pcd_mask (0x2U<<28) /* (l1e & _PAGE_PCD) mirror */ +#define PGT_wb_page (0x8U<<28) /* WB cached writable page? */ +#define PGT_wt_page (0x9U<<28) /* WT cached writable page? */ +#define PGT_ucm_page (0xAU<<28) /* UC- cached writable page? */ +#define PGT_uc_page (0xBU<<28) /* UC cached writable page? */ +#ifdef CONFIG_PAT +#define PGT_pat_mask (0x4U<<28) /* (l1e & _PAGE_PAT) mirror */ +#define PGT_wc_page (0xCU<<28) /* WC cached writable page? */ +#define PGT_wp_page (0xDU<<28) /* WP cached writable page? */ +#endif +#define PGT_type_mask (0xFU<<28) /* Bits 28-31. */ /* Owning guest has pinned this page to its current type? */ -#define _PGT_pinned 28 +#define _PGT_pinned 22 #define PGT_pinned (1U<<_PGT_pinned) /* Has this page been validated for use as its current type? */ -#define _PGT_validated 27 +#define _PGT_validated 21 #define PGT_validated (1U<<_PGT_validated) /* PAE only: is this an L2 page directory containing Xen-private mappings? */ -#define _PGT_pae_xen_l2 26 +#define _PGT_pae_xen_l2 20 #define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2) /* 16-bit count of uses of this frame as its current type. */ @@ -141,7 +152,6 @@ extern unsigned long max_page; extern unsigned long total_pages; void init_frametable(void); -int alloc_page_type(struct page_info *page, unsigned long type); void free_page_type(struct page_info *page, unsigned long type); int _shadow_mode_refcounts(struct domain *d); Index: 2007-03-19/xen/include/asm-x86/x86_32/page-3level.h =================================================================== --- 2007-03-19.orig/xen/include/asm-x86/x86_32/page-3level.h 2007-01-08 14:15:32.000000000 +0100 +++ 2007-03-19/xen/include/asm-x86/x86_32/page-3level.h 2007-04-03 09:10:24.000000000 +0200 @@ -85,6 +85,6 @@ typedef l3_pgentry_t root_pgentry_t; #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF)) #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF)) -#define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */ +#define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */ #endif /* __X86_32_PAGE_3LEVEL_H__ */ Index: 2007-03-19/xen/include/asm-x86/x86_32/page.h =================================================================== --- 2007-03-19.orig/xen/include/asm-x86/x86_32/page.h 2006-12-04 08:49:58.000000000 +0100 +++ 2007-03-19/xen/include/asm-x86/x86_32/page.h 2007-04-03 09:30:46.000000000 +0200 @@ -29,13 +29,13 @@ extern unsigned int PAGE_HYPERVISOR_NOCA (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB) /* - * Disallow unused flag bits plus PAT, PSE and GLOBAL. + * Disallow unused flag bits plus PAT/PSE and GLOBAL. * Permit the NX bit if the hardware supports it. */ #define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB) -#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK) +#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_PCD | _PAGE_PWT) #endif /* __X86_32_PAGE_H__ */ Index: 2007-03-19/xen/include/asm-x86/x86_64/page.h =================================================================== --- 2007-03-19.orig/xen/include/asm-x86/x86_64/page.h 2007-02-26 14:59:09.000000000 +0100 +++ 2007-03-19/xen/include/asm-x86/x86_64/page.h 2007-04-03 09:29:31.000000000 +0200 @@ -87,18 +87,18 @@ typedef l4_pgentry_t root_pgentry_t; #define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0U) /* - * Disallow unused flag bits plus PAT, PSE and GLOBAL. + * Disallow unused flag bits plus PAT/PSE and GLOBAL. * Permit the NX bit if the hardware supports it. * Note that range [62:52] is available for software use on x86/64. */ #define BASE_DISALLOW_MASK (0xFF800180U & ~_PAGE_NX) #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB) -#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK) -#define L3_DISALLOW_MASK (BASE_DISALLOW_MASK) -#define L4_DISALLOW_MASK (BASE_DISALLOW_MASK) +#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_PCD | _PAGE_PWT) +#define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_PCD | _PAGE_PWT) +#define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_PCD | _PAGE_PWT) -#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1E6U +#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1FEU #define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL) #define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)