diff -r 4f2c59fb28e6 xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c	Mon Jul 18 11:43:45 2011 +0200
+++ b/xen/arch/x86/mm/guest_walk.c	Fri Jul 22 16:48:43 2011 +0200
@@ -134,7 +134,8 @@ guest_walk_tables(struct vcpu *v, struct
     guest_l4e_t *l4p;
 #endif
     uint32_t gflags, mflags, iflags, rc = 0;
-    int pse, smep;
+    int smep;
+    bool_t pse1G = 0, pse2M = 0;
 
     perfc_incr(guest_walk);
     memset(gw, 0, sizeof(*gw));
@@ -214,15 +215,56 @@ guest_walk_tables(struct vcpu *v, struct
 
 #endif /* All levels... */
 
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+    pse1G = (guest_supports_1G_superpages(v) &&
+             (guest_l3e_get_flags(gw->l3e) & _PAGE_PSE));
+
+    if ( pse1G )
+    {
+        /* Special case: this guest VA is in a PSE superpage, so there's
+         * no guest l1e.  We make one up so that the propagation code
+         * can generate a shadow l1 table.  Start with the gfn of the
+         * first 4k-page of the superpage. */
+        gfn_t start = guest_l3e_get_gfn(gw->l3e);
+        /* Grant full access in the l1e, since all the guest entry's
+         * access controls are enforced in the shadow l3e. */
+        int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+                     _PAGE_ACCESSED|_PAGE_DIRTY);
+        /* Import cache-control bits.  Note that _PAGE_PAT is actually
+         * _PAGE_PSE, and it is always set.  We will clear it in case
+         * _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. */
+        flags |= (guest_l3e_get_flags(gw->l3e)
+                  & (_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
+        if ( !(gfn_x(start) & 1) )
+            /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
+            flags &= ~_PAGE_PAT;
+
+#define GUEST_L3_GFN_ALIGN (1 << (GUEST_L3_PAGETABLE_SHIFT - \
+                                  GUEST_L1_PAGETABLE_SHIFT))
+        if ( gfn_x(start) & (GUEST_L3_GFN_ALIGN - 1) & ~0x1 )
+        {
+            rc |= _PAGE_INVALID_BITS;
+        }
+
+        /* Increment the pfn by the right number of 4k pages.
+         * Mask out PAT and invalid bits. */
+        start = _gfn((gfn_x(start) & ~(GUEST_L3_GFN_ALIGN - 1)) +
+                     ((va >> PAGE_SHIFT) & (GUEST_L3_GFN_ALIGN - 1)));
+        gw->l1e = guest_l1e_from_gfn(start, flags);
+        gw->l2mfn = gw->l1mfn = _mfn(INVALID_MFN);
+        goto set_ad;
+    }
+#endif
+
     gflags = guest_l2e_get_flags(gw->l2e) ^ iflags;
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT )
         goto out;
 
-    pse = (guest_supports_superpages(v) &&
+    pse2M = (guest_supports_superpages(v) &&
            (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE));
 
-    if ( pse )
+    if ( pse2M )
     {
         /* Special case: this guest VA is in a PSE superpage, so there's
          * no guest l1e.  We make one up so that the propagation code
@@ -290,22 +332,29 @@ guest_walk_tables(struct vcpu *v, struct
      * success.  Although the PRMs say higher-level _PAGE_ACCESSED bits
      * get set whenever a lower-level PT is used, at least some hardware
      * walkers behave this way. */
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+ set_ad:
+#endif
     if ( rc == 0 )
     {
 #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
         if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
             paging_mark_dirty(d, mfn_x(gw->l4mfn));
-        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
+        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e,
+                         (pse1G && (pfec & PFEC_write_access))) )
             paging_mark_dirty(d, mfn_x(gw->l3mfn));
 #endif
-        if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
-                         (pse && (pfec & PFEC_write_access))) )
-            paging_mark_dirty(d, mfn_x(gw->l2mfn));
-        if ( !pse )
+        if ( !pse1G )
         {
-            if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
-                             (pfec & PFEC_write_access)) )
-                paging_mark_dirty(d, mfn_x(gw->l1mfn));
+            if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
+                             (pse2M && (pfec & PFEC_write_access))) )
+                paging_mark_dirty(d, mfn_x(gw->l2mfn));
+            if ( !pse2M )
+            {
+                if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
+                                 (pfec & PFEC_write_access)) )
+                    paging_mark_dirty(d, mfn_x(gw->l1mfn));
+            }
         }
     }
 
diff -r 4f2c59fb28e6 xen/include/asm-x86/guest_pt.h
--- a/xen/include/asm-x86/guest_pt.h	Mon Jul 18 11:43:45 2011 +0200
+++ b/xen/include/asm-x86/guest_pt.h	Fri Jul 22 16:48:43 2011 +0200
@@ -194,6 +194,17 @@ guest_supports_superpages(struct vcpu *v
 }
 
 static inline int
+guest_supports_1G_superpages(struct vcpu *v)
+{
+    if ( !guest_supports_superpages(v) )
+        return 0;
+
+    return (GUEST_PAGING_LEVELS >= 3
+            && cpu_has_page1gb
+            && hvm_long_mode_enabled(v));
+}
+
+static inline int
 guest_supports_nx(struct vcpu *v)
 {
     if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
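
For reference, the address arithmetic behind the fabricated l1e in the new pse1G path can be sketched in isolation. The snippet below is only an illustration: the constants (PAGE_SHIFT, L3_PAGETABLE_SHIFT, L3_GFN_ALIGN) and the helper fake_l1_gfn() are simplified stand-ins rather than the hypervisor's own definitions. It shows how the 4k gfn handed to guest_l1e_from_gfn() is derived from the l3e's gfn and the faulting VA.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the guest_pt.h constants: 4k base pages and
 * 1GB (2^30) superpages, as in long mode.  Assumptions for this example
 * only. */
#define PAGE_SHIFT          12
#define L3_PAGETABLE_SHIFT  30
#define L3_GFN_ALIGN        (1ULL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) /* 0x40000 */

/* Compute the gfn of the 4k frame the fabricated l1e should name: mask
 * the PAT/reserved low bits off the superpage's base gfn, then add the
 * 4k-page index of the VA within its 1GB-aligned region. */
static uint64_t fake_l1_gfn(uint64_t l3e_gfn, uint64_t va)
{
    uint64_t base = l3e_gfn & ~(L3_GFN_ALIGN - 1);
    uint64_t idx  = (va >> PAGE_SHIFT) & (L3_GFN_ALIGN - 1);
    return base + idx;
}

int main(void)
{
    /* Superpage base at guest-physical 4GB (gfn 0x100000); bit 0 is set
     * to model _PAGE_PSE_PAT landing in the low bit of the gfn. */
    uint64_t l3e_gfn = 0x100000 | 1;
    uint64_t va      = 0x40123456;   /* arbitrary VA mapped by that l3e */

    printf("fake l1 gfn = %#llx\n",
           (unsigned long long)fake_l1_gfn(l3e_gfn, va));
    /* Prints 0x100123: base gfn 0x100000 plus 4k-page index 0x123. */
    return 0;
}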