# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Node ID 0b6f49d25d4fbb0c3ab6cd1f2ac9dd32ed3d1ff9
# Parent 77e1baf0a5679a155c1f2ccdf3328bf476bf0c8b
[XEN] Prefetch multiple shadow entries per pagefault
Also, clean up the shadow *_propagate/fault routines.
The new fast fault path (magic shadow l1 entries with reserved bits set)
lets us dispatch some guest-not-present faults and most MMIO accesses
without taking the shadow lock.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
xen/arch/x86/mm/shadow/common.c | 12
xen/arch/x86/mm/shadow/multi.c | 675 ++++++++++++++++++++-------------------
xen/arch/x86/mm/shadow/private.h | 117 ------
xen/arch/x86/mm/shadow/types.h | 71 ++++
xen/include/asm-x86/perfc_defn.h | 3
xen/include/asm-x86/shadow.h | 6
6 files changed, 446 insertions(+), 438 deletions(-)
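
As a standalone illustration of the fast-path encoding that shadow/types.h
gains below: this sketch packs and decodes the "magic" MMIO and
guest-not-present shadow l1 entries outside of Xen. The SH_L1E_* constants
and helper logic are copied from the hunk further down; the stdint types,
the stand-in shadow_l1e_t and _PAGE_* definitions, main(), and the example
gfn are assumptions made only for this demo.

#include <stdio.h>
#include <stdint.h>

/* Stand-in for Xen's shadow_l1e_t (assumption for the demo) */
typedef struct { uint64_t l1; } shadow_l1e_t;

#define _PAGE_RW   0x002ULL
#define _PAGE_USER 0x004ULL

/* Constants as introduced in shadow/types.h by this changeset */
#define SH_L1E_MAGIC            0xffffffff00000000ULL
#define SH_L1E_MMIO_MAGIC       0xffffffff00000001ULL
#define SH_L1E_MMIO_MAGIC_MASK  0xffffffff00000009ULL
#define SH_L1E_MMIO_GFN_MASK    0x00000000fffffff0ULL
#define SH_L1E_MMIO_GFN_SHIFT   4

/* Guest-not-present: a single all-ones magic value */
static shadow_l1e_t sh_l1e_gnp(void)
{
    return (shadow_l1e_t){ ~0ULL };
}

/* MMIO: high magic bits set, gfn in bits 4-31, guest U/S and R/W kept */
static shadow_l1e_t sh_l1e_mmio(uint64_t gfn, uint64_t gflags)
{
    return (shadow_l1e_t){ SH_L1E_MMIO_MAGIC
                           | (gfn << SH_L1E_MMIO_GFN_SHIFT)
                           | (gflags & (_PAGE_USER | _PAGE_RW)) };
}

static int sh_l1e_is_magic(shadow_l1e_t e)
{
    return (e.l1 & SH_L1E_MAGIC) == SH_L1E_MAGIC;
}

static int sh_l1e_is_mmio(shadow_l1e_t e)
{
    return (e.l1 & SH_L1E_MMIO_MAGIC_MASK) == SH_L1E_MMIO_MAGIC;
}

static uint64_t sh_l1e_mmio_get_gfn(shadow_l1e_t e)
{
    return (e.l1 & SH_L1E_MMIO_GFN_MASK) >> SH_L1E_MMIO_GFN_SHIFT;
}

int main(void)
{
    /* Example gfn chosen arbitrarily for the demo */
    shadow_l1e_t mmio = sh_l1e_mmio(0xfee00ULL, _PAGE_USER | _PAGE_RW);
    shadow_l1e_t gnp  = sh_l1e_gnp();

    printf("mmio entry %#llx: magic=%d mmio=%d gfn=%#llx\n",
           (unsigned long long)mmio.l1, sh_l1e_is_magic(mmio),
           sh_l1e_is_mmio(mmio),
           (unsigned long long)sh_l1e_mmio_get_gfn(mmio));
    printf("gnp  entry %#llx: magic=%d mmio=%d\n",
           (unsigned long long)gnp.l1, sh_l1e_is_magic(gnp),
           sh_l1e_is_mmio(gnp));
    return 0;
}

Both kinds of entry have high reserved bits set, so a hardware walk that
reaches them faults with PFEC_reserved_bit, and sh_page_fault() can decode
them straight from the shadow linear map before taking the shadow lock.
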
diff -r 77e1baf0a567 -r 0b6f49d25d4f xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Wed Nov 01 10:02:00 2006 +0000
+++ b/xen/arch/x86/mm/shadow/common.c Wed Nov 01 10:31:11 2006 +0000
@@ -1327,8 +1327,18 @@ static void sh_hash_audit_bucket(struct
&& e->t != (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
&& e->t != (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift) )
{
+ struct page_info *gpg = mfn_to_page(_mfn(e->n));
/* Bad shadow flags on guest page? */
- BUG_ON( !(mfn_to_page(_mfn(e->n))->shadow_flags & (1<<e->t)) );
+ BUG_ON( !(gpg->shadow_flags & (1<<e->t)) );
+ /* Bad type count on guest page? */
+ if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
+ && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
+ {
+ SHADOW_ERROR("MFN %#"SH_PRI_mfn" shadowed (by %#"SH_PRI_mfn")"
+ " but has typecount %#lx\n",
+ e->n, mfn_x(e->smfn), gpg->u.inuse.type_info);
+ BUG();
+ }
}
/* That entry was OK; on we go */
e = e->next;
diff -r 77e1baf0a567 -r 0b6f49d25d4f xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Wed Nov 01 10:02:00 2006 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c Wed Nov 01 10:31:11 2006 +0000
@@ -36,10 +36,7 @@
#include "private.h"
#include "types.h"
-/* The first cut: an absolutely synchronous, trap-and-emulate version,
- * supporting only HVM guests (and so only "external" shadow mode).
- *
- * THINGS TO DO LATER:
+/* THINGS TO DO LATER:
*
* TEARDOWN HEURISTICS
* Also: have a heuristic for when to destroy a previous paging-mode's
@@ -55,14 +52,6 @@
* map_domain_page() version is OK on PAE, we could maybe allow a lightweight
* l3-and-l2h-only shadow mode for PAE PV guests that would allow them
* to share l2h pages again.
- *
- * PAE L3 COPYING
- * In this code, we copy all 32 bytes of a PAE L3 every time we change an
- * entry in it, and every time we change CR3. We copy it for the linear
- * mappings (ugh! PAE linear mappings) and we copy it to the low-memory
- * buffer so it fits in CR3. Maybe we can avoid some of this recopying
- * by using the shadow directly in some places.
- * Also, for SMP, need to actually respond to seeing shadow.pae_flip_pending.
*
* GUEST_WALK_TABLES TLB FLUSH COALESCE
* guest_walk_tables can do up to three remote TLB flushes as it walks to
@@ -98,9 +87,6 @@ static char *fetch_type_names[] = {
[ft_demand_write] "demand write",
};
#endif
-
-/* XXX forward declarations */
-static inline void sh_update_linear_entries(struct vcpu *v);
/**************************************************************************/
/* Hash table mapping from guest pagetables to shadows
@@ -460,16 +446,20 @@ static u32 guest_set_ad_bits(struct vcpu
u32 flags;
int res = 0;
+ ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
+ ASSERT(level <= GUEST_PAGING_LEVELS);
+ ASSERT(shadow_lock_is_acquired(v->domain));
+
+ flags = guest_l1e_get_flags(*ep);
+
+ /* Only set A and D bits for guest-initiated accesses */
+ if ( !(ft & FETCH_TYPE_DEMAND) )
+ return flags;
+
ASSERT(valid_mfn(gmfn)
&& (sh_mfn_is_a_page_table(gmfn)
|| ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask)
== 0)));
- ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
- ASSERT(level <= GUEST_PAGING_LEVELS);
- ASSERT(ft == ft_demand_read || ft == ft_demand_write);
- ASSERT(shadow_lock_is_acquired(v->domain));
-
- flags = guest_l1e_get_flags(*ep);
/* PAE l3s do not have A and D bits */
ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
@@ -496,12 +486,20 @@ static u32 guest_set_ad_bits(struct vcpu
/* Set the bit(s) */
sh_mark_dirty(v->domain, gmfn);
SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
- "old flags = %#x, new flags = %#x\n",
- gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep),
flags);
+ "old flags = %#x, new flags = %#x\n",
+ gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep),
+ flags);
*ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
- /* Propagate this change to any existing shadows */
- res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep));
+ /* Propagate this change to any other shadows of the page
+ * (only necessary if there is more than one shadow) */
+ if ( mfn_to_page(gmfn)->count_info & PGC_page_table )
+ {
+ u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
+ /* More than one type bit set in shadow-flags? */
+ if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
+ res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep));
+ }
/* We should never need to flush the TLB or recopy PAE entries */
ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
@@ -637,78 +635,69 @@ shadow_l4_index(mfn_t *smfn, u32 guest_i
/**************************************************************************/
-/* Functions which compute shadow entries from their corresponding guest
- * entries.
- *
- * These are the "heart" of the shadow code.
- *
- * There are two sets of these: those that are called on demand faults (read
- * faults and write faults), and those that are essentially called to
- * "prefetch" (or propagate) entries from the guest into the shadow. The read
- * fault and write fault are handled as two separate cases for L1 entries (due
- * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together
- * into the respective demand_fault functions.
+/* Function which computes shadow entries from their corresponding guest
+ * entries. This is the "heart" of the shadow code. It operates using
+ * level-1 shadow types, but handles all levels of entry.
+ * Don't call it directly, but use the four wrappers below.
*/
-// The function below tries to capture all of the flag manipulation for the
-// demand and propagate functions into one place.
-//
-static always_inline u32
-sh_propagate_flags(struct vcpu *v, mfn_t target_mfn,
- u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn,
- int mmio, int level, fetch_type_t ft)
-{
-#define CHECK(_cond) \
-do { \
- if (unlikely(!(_cond))) \
- { \
- printk("%s %s %d ASSERTION (%s) FAILED\n", \
- __func__, __FILE__, __LINE__, #_cond); \
- domain_crash(d); \
- } \
-} while (0);
-
+
+static always_inline void
+_sh_propagate(struct vcpu *v,
+ void *guest_entry_ptr,
+ mfn_t guest_table_mfn,
+ mfn_t target_mfn,
+ void *shadow_entry_ptr,
+ int level,
+ fetch_type_t ft,
+ int mmio)
+{
+ guest_l1e_t *gp = guest_entry_ptr;
+ shadow_l1e_t *sp = shadow_entry_ptr;
struct domain *d = v->domain;
u32 pass_thru_flags;
- u32 sflags;
+ u32 gflags, sflags;
/* We don't shadow PAE l3s */
ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
- // XXX -- might want to think about PAT support for HVM guests...
-
-#ifndef NDEBUG
- // MMIO can only occur from L1e's
- //
- if ( mmio )
- CHECK(level == 1);
-
- // We should always have a pointer to the guest entry if it's a non-PSE
- // non-MMIO demand access.
- if ( ft & FETCH_TYPE_DEMAND )
- CHECK(guest_entry_ptr || level == 1);
-#endif
-
- // A not-present guest entry has a special signature in the shadow table,
- // so that we do not have to consult the guest tables multiple times...
- //
+ if ( valid_mfn(guest_table_mfn) )
+ /* Handle A and D bit propagation into the guest */
+ gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
+ else
+ {
+ /* Must be an fl1e or a prefetch */
+ ASSERT(level==1 || !(ft & FETCH_TYPE_DEMAND));
+ gflags = guest_l1e_get_flags(*gp);
+ }
+
if ( unlikely(!(gflags & _PAGE_PRESENT)) )
- return _PAGE_SHADOW_GUEST_NOT_PRESENT;
-
- // Must have a valid target_mfn, unless this is mmio, or unless this is a
- // prefetch. In the case of a prefetch, an invalid mfn means that we can
- // not usefully shadow anything, and so we return early.
+ {
+ /* If a guest l1 entry is not present, shadow with the magic
+ * guest-not-present entry. */
+ if ( level == 1 )
+ *sp = sh_l1e_gnp();
+ else
+ *sp = shadow_l1e_empty();
+ goto done;
+ }
+
+ if ( level == 1 && mmio )
+ {
+ /* Guest l1e maps MMIO space */
+ *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
+ goto done;
+ }
+
+ // Must have a valid target_mfn, unless this is a prefetch. In the
+ // case of a prefetch, an invalid mfn means that we can not usefully
+ // shadow anything, and so we return early.
//
if ( !valid_mfn(target_mfn) )
{
- CHECK((ft == ft_prefetch) || mmio);
- if ( !mmio )
- return 0;
- }
-
- // Set the A and D bits in the guest entry, if we need to.
- if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
- gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
-
+ ASSERT((ft == ft_prefetch));
+ *sp = shadow_l1e_empty();
+ goto done;
+ }
// Propagate bits from the guest to the shadow.
// Some of these may be overwritten, below.
@@ -719,12 +708,7 @@ do {
_PAGE_RW | _PAGE_PRESENT);
if ( guest_supports_nx(v) )
pass_thru_flags |= _PAGE_NX_BIT;
- sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
-
- // Copy the guest's RW bit into the SHADOW_RW bit.
- //
- if ( gflags & _PAGE_RW )
- sflags |= _PAGE_SHADOW_RW;
+ sflags = gflags & pass_thru_flags;
// Set the A&D bits for higher level shadows.
// Higher level entries do not, strictly speaking, have dirty bits, but
@@ -750,49 +734,35 @@ do {
&& !(gflags & _PAGE_DIRTY)) )
sflags &= ~_PAGE_RW;
- // MMIO caching
+ // shadow_mode_log_dirty support
//
- // MMIO mappings are marked as not present, but we set the SHADOW_MMIO bit
- // to cache the fact that this entry is in MMIO space.
+ // Only allow the guest write access to a page a) on a demand fault,
+ // or b) if the page is already marked as dirty.
//
- if ( (level == 1) && mmio )
- {
- sflags &= ~(_PAGE_PRESENT);
- sflags |= _PAGE_SHADOW_MMIO;
- }
- else
- {
- // shadow_mode_log_dirty support
- //
- // Only allow the guest write access to a page a) on a demand fault,
- // or b) if the page is already marked as dirty.
- //
- if ( unlikely((level == 1) &&
- !(ft & FETCH_TYPE_WRITE) &&
- shadow_mode_log_dirty(d) &&
- !sh_mfn_is_dirty(d, target_mfn)) )
- {
+ if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
+ {
+ if ( ft & FETCH_TYPE_WRITE )
+ sh_mark_dirty(d, target_mfn);
+ else if ( !sh_mfn_is_dirty(d, target_mfn) )
sflags &= ~_PAGE_RW;
- }
-
- // protect guest page tables
- //
- if ( unlikely((level == 1) &&
- sh_mfn_is_a_page_table(target_mfn)) )
- {
- if ( shadow_mode_trap_reads(d) )
- {
- // if we are trapping both reads & writes, then mark this page
- // as not present...
- //
- sflags &= ~_PAGE_PRESENT;
- }
- else
- {
- // otherwise, just prevent any writes...
- //
- sflags &= ~_PAGE_RW;
- }
+ }
+
+ // protect guest page tables
+ //
+ if ( unlikely((level == 1) && sh_mfn_is_a_page_table(target_mfn)) )
+ {
+ if ( shadow_mode_trap_reads(d) )
+ {
+ // if we are trapping both reads & writes, then mark this page
+ // as not present...
+ //
+ sflags &= ~_PAGE_PRESENT;
+ }
+ else
+ {
+ // otherwise, just prevent any writes...
+ //
+ sflags &= ~_PAGE_RW;
}
}
@@ -804,9 +774,17 @@ do {
sflags |= _PAGE_USER;
}
- return sflags;
-#undef CHECK
-}
+ *sp = shadow_l1e_from_mfn(target_mfn, sflags);
+ done:
+ SHADOW_DEBUG(PROPAGATE,
+ "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
+ fetch_type_names[ft], level, gp->l1, sp->l1);
+}
+
+
+/* These four wrappers give us a little bit of type-safety back around the
+ * use of void-* pointers in _sh_propagate(), and allow the compiler to
+ * optimize out some level checks. */
#if GUEST_PAGING_LEVELS >= 4
static void
@@ -814,19 +792,10 @@ l4e_propagate_from_guest(struct vcpu *v,
guest_l4e_t *gl4e,
mfn_t gl4mfn,
mfn_t sl3mfn,
- shadow_l4e_t *sl4p,
+ shadow_l4e_t *sl4e,
fetch_type_t ft)
{
- u32 gflags = guest_l4e_get_flags(*gl4e);
- u32 sflags = sh_propagate_flags(v, sl3mfn, gflags, (guest_l1e_t *) gl4e,
- gl4mfn, 0, 4, ft);
-
- *sl4p = shadow_l4e_from_mfn(sl3mfn, sflags);
-
- SHADOW_DEBUG(PROPAGATE,
- "%s gl4e=%" SH_PRI_gpte " sl4e=%" SH_PRI_pte "\n",
- fetch_type_names[ft], gl4e->l4, sl4p->l4);
- ASSERT(sflags != -1);
+ _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
}
static void
@@ -834,19 +803,10 @@ l3e_propagate_from_guest(struct vcpu *v,
guest_l3e_t *gl3e,
mfn_t gl3mfn,
mfn_t sl2mfn,
- shadow_l3e_t *sl3p,
+ shadow_l3e_t *sl3e,
fetch_type_t ft)
{
- u32 gflags = guest_l3e_get_flags(*gl3e);
- u32 sflags = sh_propagate_flags(v, sl2mfn, gflags, (guest_l1e_t *) gl3e,
- gl3mfn, 0, 3, ft);
-
- *sl3p = shadow_l3e_from_mfn(sl2mfn, sflags);
-
- SHADOW_DEBUG(PROPAGATE,
- "%s gl3e=%" SH_PRI_gpte " sl3e=%" SH_PRI_pte "\n",
- fetch_type_names[ft], gl3e->l3, sl3p->l3);
- ASSERT(sflags != -1);
+ _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
}
#endif // GUEST_PAGING_LEVELS >= 4
@@ -854,95 +814,23 @@ l2e_propagate_from_guest(struct vcpu *v,
l2e_propagate_from_guest(struct vcpu *v,
guest_l2e_t *gl2e,
mfn_t gl2mfn,
- mfn_t sl1mfn,
- shadow_l2e_t *sl2p,
+ mfn_t sl1mfn,
+ shadow_l2e_t *sl2e,
fetch_type_t ft)
{
- u32 gflags = guest_l2e_get_flags(*gl2e);
- u32 sflags = sh_propagate_flags(v, sl1mfn, gflags, (guest_l1e_t *) gl2e,
- gl2mfn, 0, 2, ft);
-
- *sl2p = shadow_l2e_from_mfn(sl1mfn, sflags);
-
- SHADOW_DEBUG(PROPAGATE,
- "%s gl2e=%" SH_PRI_gpte " sl2e=%" SH_PRI_pte "\n",
- fetch_type_names[ft], gl2e->l2, sl2p->l2);
- ASSERT(sflags != -1);
-}
-
-static inline int
-l1e_read_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
- int mmio)
-/* returns 1 if emulation is required, and 0 otherwise */
-{
- struct domain *d = v->domain;
- u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
- u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
- mmio, 1, ft_demand_read);
-
- if ( shadow_mode_trap_reads(d) && !mmio && sh_mfn_is_a_page_table(gmfn) )
- {
- // emulation required!
- *sl1p = shadow_l1e_empty();
- return 1;
- }
-
- *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
-
- SHADOW_DEBUG(PROPAGATE,
- "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
- (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
-
- ASSERT(sflags != -1);
- return 0;
-}
-
-static inline int
-l1e_write_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
- int mmio)
-/* returns 1 if emulation is required, and 0 otherwise */
-{
- struct domain *d = v->domain;
- u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
- u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
- mmio, 1, ft_demand_write);
-
- sh_mark_dirty(d, gmfn);
-
- if ( !mmio && sh_mfn_is_a_page_table(gmfn) )
- {
- // emulation required!
- *sl1p = shadow_l1e_empty();
- return 1;
- }
-
- *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
-
- SHADOW_DEBUG(PROPAGATE,
- "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
- (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
-
- ASSERT(sflags != -1);
- return 0;
-}
-
-static inline void
-l1e_propagate_from_guest(struct vcpu *v, guest_l1e_t gl1e, shadow_l1e_t *sl1p,
+ _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
+}
+
+static void
+l1e_propagate_from_guest(struct vcpu *v,
+ guest_l1e_t *gl1e,
+ mfn_t gl1mfn,
+ mfn_t gmfn,
+ shadow_l1e_t *sl1e,
+ fetch_type_t ft,
int mmio)
{
- gfn_t gfn = guest_l1e_get_gfn(gl1e);
- mfn_t gmfn = (mmio) ? _mfn(gfn_x(gfn)) : vcpu_gfn_to_mfn(v, gfn);
- u32 gflags = guest_l1e_get_flags(gl1e);
- u32 sflags = sh_propagate_flags(v, gmfn, gflags, 0, _mfn(INVALID_MFN),
- mmio, 1, ft_prefetch);
-
- *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
-
- SHADOW_DEBUG(PROPAGATE,
- "gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
- gl1e.l1, sl1p->l1);
-
- ASSERT(sflags != -1);
+ _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
}
@@ -956,8 +844,6 @@ l1e_propagate_from_guest(struct vcpu *v,
* SHADOW_SET_FLUSH -- the caller must cause a TLB flush.
* SHADOW_SET_ERROR -- the input is not a valid entry (for example, if
* shadow_get_page_from_l1e() fails).
- * SHADOW_SET_L3PAE_RECOPY -- one or more vcpu's need to have their local
- * copies of their PAE L3 entries re-copied.
*/
static inline void safe_write_entry(void *dst, void *src)
@@ -1041,16 +927,13 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
int res;
mfn_t mfn;
struct domain *owner;
- shadow_l1e_t sanitized_sl1e =
- shadow_l1e_remove_flags(sl1e, _PAGE_SHADOW_RW | _PAGE_SHADOW_PRESENT);
-
- //ASSERT(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT);
- //ASSERT((shadow_l1e_get_flags(sl1e) & L1_DISALLOW_MASK) == 0);
+
+ ASSERT(!sh_l1e_is_magic(sl1e));
if ( !shadow_mode_refcounts(d) )
return 1;
- res = get_page_from_l1e(sanitized_sl1e, d);
+ res = get_page_from_l1e(sl1e, d);
// If a privileged domain is attempting to install a map of a page it does
// not own, we let it succeed anyway.
@@ -1062,7 +945,7 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
(owner = page_get_owner(mfn_to_page(mfn))) &&
(d != owner) )
{
- res = get_page_from_l1e(sanitized_sl1e, owner);
+ res = get_page_from_l1e(sl1e, owner);
SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
"which is owned by domain %d: %s\n",
d->domain_id, mfn_x(mfn), owner->domain_id,
@@ -1250,7 +1133,8 @@ static int shadow_set_l1e(struct vcpu *v
if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
- if ( shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT )
+ if ( (shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT)
+ && !sh_l1e_is_magic(new_sl1e) )
{
/* About to install a new reference */
if ( shadow_mode_refcounts(d) ) {
@@ -1267,7 +1151,8 @@ static int shadow_set_l1e(struct vcpu *v
shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
flags |= SHADOW_SET_CHANGED;
- if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
+ if ( (shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT)
+ && !sh_l1e_is_magic(old_sl1e) )
{
/* We lost a reference to an old mfn. */
/* N.B. Unlike higher-level sets, never need an extra flush
@@ -2133,7 +2018,8 @@ void sh_destroy_l1_shadow(struct vcpu *v
/* Decrement refcounts of all the old entries */
mfn_t sl1mfn = smfn;
SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
- if ( shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT )
+ if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
+ && !sh_l1e_is_magic(*sl1e) )
shadow_put_page_from_l1e(*sl1e, d);
});
}
@@ -2399,16 +2285,17 @@ static int validate_gl1e(struct vcpu *v,
guest_l1e_t *new_gl1e = new_ge;
shadow_l1e_t *sl1p = se;
gfn_t gfn;
- mfn_t mfn;
- int result = 0;
+ mfn_t gmfn;
+ int result = 0, mmio;
perfc_incrc(shadow_validate_gl1e_calls);
gfn = guest_l1e_get_gfn(*new_gl1e);
- mfn = vcpu_gfn_to_mfn(v, gfn);
-
- l1e_propagate_from_guest(v, *new_gl1e, &new_sl1e,
- /* mmio? */ !valid_mfn(mfn));
+ gmfn = vcpu_gfn_to_mfn(v, gfn);
+
+ mmio = (hvm_guest(v) && shadow_vcpu_mode_translate(v) && !valid_mfn(gmfn));
+ l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e,
+ ft_prefetch, mmio);
result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
return result;
@@ -2576,6 +2463,80 @@ static inline void reset_early_unshadow(
#endif
}
+
+
+/**************************************************************************/
+/* Optimization: Prefetch multiple L1 entries. This is called after we have
+ * demand-faulted a shadow l1e in the fault handler, to see if it's
+ * worth fetching some more.
+ */
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
+
+/* XXX magic number */
+#define PREFETCH_DISTANCE 32
+
+static void sh_prefetch(struct vcpu *v, walk_t *gw,
+ shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
+{
+ int i, dist, mmio;
+ gfn_t gfn;
+ mfn_t gmfn;
+ guest_l1e_t gl1e;
+ shadow_l1e_t sl1e;
+ u32 gflags;
+
+ /* Prefetch no further than the end of the _shadow_ l1 MFN */
+ dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
+ /* And no more than a maximum fetches-per-fault */
+ if ( dist > PREFETCH_DISTANCE )
+ dist = PREFETCH_DISTANCE;
+
+ for ( i = 1; i < dist ; i++ )
+ {
+ /* No point in prefetching if there's already a shadow */
+ if ( ptr_sl1e[i].l1 != 0 )
+ break;
+
+ if ( gw->l1e )
+ {
+ /* Normal guest page; grab the next guest entry */
+ gl1e = gw->l1e[i];
+ /* Not worth continuing if we hit an entry that will need another
+ * fault for A/D-bit propagation anyway */
+ gflags = guest_l1e_get_flags(gl1e);
+ if ( (gflags & _PAGE_PRESENT)
+ && (!(gflags & _PAGE_ACCESSED)
+ || ((gflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY))) )
+ break;
+ }
+ else
+ {
+ /* Fragmented superpage, unless we've been called wrongly */
+ ASSERT(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE);
+ /* Increment the l1e's GFN by the right number of guest pages */
+ gl1e = guest_l1e_from_gfn(
+ _gfn(gfn_x(guest_l1e_get_gfn(gw->eff_l1e)) + i),
+ guest_l1e_get_flags(gw->eff_l1e));
+ }
+
+ /* Look at the gfn that the l1e is pointing at */
+ gfn = guest_l1e_get_gfn(gl1e);
+ gmfn = vcpu_gfn_to_mfn(v, gfn);
+ mmio = ( hvm_guest(v)
+ && shadow_vcpu_mode_translate(v)
+ && mmio_space(gfn_to_paddr(gfn)) );
+
+ /* Propagate the entry. Safe to use a pointer to our local
+ * gl1e, since this is not a demand-fetch so there will be no
+ * write-back to the guest. */
+ l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
+ gmfn, &sl1e, ft_prefetch, mmio);
+ (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
+ }
+}
+
+#endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
/**************************************************************************/
@@ -2602,16 +2563,70 @@ static int sh_page_fault(struct vcpu *v,
int r, mmio;
fetch_type_t ft = 0;
+ SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
+ v->domain->domain_id, v->vcpu_id, va, regs->error_code);
+
//
// XXX: Need to think about eventually mapping superpages directly in the
// shadow (when possible), as opposed to splintering them into a
// bunch of 4K maps.
//
+#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2
+ if ( (regs->error_code & PFEC_reserved_bit) )
+ {
+ /* The only reasons for reserved bits to be set in shadow entries
+ * are the two "magic" shadow_l1e entries. */
+ if ( likely((__copy_from_user(&sl1e,
+ (sh_linear_l1_table(v)
+ + shadow_l1_linear_offset(va)),
+ sizeof(sl1e)) == 0)
+ && sh_l1e_is_magic(sl1e)) )
+ {
+ if ( sh_l1e_is_gnp(sl1e) )
+ {
+ if ( likely(!hvm_guest(v) || shadow_vcpu_mode_translate(v)) )
+ {
+ /* Not-present in a guest PT: pass to the guest as
+ * a not-present fault (by flipping two bits). */
+ ASSERT(regs->error_code & PFEC_page_present);
+ regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
+ perfc_incrc(shadow_fault_fast_gnp);
+ SHADOW_PRINTK("fast path not-present\n");
+ return 0;
+ }
+ else
+ {
+ /* Not-present in the P2M: MMIO */
+ gpa = va;
+ }
+ }
+ else
+ {
+ /* Magic MMIO marker: extract gfn for MMIO address */
+ ASSERT(sh_l1e_is_mmio(sl1e));
+ gpa = (((paddr_t)(gfn_x(sh_l1e_mmio_get_gfn(sl1e))))
+ << PAGE_SHIFT)
+ | (va & ~PAGE_MASK);
+ }
+ perfc_incrc(shadow_fault_fast_mmio);
+ SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
+ reset_early_unshadow(v);
+ handle_mmio(gpa);
+ return EXCRET_fault_fixed;
+ }
+ else
+ {
+ /* This should be exceptionally rare: another vcpu has fixed
+ * the tables between the fault and our reading the l1e.
+ * Fall through to the normal fault handling logic */
+ perfc_incrc(shadow_fault_fast_fail);
+ SHADOW_PRINTK("fast path false alarm!\n");
+ }
+ }
+#endif /* SHOPT_FAST_FAULT_PATH */
+
shadow_lock(d);
-
- SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
- v->domain->domain_id, v->vcpu_id, va, regs->error_code);
shadow_audit_tables(v);
@@ -2659,8 +2674,9 @@ static int sh_page_fault(struct vcpu *v,
}
// Was it a write fault?
- //
- if ( regs->error_code & PFEC_write_access )
+ ft = ((regs->error_code & PFEC_write_access)
+ ? ft_demand_write : ft_demand_read);
+ if ( ft == ft_demand_write )
{
if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
{
@@ -2685,26 +2701,19 @@ static int sh_page_fault(struct vcpu *v,
}
}
- /* Is this an MMIO access? */
+ /* What mfn is the guest trying to access? */
gfn = guest_l1e_get_gfn(gw.eff_l1e);
+ gmfn = vcpu_gfn_to_mfn(v, gfn);
mmio = ( hvm_guest(v)
&& shadow_vcpu_mode_translate(v)
&& mmio_space(gfn_to_paddr(gfn)) );
- /* For MMIO, the shadow holds the *gfn*; for normal accesses, it holds
- * the equivalent mfn. */
- if ( mmio )
- gmfn = _mfn(gfn_x(gfn));
- else
- {
- gmfn = vcpu_gfn_to_mfn(v, gfn);
- if ( !valid_mfn(gmfn) )
- {
- perfc_incrc(shadow_fault_bail_bad_gfn);
- SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n",
- gfn_x(gfn), mfn_x(gmfn));
- goto not_a_shadow_fault;
- }
+ if ( !mmio && !valid_mfn(gmfn) )
+ {
+ perfc_incrc(shadow_fault_bail_bad_gfn);
+ SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n",
+ gfn_x(gfn), mfn_x(gmfn));
+ goto not_a_shadow_fault;
}
/* Make sure there is enough free shadow memory to build a chain of
@@ -2717,44 +2726,39 @@ static int sh_page_fault(struct vcpu *v,
* for the shadow entry, since we might promote a page here. */
// XXX -- this code will need to change somewhat if/when the shadow code
// can directly map superpages...
- ft = ((regs->error_code & PFEC_write_access) ?
- ft_demand_write : ft_demand_read);
ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
ASSERT(ptr_sl1e);
- /* Calculate the shadow entry */
- if ( ft == ft_demand_write )
- {
- if ( l1e_write_fault(v, &gw, gmfn, &sl1e, mmio) )
+ /* Calculate the shadow entry and write it */
+ l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn,
+ gmfn, &sl1e, ft, mmio);
+ r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
+ /* Prefetch some more shadow entries */
+ sh_prefetch(v, &gw, ptr_sl1e, sl1mfn);
+#endif
+
+ /* Need to emulate accesses to page tables */
+ if ( sh_mfn_is_a_page_table(gmfn) )
+ {
+ if ( ft == ft_demand_write )
{
perfc_incrc(shadow_fault_emulate_write);
goto emulate;
}
- }
- else if ( l1e_read_fault(v, &gw, gmfn, &sl1e, mmio) )
- {
- perfc_incrc(shadow_fault_emulate_read);
- goto emulate;
- }
-
- /* Quick sanity check: we never make an MMIO entry that's got the
- * _PAGE_PRESENT flag set in it. */
- ASSERT(!mmio || !(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT));
-
- r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
+ else if ( shadow_mode_trap_reads(d) && ft == ft_demand_read )
+ {
+ perfc_incrc(shadow_fault_emulate_read);
+ goto emulate;
+ }
+ }
if ( mmio )
{
gpa = guest_walk_to_gpa(&gw);
goto mmio;
}
-
-#if 0
- if ( !(r & SHADOW_SET_CHANGED) )
- debugtrace_printk("%s: shadow_set_l1e(va=%p, sl1e=%" SH_PRI_pte
- ") did not change anything\n",
- __func__, gw.va, l1e_get_intpte(sl1e));
-#endif
perfc_incrc(shadow_fault_fixed);
d->arch.shadow.fault_count++;
@@ -2769,7 +2773,6 @@ static int sh_page_fault(struct vcpu *v,
return EXCRET_fault_fixed;
emulate:
-
/* Take the register set we were called with */
emul_regs = *regs;
if ( hvm_guest(v) )
@@ -3932,25 +3935,48 @@ int sh_audit_l1_table(struct vcpu *v, mf
gfn_t gfn;
char *s;
int done = 0;
-
+
/* Follow the backpointer */
gl1mfn = _mfn(mfn_to_page(sl1mfn)->u.inuse.type_info);
gl1e = gp = sh_map_domain_page(gl1mfn);
SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, {
- s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e),
- shadow_l1e_get_flags(*sl1e));
- if ( s ) AUDIT_FAIL(1, "%s", s);
-
- if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
- {
- gfn = guest_l1e_get_gfn(*gl1e);
- mfn = shadow_l1e_get_mfn(*sl1e);
- gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
- if ( mfn_x(gmfn) != mfn_x(mfn) )
- AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
- " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
- gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
+ if ( sh_l1e_is_magic(*sl1e) )
+ {
+#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2
+ if ( sh_l1e_is_gnp(*sl1e) )
+ {
+ if ( guest_l1e_get_flags(*gl1e) & _PAGE_PRESENT )
+ AUDIT_FAIL(1, "shadow is GNP magic but guest is present");
+ }
+ else
+ {
+ ASSERT(sh_l1e_is_mmio(*sl1e));
+ gfn = sh_l1e_mmio_get_gfn(*sl1e);
+ if ( gfn_x(gfn) != gfn_x(guest_l1e_get_gfn(*gl1e)) )
+ AUDIT_FAIL(1, "shadow MMIO gfn is %" SH_PRI_gfn
+ " but guest gfn is %" SH_PRI_gfn,
+ gfn_x(gfn),
+ gfn_x(guest_l1e_get_gfn(*gl1e)));
+ }
+#endif
+ }
+ else
+ {
+ s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e),
+ shadow_l1e_get_flags(*sl1e));
+ if ( s ) AUDIT_FAIL(1, "%s", s);
+
+ if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
+ {
+ gfn = guest_l1e_get_gfn(*gl1e);
+ mfn = shadow_l1e_get_mfn(*sl1e);
+ gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
+ if ( mfn_x(gmfn) != mfn_x(mfn) )
+ AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
+ " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn,
+ gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
+ }
}
});
sh_unmap_domain_page(gp);
@@ -3973,7 +3999,8 @@ int sh_audit_fl1_table(struct vcpu *v, m
if ( !(f == 0
|| f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
_PAGE_ACCESSED|_PAGE_DIRTY)
- || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)) )
+ || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
+ || sh_l1e_is_magic(*sl1e)) )
AUDIT_FAIL(1, "fl1e has bad flags");
});
return 0;
@@ -4011,7 +4038,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
" (--> %" SH_PRI_mfn ")"
- " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+ " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn,
gfn_x(gfn),
(guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
: mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)),
@@ -4053,7 +4080,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
: PGC_SH_l2_shadow);
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn
- " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+ " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn,
gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
}
});
@@ -4088,7 +4115,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
PGC_SH_l3_shadow);
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
- " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
+ " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn,
gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
}
});
diff -r 77e1baf0a567 -r 0b6f49d25d4f xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Wed Nov 01 10:02:00 2006 +0000
+++ b/xen/arch/x86/mm/shadow/private.h Wed Nov 01 10:31:11 2006 +0000
@@ -30,111 +30,6 @@
#include <xen/domain_page.h>
#include <asm/x86_emulate.h>
#include <asm/hvm/support.h>
-
-
-/******************************************************************************
- * Definitions for the use of the "available" bits in the shadow PTEs.
- *
- * Review of the low 12 bits of a shadow page table entry:
- *
- * in a guest: in a shadow:
- * Bit 11: _PAGE_AVAIL2, aka _PAGE_GNTTAB
- * Bit 10: _PAGE_AVAIL1 _PAGE_SHADOW_RW ("SW" below)
- * Bit 9: _PAGE_AVAIL0 _PAGE_SHADOW_PRESENT ("SP" below)
- * Bit 8: _PAGE_GLOBAL _PAGE_SHADOW_MMIO ("MMIO" below),
- * aka _PAGE_SHADOW_GUEST_NOT_PRESENT
- * Bit 7: _PAGE_PSE, aka _PAGE_PAT
- * Bit 6: _PAGE_DIRTY
- * Bit 5: _PAGE_ACCESSED
- * Bit 4: _PAGE_PCD
- * Bit 3: _PAGE_PWT
- * Bit 2: _PAGE_USER
- * Bit 1: _PAGE_RW ("GW" below)
- * Bit 0: _PAGE_PRESENT ("GP" below)
- *
- * Given a guest entry, as shown below, we can expect the following in the
- * corresponding shadow entry:
- *
- * Guest entry Shadow entry Commentary
- * ----------- ---------------- ---------------------------------------------
- * Maps
- * GP GW IO GP SP GW SW MMIO
- * -- -- ---- -- -- -- -- ----
- * - - - 0 0 0 0 0 The guest entry has not yet been shadowed.
- * 0 - - 0 0 0 0 1 The guest entry is marked not-present.
- * 1 1 no ? 1 ? 1 0 Writable entry in the guest.
- * 1 0 no ? 1 0 0 0 Read-only entry in the guest.
- * 1 1 yes 0 1 ? 1 1 Writable MMIO mapping in the guest.
- * 1 0 yes 0 1 0 0 1 Read-only MMIO mapping in the guest.
- *
- * Normally, we would expect that GP=1 in the guest to imply GP=1 in the
- * shadow, and similarly for GW=1. However, various functionality that may be
- * implemented via the shadow can cause GP or GW to be cleared in such cases.
- * A & D bit emulation is a prime example of such functionality.
- *
- * If _PAGE_SHADOW_PRESENT is zero, then the _PAGE_PRESENT bit in that same
- * entry will always be zero, too.
-
- * Bit 11 is used in debug builds as the _PAGE_GNTTAB bit in PV guests. It is
- * currently available for random (ab)use in shadow entries.
- *
- * Bit 8 (the global bit) could be propagated from an HVM guest to the shadow,
- * but currently there is no benefit, as the guest's TLB is flushed on every
- * transition of CR3 anyway due to the HVM exit/re-entry.
- *
- * In shadow entries in which the _PAGE_SHADOW_PRESENT is set, bit 8 is used
- * as the _PAGE_SHADOW_MMIO bit. In such entries, if _PAGE_SHADOW_MMIO is
- * set, then the entry contains the *gfn* directly from the corresponding
- * guest entry (not an mfn!!).
- *
- * Bit 7 is set in a guest L2 to signify a superpage entry. The current
- * shadow code splinters superpage mappings into 512 or 1024 4K mappings; the
- * resulting shadow L1 table is called an FL1. Note that there is no guest
- * page that corresponds to an FL1.
- *
- * Bit 7 in a guest L1 is the PAT2 bit. Currently we do not support PAT in
- * this shadow code.
- *
- * Bit 6 is the dirty bit.
- *
- * Bit 5 is the accessed bit.
- *
- * Bit 4 is the cache disable bit. If set in a guest, the hardware is
- * supposed to refuse to cache anything found via this entry. It can be set
- * in an L4e, L3e, L2e, or L1e. This shadow code currently does not support
- * cache disable bits. They are silently ignored.
- *
- * Bit 4 is a guest L1 is also the PAT1 bit. Currently we do not support PAT
- * in this shadow code.
- *
- * Bit 3 is the cache write-thru bit. If set in a guest, the hardware is
- * supposed to use write-thru instead of write-back caching for anything found
- * via this entry. It can be set in an L4e, L3e, L2e, or L1e. This shadow
- * code currently does not support cache write-thru bits. They are silently
- * ignored.
- *
- * Bit 3 is a guest L1 is also the PAT0 bit. Currently we do not support PAT
- * in this shadow code.
- *
- * Bit 2 is the user bit.
- *
- * Bit 1 is the read-write bit.
- *
- * Bit 0 is the present bit.
- */
-
-// Copy of the _PAGE_RW bit from the guest's PTE, appropriately zero'ed by
-// the appropriate shadow rules.
-#define _PAGE_SHADOW_RW _PAGE_AVAIL1
-
-// Copy of the _PAGE_PRESENT bit from the guest's PTE
-#define _PAGE_SHADOW_PRESENT _PAGE_AVAIL0
-
-// The matching guest entry maps MMIO space
-#define _PAGE_SHADOW_MMIO _PAGE_GLOBAL
-
-// Shadow flags value used when the guest is not present
-#define _PAGE_SHADOW_GUEST_NOT_PRESENT _PAGE_GLOBAL
/******************************************************************************
@@ -151,13 +46,13 @@
} while (0)
// The flags for use with SHADOW_DEBUG:
-#define SHADOW_DEBUG_PROPAGATE 0
-#define SHADOW_DEBUG_MAKE_SHADOW 0
-#define SHADOW_DEBUG_DESTROY_SHADOW 0
+#define SHADOW_DEBUG_PROPAGATE 1
+#define SHADOW_DEBUG_MAKE_SHADOW 1
+#define SHADOW_DEBUG_DESTROY_SHADOW 1
#define SHADOW_DEBUG_P2M 0
-#define SHADOW_DEBUG_A_AND_D 0
-#define SHADOW_DEBUG_EMULATE 0
-#define SHADOW_DEBUG_LOGDIRTY 1
+#define SHADOW_DEBUG_A_AND_D 1
+#define SHADOW_DEBUG_EMULATE 1
+#define SHADOW_DEBUG_LOGDIRTY 0
/******************************************************************************
diff -r 77e1baf0a567 -r 0b6f49d25d4f xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h Wed Nov 01 10:02:00 2006 +0000
+++ b/xen/arch/x86/mm/shadow/types.h Wed Nov 01 10:31:11 2006 +0000
@@ -591,6 +591,77 @@ accumulate_guest_flags(struct vcpu *v, w
return accumulated_flags;
}
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2
+/******************************************************************************
+ * We implement a "fast path" for two special cases: faults that require
+ * MMIO emulation, and faults where the guest PTE is not present. We
+ * record these as shadow l1 entries that have reserved bits set in
+ * them, so we can spot them immediately in the fault handler and handle
+ * them without needing to hold the shadow lock or walk the guest
+ * pagetables.
+ *
+ * This is only feasible for PAE and 64bit Xen: 32-bit non-PAE PTEs don't
+ * have reserved bits that we can use for this.
+ */
+
+#define SH_L1E_MAGIC 0xffffffff00000000ULL
+static inline int sh_l1e_is_magic(shadow_l1e_t sl1e)
+{
+ return ((sl1e.l1 & SH_L1E_MAGIC) == SH_L1E_MAGIC);
+}
+
+/* Guest not present: a single magic value */
+static inline shadow_l1e_t sh_l1e_gnp(void)
+{
+ return (shadow_l1e_t){ -1ULL };
+}
+
+static inline int sh_l1e_is_gnp(shadow_l1e_t sl1e)
+{
+ return (sl1e.l1 == sh_l1e_gnp().l1);
+}
+
+/* MMIO: an invalid PTE that contains the GFN of the equivalent guest l1e.
+ * We store 28 bits of GFN in bits 4-31 of the entry.
+ * The present bit is set, and the U/S and R/W bits are taken from the guest.
+ * Bit 3 is always 0, to differentiate from gnp above. */
+#define SH_L1E_MMIO_MAGIC 0xffffffff00000001ULL
+#define SH_L1E_MMIO_MAGIC_MASK 0xffffffff00000009ULL
+#define SH_L1E_MMIO_GFN_MASK 0x00000000fffffff0ULL
+#define SH_L1E_MMIO_GFN_SHIFT 4
+
+static inline shadow_l1e_t sh_l1e_mmio(gfn_t gfn, u32 gflags)
+{
+ return (shadow_l1e_t) { (SH_L1E_MMIO_MAGIC
+ | (gfn_x(gfn) << SH_L1E_MMIO_GFN_SHIFT)
+ | (gflags & (_PAGE_USER|_PAGE_RW))) };
+}
+
+static inline int sh_l1e_is_mmio(shadow_l1e_t sl1e)
+{
+ return ((sl1e.l1 & SH_L1E_MMIO_MAGIC_MASK) == SH_L1E_MMIO_MAGIC);
+}
+
+static inline gfn_t sh_l1e_mmio_get_gfn(shadow_l1e_t sl1e)
+{
+ return _gfn((sl1e.l1 & SH_L1E_MMIO_GFN_MASK) >> SH_L1E_MMIO_GFN_SHIFT);
+}
+
+static inline u32 sh_l1e_mmio_get_flags(shadow_l1e_t sl1e)
+{
+ return (u32)((sl1e.l1 & (_PAGE_USER|_PAGE_RW)));
+}
+
+#else
+
+#define sh_l1e_gnp() shadow_l1e_empty()
+#define sh_l1e_mmio(_gfn, _flags) shadow_l1e_empty()
+#define sh_l1e_is_magic(_e) (0)
+
+#endif /* SHOPT_FAST_FAULT_PATH */
+
+
#endif /* _XEN_SHADOW_TYPES_H */
/*
diff -r 77e1baf0a567 -r 0b6f49d25d4f xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h Wed Nov 01 10:02:00 2006 +0000
+++ b/xen/include/asm-x86/perfc_defn.h Wed Nov 01 10:31:11 2006 +0000
@@ -43,6 +43,9 @@ PERFCOUNTER_CPU(shadow_a_update, "
PERFCOUNTER_CPU(shadow_a_update, "shadow A bit update")
PERFCOUNTER_CPU(shadow_ad_update, "shadow A&D bit update")
PERFCOUNTER_CPU(shadow_fault, "calls to shadow_fault")
+PERFCOUNTER_CPU(shadow_fault_fast_gnp, "shadow_fault fast path n/p")
+PERFCOUNTER_CPU(shadow_fault_fast_mmio, "shadow_fault fast path mmio")
+PERFCOUNTER_CPU(shadow_fault_fast_fail, "shadow_fault fast path error")
PERFCOUNTER_CPU(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
PERFCOUNTER_CPU(shadow_fault_bail_not_present,
"shadow_fault guest not-present")
diff -r 77e1baf0a567 -r 0b6f49d25d4f xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Wed Nov 01 10:02:00 2006 +0000
+++ b/xen/include/asm-x86/shadow.h Wed Nov 01 10:31:11 2006 +0000
@@ -161,8 +161,10 @@ extern int shadow_audit_enable;
*/
#define SHOPT_WRITABLE_HEURISTIC 0x01 /* Guess at RW PTEs via linear maps */
#define SHOPT_EARLY_UNSHADOW 0x02 /* Unshadow l1s on fork or exit */
-
-#define SHADOW_OPTIMIZATIONS 0x03
+#define SHOPT_FAST_FAULT_PATH 0x04 /* Fast-path MMIO and not-present */
+#define SHOPT_PREFETCH 0x08 /* Shadow multiple entries per fault */
+
+#define SHADOW_OPTIMIZATIONS 0x0f
/* With shadow pagetables, the different kinds of address start