# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1194021491 0
# Node ID 650cadd1b28303dae4927ca65e64eb2507e3a3f3
# Parent 838e77a41a3c53a54428e642cb0440a8a6f8912b
# Parent db9f62d8f7f4d2d8f8ccf7c512623977132bcffa
Merge.
---
xen/arch/x86/hvm/hvm.c | 36 +
xen/arch/x86/hvm/platform.c | 25 -
xen/arch/x86/hvm/svm/svm.c | 13
xen/arch/x86/hvm/vmx/vmx.c | 14
xen/arch/x86/mm/hap/guest_walk.c | 4
xen/arch/x86/mm/hap/hap.c | 2
xen/arch/x86/mm/hap/private.h | 9
xen/arch/x86/mm/p2m.c | 6
xen/arch/x86/mm/shadow/common.c | 16
xen/arch/x86/mm/shadow/multi.c | 689 ++++++++++++++++----------------------
xen/arch/x86/mm/shadow/private.h | 13
xen/arch/x86/mm/shadow/types.h | 49 +-
xen/include/asm-x86/hvm/support.h | 1
xen/include/asm-x86/paging.h | 18
xen/include/asm-x86/perfc_defn.h | 8
15 files changed, 420 insertions(+), 483 deletions(-)
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/hvm.c Fri Nov 02 16:38:11 2007 +0000
@@ -931,6 +931,7 @@ static void *hvm_map(unsigned long va, i
{
unsigned long gfn, mfn;
p2m_type_t p2mt;
+ uint32_t pfec;
if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
{
@@ -939,11 +940,15 @@ static void *hvm_map(unsigned long va, i
return NULL;
}
- gfn = paging_gva_to_gfn(current, va);
+ /* We're mapping on behalf of the segment-load logic, which might
+ * write the accessed flags in the descriptors (in 32-bit mode), but
+ * we still treat it as a kernel-mode read (i.e. no access checks). */
+ pfec = PFEC_page_present;
+ gfn = paging_gva_to_gfn(current, va, &pfec);
mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
if ( !p2m_is_ram(p2mt) )
{
- hvm_inject_exception(TRAP_page_fault, PFEC_write_access, va);
+ hvm_inject_exception(TRAP_page_fault, pfec, va);
return NULL;
}
@@ -1263,14 +1268,24 @@ void hvm_task_switch(
* @size = number of bytes to copy
* @dir = copy *to* guest (TRUE) or *from* guest (FALSE)?
* @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)?
+ * @fetch = copy is an instruction fetch?
* Returns number of bytes failed to copy (0 == complete success).
*/
-static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
+static int __hvm_copy(void *buf, paddr_t addr, int size, int dir,
+ int virt, int fetch)
{
unsigned long gfn, mfn;
p2m_type_t p2mt;
char *p;
int count, todo;
+ uint32_t pfec = PFEC_page_present;
+
+ if ( dir )
+ pfec |= PFEC_write_access;
+ if ( ring_3(guest_cpu_user_regs()) )
+ pfec |= PFEC_user_mode;
+ if ( fetch )
+ pfec |= PFEC_insn_fetch;
todo = size;
while ( todo > 0 )
@@ -1278,7 +1293,7 @@ static int __hvm_copy(void *buf, paddr_t
count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
if ( virt )
- gfn = paging_gva_to_gfn(current, addr);
+ gfn = paging_gva_to_gfn(current, addr, &pfec);
else
gfn = addr >> PAGE_SHIFT;
@@ -1310,22 +1325,27 @@ static int __hvm_copy(void *buf, paddr_t
int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size)
{
- return __hvm_copy(buf, paddr, size, 1, 0);
+ return __hvm_copy(buf, paddr, size, 1, 0, 0);
}
int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size)
{
- return __hvm_copy(buf, paddr, size, 0, 0);
+ return __hvm_copy(buf, paddr, size, 0, 0, 0);
}
int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size)
{
- return __hvm_copy(buf, vaddr, size, 1, 1);
+ return __hvm_copy(buf, vaddr, size, 1, 1, 0);
}
int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size)
{
- return __hvm_copy(buf, vaddr, size, 0, 1);
+ return __hvm_copy(buf, vaddr, size, 0, 1, 0);
+}
+
+int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size)
+{
+ return __hvm_copy(buf, vaddr, size, 0, 1, hvm_nx_enabled(current));
}
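
For reference, a minimal standalone sketch (not part of the patch) of how
__hvm_copy() above assembles its fault code. The PFEC_* values are the
architectural x86 #PF error-code bits; build_pfec() and main() are
illustrative names only. As in hvm_fetch_from_guest_virt(), the
instruction-fetch bit is only raised when the guest has NX enabled.

    /* Sketch of the fault-code assembly in __hvm_copy(); illustrative only. */
    #include <stdio.h>
    #include <stdint.h>

    #define PFEC_page_present  (1U << 0)
    #define PFEC_write_access  (1U << 1)
    #define PFEC_user_mode     (1U << 2)
    #define PFEC_insn_fetch    (1U << 4)

    static uint32_t build_pfec(int is_write, int is_user, int is_fetch)
    {
        uint32_t pfec = PFEC_page_present;  /* the walk demands a present mapping */
        if ( is_write )
            pfec |= PFEC_write_access;
        if ( is_user )
            pfec |= PFEC_user_mode;
        if ( is_fetch )                     /* only when guest NX is enabled */
            pfec |= PFEC_insn_fetch;
        return pfec;
    }

    int main(void)
    {
        /* hvm_copy_to_guest_virt() from ring 3: prints 0x7 */
        printf("user write:   %#x\n", build_pfec(1, 1, 0));
        /* hvm_fetch_from_guest_virt() in ring 0 with NX on: prints 0x11 */
        printf("kernel fetch: %#x\n", build_pfec(0, 0, 1));
        return 0;
    }
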
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/platform.c Fri Nov 02 16:38:11 2007 +0000
@@ -833,7 +833,7 @@ int inst_copy_from_guest(unsigned char *
{
if ( inst_len > MAX_INST_LEN || inst_len <= 0 )
return 0;
- if ( hvm_copy_from_guest_virt(buf, guest_eip, inst_len) )
+ if ( hvm_fetch_from_guest_virt(buf, guest_eip, inst_len) )
return 0;
return inst_len;
}
@@ -1075,6 +1075,7 @@ void handle_mmio(unsigned long gpa)
unsigned long addr, gfn;
paddr_t paddr;
int dir, size = op_size;
+ uint32_t pfec;
ASSERT(count);
@@ -1082,8 +1083,11 @@ void handle_mmio(unsigned long gpa)
addr = regs->edi;
if ( ad_size == WORD )
addr &= 0xFFFF;
- addr += hvm_get_segment_base(v, x86_seg_es);
- gfn = paging_gva_to_gfn(v, addr);
+ addr += hvm_get_segment_base(v, x86_seg_es);
+ pfec = PFEC_page_present | PFEC_write_access;
+ if ( ring_3(regs) )
+ pfec |= PFEC_user_mode;
+ gfn = paging_gva_to_gfn(v, addr, &pfec);
paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
if ( paddr == gpa )
{
@@ -1105,7 +1109,8 @@ void handle_mmio(unsigned long gpa)
default: domain_crash_synchronous();
}
addr += hvm_get_segment_base(v, seg);
- gfn = paging_gva_to_gfn(v, addr);
+ pfec &= ~PFEC_write_access;
+ gfn = paging_gva_to_gfn(v, addr, &pfec);
paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
}
else
@@ -1115,12 +1120,9 @@ void handle_mmio(unsigned long gpa)
{
/* The guest does not have the non-mmio address mapped.
* Need to send in a page fault */
- int errcode = 0;
- /* IO read --> memory write */
- if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
regs->eip -= inst_len; /* do not advance %eip */
regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
- hvm_inject_exception(TRAP_page_fault, errcode, addr);
+ hvm_inject_exception(TRAP_page_fault, pfec, addr);
return;
}
@@ -1308,10 +1310,9 @@ void handle_mmio(unsigned long gpa)
DEFINE_PER_CPU(int, guest_handles_in_xen_space);
-/* Note that copy_{to,from}_user_hvm don't set the A and D bits on
- PTEs, and require the PTE to be writable even when they're only
- trying to read from it. The guest is expected to deal with
- this. */
+/* Note that copy_{to,from}_user_hvm require the PTE to be writable even
+ when they're only trying to read from it. The guest is expected to
+ deal with this. */
unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len)
{
if ( this_cpu(guest_handles_in_xen_space) )
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c Fri Nov 02 16:38:11 2007 +0000
@@ -1441,6 +1441,7 @@ static void svm_io_instruction(struct vc
unsigned long addr, count;
paddr_t paddr;
unsigned long gfn;
+ uint32_t pfec;
int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
if (!svm_get_io_address(v, regs, size, info, &count, &addr))
@@ -1459,15 +1460,17 @@ static void svm_io_instruction(struct vc
}
/* Translate the address to a physical address */
- gfn = paging_gva_to_gfn(v, addr);
+ pfec = PFEC_page_present;
+ if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
+ pfec |= PFEC_write_access;
+ if ( ring_3(regs) )
+ pfec |= PFEC_user_mode;
+ gfn = paging_gva_to_gfn(v, addr, &pfec);
if ( gfn == INVALID_GFN )
{
/* The guest does not have the RAM address mapped.
* Need to send in a page fault */
- int errcode = 0;
- /* IO read --> memory write */
- if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
- svm_hvm_inject_exception(TRAP_page_fault, errcode, addr);
+ svm_hvm_inject_exception(TRAP_page_fault, pfec, addr);
return;
}
paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Nov 02 16:38:11 2007 +0000
@@ -1642,7 +1642,7 @@ static void vmx_do_str_pio(unsigned long
unsigned long addr, count = 1, base;
paddr_t paddr;
unsigned long gfn;
- u32 ar_bytes, limit;
+ u32 ar_bytes, limit, pfec;
int sign;
int long_mode = 0;
@@ -1714,15 +1714,17 @@ static void vmx_do_str_pio(unsigned long
#endif
/* Translate the address to a physical address */
- gfn = paging_gva_to_gfn(current, addr);
+ pfec = PFEC_page_present;
+ if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
+ pfec |= PFEC_write_access;
+ if ( ring_3(regs) )
+ pfec |= PFEC_user_mode;
+ gfn = paging_gva_to_gfn(current, addr, &pfec);
if ( gfn == INVALID_GFN )
{
/* The guest does not have the RAM address mapped.
* Need to send in a page fault */
- int errcode = 0;
- /* IO read --> memory write */
- if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
- vmx_inject_exception(TRAP_page_fault, errcode, addr);
+ vmx_inject_exception(TRAP_page_fault, pfec, addr);
return;
}
paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/guest_walk.c Fri Nov 02 16:38:11 2007 +0000
@@ -40,7 +40,7 @@
#if GUEST_PAGING_LEVELS > CONFIG_PAGING_LEVELS
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
- struct vcpu *v, unsigned long gva)
+ struct vcpu *v, unsigned long gva, uint32_t *pfec)
{
gdprintk(XENLOG_ERR,
"Guest paging level is greater than host paging level!\n");
@@ -61,7 +61,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
#endif
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
- struct vcpu *v, unsigned long gva)
+ struct vcpu *v, unsigned long gva, uint32_t *pfec)
{
unsigned long gcr3 = v->arch.hvm_vcpu.guest_cr[3];
int mode = GUEST_PAGING_LEVELS;
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/hap.c Fri Nov 02 16:38:11 2007 +0000
@@ -695,7 +695,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
}
static unsigned long hap_gva_to_gfn_real_mode(
- struct vcpu *v, unsigned long gva)
+ struct vcpu *v, unsigned long gva, uint32_t *pfec)
{
return ((paddr_t)gva >> PAGE_SHIFT);
}
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/hap/private.h
--- a/xen/arch/x86/mm/hap/private.h Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/hap/private.h Fri Nov 02 16:38:11 2007 +0000
@@ -26,9 +26,12 @@
/********************************************/
/* GUEST TRANSLATION FUNCS */
/********************************************/
-unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva);
-unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva);
-unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva,
+ uint32_t *pfec);
+unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva,
+ uint32_t *pfec);
+unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva,
+ uint32_t *pfec);
/********************************************/
/* MISC DEFINITIONS */
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/p2m.c Fri Nov 02 16:38:11 2007 +0000
@@ -31,7 +31,7 @@
/* Debugging and auditing of the P2M code? */
#define P2M_AUDIT 0
-#define P2M_DEBUGGING 1
+#define P2M_DEBUGGING 0
/*
* The P2M lock. This protects all updates to the p2m table.
@@ -290,11 +290,11 @@ int p2m_alloc_table(struct domain *d,
void (*free_page)(struct domain *d, struct page_info *pg))
{
- mfn_t mfn;
+ mfn_t mfn = _mfn(INVALID_MFN);
struct list_head *entry;
struct page_info *page, *p2m_top;
unsigned int page_count = 0;
- unsigned long gfn;
+ unsigned long gfn = -1UL;
p2m_lock(d);
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/common.c Fri Nov 02 16:38:11 2007 +0000
@@ -150,11 +150,13 @@ hvm_read(enum x86_segment seg,
return rc;
*val = 0;
- // XXX -- this is WRONG.
- // It entirely ignores the permissions in the page tables.
- // In this case, that is only a user vs supervisor access check.
- //
- if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 )
+
+ if ( access_type == hvm_access_insn_fetch )
+ rc = hvm_fetch_from_guest_virt(val, addr, bytes);
+ else
+ rc = hvm_copy_from_guest_virt(val, addr, bytes);
+
+ if ( rc == 0 )
return X86EMUL_OKAY;
/* If we got here, there was nothing mapped here, or a bad GFN
@@ -395,7 +397,7 @@ struct x86_emulate_ops *shadow_init_emul
(!hvm_translate_linear_addr(
x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
hvm_access_insn_fetch, sh_ctxt, &addr) &&
- !hvm_copy_from_guest_virt(
+ !hvm_fetch_from_guest_virt(
sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
? sizeof(sh_ctxt->insn_buf) : 0;
@@ -423,7 +425,7 @@ void shadow_continue_emulation(struct sh
(!hvm_translate_linear_addr(
x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
hvm_access_insn_fetch, sh_ctxt, &addr) &&
- !hvm_copy_from_guest_virt(
+ !hvm_fetch_from_guest_virt(
sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
? sizeof(sh_ctxt->insn_buf) : 0;
sh_ctxt->insn_buf_eip = regs->eip;
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c Fri Nov 02 16:38:11 2007 +0000
@@ -189,7 +189,7 @@ guest_supports_nx(struct vcpu *v)
if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
return 0;
if ( !is_hvm_vcpu(v) )
- return 1;
+ return cpu_has_nx;
return hvm_nx_enabled(v);
}
@@ -197,22 +197,119 @@ guest_supports_nx(struct vcpu *v)
/**************************************************************************/
/* Functions for walking the guest page tables */
-
-/* Walk the guest pagetables, filling the walk_t with what we see.
- * Takes an uninitialised walk_t. The caller must call unmap_walk()
- * on the walk_t before discarding it or calling guest_walk_tables again.
- * If "guest_op" is non-zero, we are serving a genuine guest memory access,
+/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
+static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
+{
+ static uint32_t flags[] = {
+ /* I/F - Usr Wr */
+ /* 0 0 0 0 */ _PAGE_PRESENT,
+ /* 0 0 0 1 */ _PAGE_PRESENT|_PAGE_RW,
+ /* 0 0 1 0 */ _PAGE_PRESENT|_PAGE_USER,
+ /* 0 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
+ /* 0 1 0 0 */ _PAGE_PRESENT,
+ /* 0 1 0 1 */ _PAGE_PRESENT|_PAGE_RW,
+ /* 0 1 1 0 */ _PAGE_PRESENT|_PAGE_USER,
+ /* 0 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
+ /* 1 0 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
+ /* 1 0 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
+ /* 1 0 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
+ /* 1 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
+ /* 1 1 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
+ /* 1 1 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
+ /* 1 1 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
+ /* 1 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
+ };
+ uint32_t f = flags[(pfec & 0x1f) >> 1];
+ /* Don't demand not-NX if the CPU wouldn't enforce it. */
+ if ( !guest_supports_nx(v) )
+ f &= ~_PAGE_NX_BIT;
+ return f;
+}
+
+/* Read, check and modify a guest pagetable entry. Returns 0 if the
+ * flags are OK. Although we use l1e types here, the logic and the bits
+ * are the same for all types except PAE l3es. */
+static int guest_walk_entry(struct vcpu *v, mfn_t gmfn,
+ void *gp, void *wp,
+ uint32_t flags, int level)
+{
+ guest_l1e_t e, old_e;
+ uint32_t gflags;
+ int rc;
+
+ /* Read the guest entry */
+ e = *(guest_l1e_t *)gp;
+
+ /* Check that all the mandatory flag bits are there. Invert NX, to
+ * calculate as if there were an "X" bit that allowed access. */
+ gflags = guest_l1e_get_flags(e) ^ _PAGE_NX_BIT;
+ rc = ((gflags & flags) != flags);
+
+ /* Set the accessed/dirty bits */
+ if ( rc == 0 )
+ {
+ uint32_t bits = _PAGE_ACCESSED;
+ if ( (flags & _PAGE_RW) // Implies that the action is a write
+ && ((level == 1) || ((level == 2) && (gflags & _PAGE_PSE))) )
+ bits |= _PAGE_DIRTY;
+ old_e = e;
+ e.l1 |= bits;
+ SHADOW_PRINTK("flags %lx bits %lx old_e %llx e %llx\n",
+ (unsigned long) flags,
+ (unsigned long) bits,
+ (unsigned long long) old_e.l1,
+ (unsigned long long) e.l1);
+ /* Try to write the entry back. If it's changed under our feet
+ * then leave it alone */
+ if ( e.l1 != old_e.l1 )
+ {
+ (void) cmpxchg(((guest_intpte_t *)gp), old_e.l1, e.l1);
+ paging_mark_dirty(v->domain, mfn_x(gmfn));
+ }
+ }
+
+ /* Record the entry in the walk */
+ *(guest_l1e_t *)wp = e;
+ return rc;
+}
+
+/* Walk the guest pagetables, after the manner of a hardware walker.
+ *
+ * Inputs: a vcpu, a virtual address, a walk_t to fill, a
+ * pointer to a pagefault code, and a flag "shadow_op".
+ *
+ * We walk the vcpu's guest pagetables, filling the walk_t with what we
+ * see and adding any Accessed and Dirty bits that are needed in the
+ * guest entries. Using the pagefault code, we check the permissions as
+ * we go. For the purposes of reading pagetables we treat all non-RAM
+ * memory as containing zeroes.
+ *
+ * If "shadow_op" is non-zero, we are serving a genuine guest memory access,
* and must (a) be under the shadow lock, and (b) remove write access
- * from any gueat PT pages we see, as we will be using their contents to
- * perform shadow updates.
- * Returns 0 for success or non-zero if the guest pagetables are malformed.
- * N.B. Finding a not-present entry does not cause a non-zero return code. */
-static inline int
-guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
+ * from any guest PT pages we see, as we will be shadowing them soon
+ * and will rely on the contents' not having changed.
+ *
+ * Returns 0 for success or non-zero if the walk did not complete.
+ * N.B. This is different from the old return code but almost no callers
+ * checked the old return code anyway.
+ */
+static int
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
+ uint32_t pfec, int shadow_op)
{
struct domain *d = v->domain;
p2m_type_t p2mt;
- ASSERT(!guest_op || shadow_locked_by_me(d));
+ guest_l1e_t *l1p;
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+ guest_l1e_t *l2p;
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+ guest_l1e_t *l3p;
+#endif
+#endif
+ uint32_t flags = mandatory_flags(v, pfec);
+ int rc;
+
+ ASSERT(!shadow_op || shadow_locked_by_me(d));
perfc_incr(shadow_guest_walk);
memset(gw, 0, sizeof(*gw));
@@ -220,84 +317,104 @@ guest_walk_tables(struct vcpu *v, unsign
#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
- /* Get l4e from the top level table */
+ /* Get the l4e from the top level table and check its flags */
gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
- gw->l4e = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable
- + guest_l4_table_offset(va);
- /* Walk down to the l3e */
- if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
- gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+ rc = guest_walk_entry(v, gw->l4mfn,
+ (guest_l4e_t *)v->arch.paging.shadow.guest_vtable
+ + guest_l4_table_offset(va),
+ &gw->l4e, flags, 4);
+ if ( rc != 0 ) return rc;
+
+ /* Map the l3 table */
+ gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
if ( !p2m_is_ram(p2mt) ) return 1;
ASSERT(mfn_valid(gw->l3mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
+ if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
flush_tlb_mask(d->domain_dirty_cpumask);
- gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
- + guest_l3_table_offset(va);
+ /* Get the l3e and check its flags */
+ l3p = sh_map_domain_page(gw->l3mfn);
+ rc = guest_walk_entry(v, gw->l3mfn, l3p + guest_l3_table_offset(va),
+ &gw->l3e, flags, 3);
+ sh_unmap_domain_page(l3p);
+ if ( rc != 0 ) return rc;
+
#else /* PAE only... */
- /* Get l3e from the cache of the guest's top level table */
- gw->l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
+
+ /* Get l3e from the cache of the top level table and check its flags */
+ gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
+ if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) ) return 1;
+
#endif /* PAE or 64... */
- /* Walk down to the l2e */
- if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
- gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+
+ /* Map the l2 table */
+ gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
if ( !p2m_is_ram(p2mt) ) return 1;
ASSERT(mfn_valid(gw->l2mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
+ if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
flush_tlb_mask(d->domain_dirty_cpumask);
- gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
- + guest_l2_table_offset(va);
+ /* Get the l2e */
+ l2p = sh_map_domain_page(gw->l2mfn);
+ rc = guest_walk_entry(v, gw->l2mfn, l2p + guest_l2_table_offset(va),
+ &gw->l2e, flags, 2);
+ sh_unmap_domain_page(l2p);
+ if ( rc != 0 ) return rc;
+
#else /* 32-bit only... */
- /* Get l2e from the top level table */
+
+ /* Get l2e from the top level table and check its flags */
gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
- gw->l2e = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable
- + guest_l2_table_offset(va);
+ rc = guest_walk_entry(v, gw->l2mfn,
+ (guest_l2e_t *)v->arch.paging.shadow.guest_vtable
+ + guest_l2_table_offset(va),
+ &gw->l2e, flags, 2);
+ if ( rc != 0 ) return rc;
+
#endif /* All levels... */
-
- if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
+
if ( guest_supports_superpages(v) &&
- (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) )
+ (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE) )
{
/* Special case: this guest VA is in a PSE superpage, so there's
* no guest l1e. We make one up so that the propagation code
* can generate a shadow l1 table. Start with the gfn of the
* first 4k-page of the superpage. */
- gfn_t start = guest_l2e_get_gfn(*gw->l2e);
+ gfn_t start = guest_l2e_get_gfn(gw->l2e);
/* Grant full access in the l1e, since all the guest entry's
- * access controls are enforced in the shadow l2e. This lets
- * us reflect l2 changes later without touching the l1s. */
+ * access controls are enforced in the shadow l2e. */
int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
_PAGE_ACCESSED|_PAGE_DIRTY);
- /* propagate PWT PCD to level 1 for PSE */
- if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
- flags |= _PAGE_PWT;
- if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
- flags |= _PAGE_PCD;
/* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
- * of the level 1 */
- if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) )
- flags |= _PAGE_PAT;
+ * of the level 1. */
+ if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE_PAT) )
+ flags |= _PAGE_PAT;
+ /* Copy the cache-control bits to the l1 as well, because we
+ * can't represent PAT in the (non-PSE) shadow l2e. :(
+ * This could cause problems if a guest ever maps an area of
+ * memory with superpages using more than one caching mode. */
+ flags |= guest_l2e_get_flags(gw->l2e) & (_PAGE_PWT|_PAGE_PCD);
/* Increment the pfn by the right number of 4k pages.
* The ~0x1 is to mask out the PAT bit mentioned above. */
start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
- gw->eff_l1e = guest_l1e_from_gfn(start, flags);
- gw->l1e = NULL;
+ gw->l1e = guest_l1e_from_gfn(start, flags);
gw->l1mfn = _mfn(INVALID_MFN);
}
else
{
/* Not a superpage: carry on and find the l1e. */
- gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+ gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
if ( !p2m_is_ram(p2mt) ) return 1;
ASSERT(mfn_valid(gw->l1mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( guest_op
+ if ( shadow_op
&& sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
flush_tlb_mask(d->domain_dirty_cpumask);
- gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
- + guest_l1_table_offset(va);
- gw->eff_l1e = *gw->l1e;
+ l1p = sh_map_domain_page(gw->l1mfn);
+ rc = guest_walk_entry(v, gw->l2mfn, l1p + guest_l1_table_offset(va),
+ &gw->l1e, flags, 1);
+ sh_unmap_domain_page(l1p);
+ if ( rc != 0 ) return rc;
}
return 0;
@@ -308,9 +425,9 @@ static inline gfn_t
static inline gfn_t
guest_walk_to_gfn(walk_t *gw)
{
- if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+ if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
return _gfn(INVALID_GFN);
- return guest_l1e_get_gfn(gw->eff_l1e);
+ return guest_l1e_get_gfn(gw->l1e);
}
/* Given a walk_t, translate the gw->va into the guest's notion of the
@@ -318,29 +435,12 @@ static inline paddr_t
static inline paddr_t
guest_walk_to_gpa(walk_t *gw)
{
- if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+ if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
return 0;
- return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
-}
-
-
-/* Unmap (and reinitialise) a guest walk.
- * Call this to dispose of any walk filled in by guest_walk_tables() */
-static void unmap_walk(struct vcpu *v, walk_t *gw)
-{
-#if GUEST_PAGING_LEVELS >= 3
-#if GUEST_PAGING_LEVELS >= 4
- if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
-#endif
- if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
-#endif
- if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
-#ifdef DEBUG
- memset(gw, 0, sizeof(*gw));
-#endif
-}
-
-
+ return guest_l1e_get_paddr(gw->l1e) + (gw->va & ~PAGE_MASK);
+}
+
+#if 0 /* Keep for debugging */
/* Pretty-print the contents of a guest-walk */
static inline void print_gw(walk_t *gw)
{
@@ -348,26 +448,17 @@ static inline void print_gw(walk_t *gw)
#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
SHADOW_PRINTK(" l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
- SHADOW_PRINTK(" l4e=%p\n", gw->l4e);
- if ( gw->l4e )
- SHADOW_PRINTK(" *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
+ SHADOW_PRINTK(" l4e=%" SH_PRI_gpte "\n", gw->l4e.l4);
SHADOW_PRINTK(" l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
#endif /* PAE or 64... */
- SHADOW_PRINTK(" l3e=%p\n", gw->l3e);
- if ( gw->l3e )
- SHADOW_PRINTK(" *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
+ SHADOW_PRINTK(" l3e=%" SH_PRI_gpte "\n", gw->l3e.l3);
#endif /* All levels... */
SHADOW_PRINTK(" l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
- SHADOW_PRINTK(" l2e=%p\n", gw->l2e);
- if ( gw->l2e )
- SHADOW_PRINTK(" *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
+ SHADOW_PRINTK(" l2e=%" SH_PRI_gpte "\n", gw->l2e.l2);
SHADOW_PRINTK(" l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
- SHADOW_PRINTK(" l1e=%p\n", gw->l1e);
- if ( gw->l1e )
- SHADOW_PRINTK(" *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
- SHADOW_PRINTK(" eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
-}
-
+ SHADOW_PRINTK(" l1e=%" SH_PRI_gpte "\n", gw->l1e.l1);
+}
+#endif /* 0 */
#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
/* Lightweight audit: pass all the shadows associated with this guest walk
@@ -404,10 +495,10 @@ static void sh_audit_gw(struct vcpu *v,
&& mfn_valid((smfn = get_shadow_status(v, gw->l1mfn,
SH_type_l1_shadow))) )
(void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
- else if ( gw->l2e
- && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
+ else if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PRESENT)
+ && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)
&& mfn_valid(
- (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
+ (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(gw->l2e)))) )
(void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
}
@@ -415,85 +506,6 @@ static void sh_audit_gw(struct vcpu *v,
#define sh_audit_gw(_v, _gw) do {} while(0)
#endif /* audit code */
-
-
-/**************************************************************************/
-/* Function to write to the guest tables, for propagating accessed and
- * dirty bits from the shadow to the guest.
- * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
- * and an operation type. The guest entry is always passed as an l1e:
- * since we only ever write flags, that's OK.
- * Returns the new flag bits of the guest entry. */
-
-static u32 guest_set_ad_bits(struct vcpu *v,
- mfn_t gmfn,
- guest_l1e_t *ep,
- unsigned int level,
- fetch_type_t ft)
-{
- u32 flags;
- int res = 0;
-
- ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
- ASSERT(level <= GUEST_PAGING_LEVELS);
- ASSERT(shadow_locked_by_me(v->domain));
-
- flags = guest_l1e_get_flags(*ep);
-
- /* Only set A and D bits for guest-initiated accesses */
- if ( !(ft & FETCH_TYPE_DEMAND) )
- return flags;
-
- ASSERT(mfn_valid(gmfn)
- && (sh_mfn_is_a_page_table(gmfn)
- || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask)
- == 0)));
-
- /* PAE l3s do not have A and D bits */
- ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
-
- /* Need the D bit as well for writes, in L1es and PSE L2es. */
- if ( ft == ft_demand_write
- && (level == 1 ||
- (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
- {
- if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED))
- == (_PAGE_DIRTY | _PAGE_ACCESSED) )
- return flags; /* Guest already has A and D bits set */
- flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
- perfc_incr(shadow_ad_update);
- }
- else
- {
- if ( flags & _PAGE_ACCESSED )
- return flags; /* Guest already has A bit set */
- flags |= _PAGE_ACCESSED;
- perfc_incr(shadow_a_update);
- }
-
- /* Set the bit(s) */
- paging_mark_dirty(v->domain, mfn_x(gmfn));
- SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
- "old flags = %#x, new flags = %#x\n",
- gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep),
- flags);
- *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
-
- /* Propagate this change to any other shadows of the page
- * (only necessary if there is more than one shadow) */
- if ( mfn_to_page(gmfn)->count_info & PGC_page_table )
- {
- u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
- /* More than one type bit set in shadow-flags? */
- if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
- res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
- }
-
- /* We should never need to flush the TLB or recopy PAE entries */
- ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
-
- return flags;
-}
#if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
void *
@@ -509,11 +521,9 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
// FIXME!
shadow_lock(v->domain);
- guest_walk_tables(v, addr, &gw, 1);
-
- if ( gw.l2e &&
- (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
- !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
+ guest_walk_tables(v, addr, &gw, 0, 1);
+
+ if ( mfn_valid(gw.l1mfn) )
{
if ( gl1mfn )
*gl1mfn = mfn_x(gw.l1mfn);
@@ -521,7 +531,6 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
(guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
}
- unmap_walk(v, &gw);
shadow_unlock(v->domain);
return pl1e;
@@ -538,9 +547,8 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
// FIXME!
shadow_lock(v->domain);
- guest_walk_tables(v, addr, &gw, 1);
- *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
- unmap_walk(v, &gw);
+ guest_walk_tables(v, addr, &gw, 0, 1);
+ *(guest_l1e_t *)eff_l1e = gw.l1e;
shadow_unlock(v->domain);
}
#endif /* CONFIG==SHADOW==GUEST */
@@ -636,17 +644,17 @@ unsigned char pat_type_2_pte_flags(unsig
static always_inline void
_sh_propagate(struct vcpu *v,
- void *guest_entry_ptr,
- mfn_t guest_table_mfn,
+ guest_intpte_t guest_intpte,
mfn_t target_mfn,
void *shadow_entry_ptr,
int level,
fetch_type_t ft,
p2m_type_t p2mt)
{
- guest_l1e_t *gp = guest_entry_ptr;
+ guest_l1e_t guest_entry = { guest_intpte };
shadow_l1e_t *sp = shadow_entry_ptr;
struct domain *d = v->domain;
+ gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
u32 pass_thru_flags;
u32 gflags, sflags;
@@ -660,15 +668,7 @@ _sh_propagate(struct vcpu *v,
goto done;
}
- if ( mfn_valid(guest_table_mfn) )
- /* Handle A and D bit propagation into the guest */
- gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
- else
- {
- /* Must be an fl1e or a prefetch */
- ASSERT(level==1 || !(ft & FETCH_TYPE_DEMAND));
- gflags = guest_l1e_get_flags(*gp);
- }
+ gflags = guest_l1e_get_flags(guest_entry);
if ( unlikely(!(gflags & _PAGE_PRESENT)) )
{
@@ -684,7 +684,7 @@ _sh_propagate(struct vcpu *v,
if ( level == 1 && p2mt == p2m_mmio_dm )
{
/* Guest l1e maps emulated MMIO space */
- *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
+ *sp = sh_l1e_mmio(target_gfn, gflags);
if ( !d->arch.paging.shadow.has_fast_mmio_entries )
d->arch.paging.shadow.has_fast_mmio_entries = 1;
goto done;
@@ -694,9 +694,6 @@ _sh_propagate(struct vcpu *v,
// case of a prefetch, an invalid mfn means that we can not usefully
// shadow anything, and so we return early.
//
- /* N.B. For pass-through MMIO, either this test needs to be relaxed,
- * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
- * MMIO areas need to be added to the frame-table to make them "valid". */
if ( shadow_mode_refcounts(d) &&
!mfn_valid(target_mfn) && (p2mt != p2m_mmio_direct) )
{
@@ -718,20 +715,22 @@ _sh_propagate(struct vcpu *v,
pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT;
sflags = gflags & pass_thru_flags;
- /* Only change memory caching type for pass-through domain */
+ /*
+ * For HVM domains with direct access to MMIO areas, set the correct
+ * caching attributes in the shadows to match what was asked for
+ */
if ( (level == 1) && is_hvm_domain(d) &&
!list_empty(&(domain_hvm_iommu(d)->pdev_list)) )
{
unsigned int type;
- if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(guest_l1e_get_gfn(*gp)),
- &type) )
+ if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), &type) )
sflags |= pat_type_2_pte_flags(type);
- else if ( v->domain->arch.hvm_domain.is_in_uc_mode )
+ else if ( d->arch.hvm_domain.is_in_uc_mode )
sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
else
sflags |= get_pat_flags(v,
gflags,
- guest_l1e_get_paddr(*gp),
+ gfn_to_paddr(target_gfn),
mfn_x(target_mfn) << PAGE_SHIFT);
}
@@ -813,59 +812,55 @@ _sh_propagate(struct vcpu *v,
done:
SHADOW_DEBUG(PROPAGATE,
"%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
- fetch_type_names[ft], level, gp->l1, sp->l1);
-}
-
-
-/* These four wrappers give us a little bit of type-safety back around the
- * use of void-* pointers in _sh_propagate(), and allow the compiler to
- * optimize out some level checks. */
+ fetch_type_names[ft], level, guest_entry.l1, sp->l1);
+}
+
+
+/* These four wrappers give us a little bit of type-safety back around
+ * the use of void-* pointers and intpte types in _sh_propagate(), and
+ * allow the compiler to optimize out some level checks. */
#if GUEST_PAGING_LEVELS >= 4
static void
l4e_propagate_from_guest(struct vcpu *v,
- guest_l4e_t *gl4e,
- mfn_t gl4mfn,
+ guest_l4e_t gl4e,
mfn_t sl3mfn,
shadow_l4e_t *sl4e,
fetch_type_t ft)
{
- _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
+ _sh_propagate(v, gl4e.l4, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
}
static void
l3e_propagate_from_guest(struct vcpu *v,
- guest_l3e_t *gl3e,
- mfn_t gl3mfn,
+ guest_l3e_t gl3e,
mfn_t sl2mfn,
shadow_l3e_t *sl3e,
fetch_type_t ft)
{
- _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
+ _sh_propagate(v, gl3e.l3, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
}
#endif // GUEST_PAGING_LEVELS >= 4
static void
l2e_propagate_from_guest(struct vcpu *v,
- guest_l2e_t *gl2e,
- mfn_t gl2mfn,
+ guest_l2e_t gl2e,
mfn_t sl1mfn,
shadow_l2e_t *sl2e,
fetch_type_t ft)
{
- _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
+ _sh_propagate(v, gl2e.l2, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
}
static void
l1e_propagate_from_guest(struct vcpu *v,
- guest_l1e_t *gl1e,
- mfn_t gl1mfn,
+ guest_l1e_t gl1e,
mfn_t gmfn,
shadow_l1e_t *sl1e,
fetch_type_t ft,
p2m_type_t p2mt)
{
- _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
+ _sh_propagate(v, gl1e.l1, gmfn, sl1e, 1, ft, p2mt);
}
@@ -1859,8 +1854,7 @@ static shadow_l3e_t * shadow_get_and_cre
*sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow);
}
/* Install the new sl3 table in the sl4e */
- l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn,
- *sl3mfn, &new_sl4e, ft);
+ l4e_propagate_from_guest(v, gw->l4e, *sl3mfn, &new_sl4e, ft);
r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
ASSERT((r & SHADOW_SET_FLUSH) == 0);
if ( r & SHADOW_SET_ERROR )
@@ -1909,8 +1903,7 @@ static shadow_l2e_t * shadow_get_and_cre
*sl2mfn = sh_make_shadow(v, gw->l2mfn, t);
}
/* Install the new sl2 table in the sl3e */
- l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn,
- *sl2mfn, &new_sl3e, ft);
+ l3e_propagate_from_guest(v, gw->l3e, *sl2mfn, &new_sl3e, ft);
r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
ASSERT((r & SHADOW_SET_FLUSH) == 0);
if ( r & SHADOW_SET_ERROR )
@@ -1934,7 +1927,7 @@ static shadow_l2e_t * shadow_get_and_cre
/* This next line is important: the guest l2 has a 16k
* shadow, we need to return the right mfn of the four. This
* call will set it for us as a side-effect. */
- (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
+ (void) shadow_l2_index(sl2mfn, guest_l2_table_offset(gw->va));
/* Reading the top level table is always valid. */
return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
#endif
@@ -1956,8 +1949,8 @@ static shadow_l1e_t * shadow_get_and_cre
* re-do it to fix a PSE dirty bit. */
if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT
&& likely(ft != ft_demand_write
- || (guest_l2e_get_flags(*gw->l2e) & _PAGE_DIRTY)
- || !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)) )
+ || (shadow_l2e_get_flags(*sl2e) & _PAGE_RW)
+ || !(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
{
*sl1mfn = shadow_l2e_get_mfn(*sl2e);
ASSERT(mfn_valid(*sl1mfn));
@@ -1965,14 +1958,14 @@ static shadow_l1e_t * shadow_get_and_cre
else
{
shadow_l2e_t new_sl2e;
- int r, flags = guest_l2e_get_flags(*gw->l2e);
+ int r, flags = guest_l2e_get_flags(gw->l2e);
/* No l1 shadow installed: find and install it. */
if ( !(flags & _PAGE_PRESENT) )
return NULL; /* No guest page. */
if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) )
{
/* Splintering a superpage */
- gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
+ gfn_t l2gfn = guest_l2e_get_gfn(gw->l2e);
*sl1mfn = get_fl1_shadow_status(v, l2gfn);
if ( !mfn_valid(*sl1mfn) )
{
@@ -1992,8 +1985,7 @@ static shadow_l1e_t * shadow_get_and_cre
}
}
/* Install the new sl1 table in the sl2e */
- l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn,
- *sl1mfn, &new_sl2e, ft);
+ l2e_propagate_from_guest(v, gw->l2e, *sl1mfn, &new_sl2e, ft);
r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
ASSERT((r & SHADOW_SET_FLUSH) == 0);
if ( r & SHADOW_SET_ERROR )
@@ -2247,7 +2239,7 @@ static int validate_gl4e(struct vcpu *v,
static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
{
shadow_l4e_t new_sl4e;
- guest_l4e_t *new_gl4e = new_ge;
+ guest_l4e_t new_gl4e = *(guest_l4e_t *)new_ge;
shadow_l4e_t *sl4p = se;
mfn_t sl3mfn = _mfn(INVALID_MFN);
struct domain *d = v->domain;
@@ -2256,17 +2248,16 @@ static int validate_gl4e(struct vcpu *v,
perfc_incr(shadow_validate_gl4e_calls);
- if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
- {
- gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
+ if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT )
+ {
+ gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
if ( p2m_is_ram(p2mt) )
sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
else
result |= SHADOW_SET_ERROR;
}
- l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
- sl3mfn, &new_sl4e, ft_prefetch);
+ l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
// check for updates to xen reserved slots
if ( !shadow_mode_external(d) )
@@ -2301,7 +2292,7 @@ static int validate_gl3e(struct vcpu *v,
static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
{
shadow_l3e_t new_sl3e;
- guest_l3e_t *new_gl3e = new_ge;
+ guest_l3e_t new_gl3e = *(guest_l3e_t *)new_ge;
shadow_l3e_t *sl3p = se;
mfn_t sl2mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt;
@@ -2309,17 +2300,16 @@ static int validate_gl3e(struct vcpu *v,
perfc_incr(shadow_validate_gl3e_calls);
- if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
- {
- gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
+ if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT )
+ {
+ gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
if ( p2m_is_ram(p2mt) )
sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
else
result |= SHADOW_SET_ERROR;
}
- l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN),
- sl2mfn, &new_sl3e, ft_prefetch);
+ l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
return result;
@@ -2329,7 +2319,7 @@ static int validate_gl2e(struct vcpu *v,
static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
{
shadow_l2e_t new_sl2e;
- guest_l2e_t *new_gl2e = new_ge;
+ guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge;
shadow_l2e_t *sl2p = se;
mfn_t sl1mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt;
@@ -2337,11 +2327,11 @@ static int validate_gl2e(struct vcpu *v,
perfc_incr(shadow_validate_gl2e_calls);
- if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
- {
- gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
+ if ( guest_l2e_get_flags(new_gl2e) & _PAGE_PRESENT )
+ {
+ gfn_t gl1gfn = guest_l2e_get_gfn(new_gl2e);
if ( guest_supports_superpages(v) &&
- (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
+ (guest_l2e_get_flags(new_gl2e) & _PAGE_PSE) )
{
// superpage -- need to look up the shadow L1 which holds the
// splitters...
@@ -2364,8 +2354,7 @@ static int validate_gl2e(struct vcpu *v,
result |= SHADOW_SET_ERROR;
}
}
- l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
- sl1mfn, &new_sl2e, ft_prefetch);
+ l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch);
// check for updates to xen reserved slots in PV guests...
// XXX -- need to revisit this for PV 3-on-4 guests.
@@ -2415,7 +2404,7 @@ static int validate_gl1e(struct vcpu *v,
static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
{
shadow_l1e_t new_sl1e;
- guest_l1e_t *new_gl1e = new_ge;
+ guest_l1e_t new_gl1e = *(guest_l1e_t *)new_ge;
shadow_l1e_t *sl1p = se;
gfn_t gfn;
mfn_t gmfn;
@@ -2424,11 +2413,10 @@ static int validate_gl1e(struct vcpu *v,
perfc_incr(shadow_validate_gl1e_calls);
- gfn = guest_l1e_get_gfn(*new_gl1e);
+ gfn = guest_l1e_get_gfn(new_gl1e);
gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
- l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e,
- ft_prefetch, p2mt);
+ l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
return result;
@@ -2615,7 +2603,7 @@ static void sh_prefetch(struct vcpu *v,
int i, dist;
gfn_t gfn;
mfn_t gmfn;
- guest_l1e_t gl1e;
+ guest_l1e_t *gl1p = NULL, gl1e;
shadow_l1e_t sl1e;
u32 gflags;
p2m_type_t p2mt;
@@ -2626,16 +2614,23 @@ static void sh_prefetch(struct vcpu *v,
if ( dist > PREFETCH_DISTANCE )
dist = PREFETCH_DISTANCE;
+ if ( mfn_valid(gw->l1mfn) )
+ {
+ /* Normal guest page; grab the next guest entry */
+ gl1p = sh_map_domain_page(gw->l1mfn);
+ gl1p += guest_l1_table_offset(gw->va);
+ }
+
for ( i = 1; i < dist ; i++ )
{
/* No point in prefetching if there's already a shadow */
if ( ptr_sl1e[i].l1 != 0 )
break;
- if ( gw->l1e )
+ if ( mfn_valid(gw->l1mfn) )
{
/* Normal guest page; grab the next guest entry */
- gl1e = gw->l1e[i];
+ gl1e = gl1p[i];
/* Not worth continuing if we hit an entry that will need another
* fault for A/D-bit propagation anyway */
gflags = guest_l1e_get_flags(gl1e);
@@ -2647,24 +2642,23 @@ static void sh_prefetch(struct vcpu *v,
else
{
/* Fragmented superpage, unless we've been called wrongly */
- ASSERT(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE);
+ ASSERT(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE);
/* Increment the l1e's GFN by the right number of guest pages */
gl1e = guest_l1e_from_gfn(
- _gfn(gfn_x(guest_l1e_get_gfn(gw->eff_l1e)) + i),
- guest_l1e_get_flags(gw->eff_l1e));
+ _gfn(gfn_x(guest_l1e_get_gfn(gw->l1e)) + i),
+ guest_l1e_get_flags(gw->l1e));
}
/* Look at the gfn that the l1e is pointing at */
gfn = guest_l1e_get_gfn(gl1e);
gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
- /* Propagate the entry. Safe to use a pointer to our local
- * gl1e, since this is not a demand-fetch so there will be no
- * write-back to the guest. */
- l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
- gmfn, &sl1e, ft_prefetch, p2mt);
+ /* Propagate the entry. */
+ l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
(void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
}
+ if ( gl1p != NULL )
+ sh_unmap_domain_page(gl1p);
}
#endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
@@ -2684,7 +2678,6 @@ static int sh_page_fault(struct vcpu *v,
{
struct domain *d = v->domain;
walk_t gw;
- u32 accumulated_gflags;
gfn_t gfn;
mfn_t gmfn, sl1mfn=_mfn(0);
shadow_l1e_t sl1e, *ptr_sl1e;
@@ -2769,10 +2762,10 @@ static int sh_page_fault(struct vcpu *v,
shadow_audit_tables(v);
- if ( guest_walk_tables(v, va, &gw, 1) != 0 )
- {
- SHADOW_PRINTK("malformed guest pagetable\n");
- print_gw(&gw);
+ if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 )
+ {
+ perfc_incr(shadow_fault_bail_real_fault);
+ goto not_a_shadow_fault;
}
/* It's possible that the guest has put pagetables in memory that it has
@@ -2788,64 +2781,12 @@ static int sh_page_fault(struct vcpu *v,
sh_audit_gw(v, &gw);
- // We do not look at the gw->l1e, as that will not exist for superpages.
- // Instead, we use the gw->eff_l1e...
- //
- // We need not check all the levels of the guest page table entries for
- // present vs not-present, as the eff_l1e will always be not present if
- // one of the higher level entries is not present.
- //
- if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
- {
- perfc_incr(shadow_fault_bail_not_present);
- goto not_a_shadow_fault;
- }
-
- // All levels of the guest page table are now known to be present.
- accumulated_gflags = accumulate_guest_flags(v, &gw);
-
- // Check for attempts to access supervisor-only pages from user mode,
- // i.e. ring 3. Such errors are not caused or dealt with by the shadow
- // code.
- //
- if ( (regs->error_code & PFEC_user_mode) &&
- !(accumulated_gflags & _PAGE_USER) )
- {
- /* illegal user-mode access to supervisor-only page */
- perfc_incr(shadow_fault_bail_user_supervisor);
- goto not_a_shadow_fault;
- }
-
- // Was it a write fault?
+ /* What kind of access are we dealing with? */
ft = ((regs->error_code & PFEC_write_access)
? ft_demand_write : ft_demand_read);
- if ( ft == ft_demand_write )
- {
- if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
- {
- perfc_incr(shadow_fault_bail_ro_mapping);
- goto not_a_shadow_fault;
- }
- }
- else // must have been either an insn fetch or read fault
- {
- // Check for NX bit violations: attempts to execute code that is
- // marked "do not execute". Such errors are not caused or dealt with
- // by the shadow code.
- //
- if ( regs->error_code & PFEC_insn_fetch )
- {
- if ( accumulated_gflags & _PAGE_NX_BIT )
- {
- /* NX prevented this code fetch */
- perfc_incr(shadow_fault_bail_nx);
- goto not_a_shadow_fault;
- }
- }
- }
/* What mfn is the guest trying to access? */
- gfn = guest_l1e_get_gfn(gw.eff_l1e);
+ gfn = guest_l1e_get_gfn(gw.l1e);
gmfn = gfn_to_mfn(d, gfn, &p2mt);
if ( shadow_mode_refcounts(d) &&
@@ -2876,14 +2817,12 @@ static int sh_page_fault(struct vcpu *v,
* shadow_set_l*e(), which will have crashed the guest.
* Get out of the fault handler immediately. */
ASSERT(d->is_shutting_down);
- unmap_walk(v, &gw);
shadow_unlock(d);
return 0;
}
/* Calculate the shadow entry and write it */
- l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn,
- gmfn, &sl1e, ft, p2mt);
+ l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt);
r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2921,7 +2860,6 @@ static int sh_page_fault(struct vcpu *v,
done:
sh_audit_gw(v, &gw);
- unmap_walk(v, &gw);
SHADOW_PRINTK("fixed\n");
shadow_audit_tables(v);
shadow_unlock(d);
@@ -2972,7 +2910,6 @@ static int sh_page_fault(struct vcpu *v,
* take it again when we write to the pagetables.
*/
sh_audit_gw(v, &gw);
- unmap_walk(v, &gw);
shadow_audit_tables(v);
shadow_unlock(d);
@@ -3033,7 +2970,6 @@ static int sh_page_fault(struct vcpu *v,
goto not_a_shadow_fault;
perfc_incr(shadow_fault_mmio);
sh_audit_gw(v, &gw);
- unmap_walk(v, &gw);
SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
shadow_audit_tables(v);
reset_early_unshadow(v);
@@ -3043,7 +2979,6 @@ static int sh_page_fault(struct vcpu *v,
not_a_shadow_fault:
sh_audit_gw(v, &gw);
- unmap_walk(v, &gw);
SHADOW_PRINTK("not a shadow fault\n");
shadow_audit_tables(v);
reset_early_unshadow(v);
@@ -3129,30 +3064,36 @@ sh_invlpg(struct vcpu *v, unsigned long
static unsigned long
-sh_gva_to_gfn(struct vcpu *v, unsigned long va)
+sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
/* Called to translate a guest virtual address to what the *guest*
* pagetables would map it to. */
{
walk_t gw;
gfn_t gfn;
-
+
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
struct shadow_vtlb t = {0};
- if ( vtlb_lookup(v, va, &t) )
+ /* Check the vTLB cache first */
+ if ( vtlb_lookup(v, va, pfec[0], &t) )
return t.frame_number;
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
- guest_walk_tables(v, va, &gw, 0);
+ if ( guest_walk_tables(v, va, &gw, pfec[0], 0) != 0 )
+ {
+ if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
+ pfec[0] &= ~PFEC_page_present;
+ return INVALID_GFN;
+ }
gfn = guest_walk_to_gfn(&gw);
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
t.page_number = va >> PAGE_SHIFT;
t.frame_number = gfn_x(gfn);
t.flags = accumulate_guest_flags(v, &gw);
+ t.pfec = pfec[0];
vtlb_insert(v, t);
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
- unmap_walk(v, &gw);
return gfn_x(gfn);
}
@@ -4006,9 +3947,8 @@ static inline void * emulate_map_dest(st
struct sh_emulate_ctxt *sh_ctxt,
mfn_t *mfnp)
{
- walk_t gw;
- u32 flags, errcode;
- gfn_t gfn;
+ uint32_t pfec;
+ unsigned long gfn;
mfn_t mfn;
p2m_type_t p2mt;
@@ -4016,50 +3956,20 @@ static inline void * emulate_map_dest(st
if ( ring_3(sh_ctxt->ctxt.regs) )
return NULL;
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
- /* Try the virtual TLB first */
- {
- struct shadow_vtlb t = {0};
- if ( vtlb_lookup(v, vaddr, &t)
- && ((t.flags & (_PAGE_PRESENT|_PAGE_RW))
- == (_PAGE_PRESENT|_PAGE_RW)) )
- {
- flags = t.flags;
- gfn = _gfn(t.frame_number);
- }
+ /* Translate the VA, and exit with a page-fault if we fail */
+ pfec = PFEC_page_present | PFEC_write_access;
+ gfn = sh_gva_to_gfn(v, vaddr, &pfec);
+ if ( gfn == INVALID_GFN )
+ {
+ if ( is_hvm_vcpu(v) )
+ hvm_inject_exception(TRAP_page_fault, pfec, vaddr);
else
- {
- /* Need to do the full lookup, just in case permissions
- * have increased since we cached this entry */
-
-#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
-
- /* Walk the guest pagetables */
- guest_walk_tables(v, vaddr, &gw, 1);
- flags = accumulate_guest_flags(v, &gw);
- gfn = guest_l1e_get_gfn(gw.eff_l1e);
- sh_audit_gw(v, &gw);
- unmap_walk(v, &gw);
-
-#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
- /* Remember this translation for next time */
- t.page_number = vaddr >> PAGE_SHIFT;
- t.frame_number = gfn_x(gfn);
- t.flags = flags;
- vtlb_insert(v, t);
- }
- }
-#endif
-
- errcode = PFEC_write_access;
- if ( !(flags & _PAGE_PRESENT) )
- goto page_fault;
-
- errcode |= PFEC_page_present;
- if ( !(flags & _PAGE_RW) )
- goto page_fault;
-
- mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+ propagate_page_fault(vaddr, pfec);
+ return NULL;
+ }
+
+ /* Translate the GFN */
+ mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
if ( p2m_is_ram(p2mt) )
{
ASSERT(mfn_valid(mfn));
@@ -4069,13 +3979,6 @@ static inline void * emulate_map_dest(st
}
else
return NULL;
-
- page_fault:
- if ( is_hvm_vcpu(v) )
- hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
- else
- propagate_page_fault(vaddr, errcode);
- return NULL;
}
static int safe_not_to_verify_write(mfn_t gmfn, void *dst, void *src,
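
The mandatory_flags() table introduced above is just a bit-unpacking of the
fault code: the index (pfec & 0x1f) >> 1 shifts away the present bit, and the
"-" column of the table shows that the reserved bit is ignored, leaving
write/user/fetch to select which PTE permissions a successful walk must find.
guest_walk_entry() then rejects the walk if any demanded bit is missing (with
NX inverted so it can be tested like a permission). A standalone sketch, with
illustrative _PAGE_* stand-in values:

    /* Equivalent of the 16-entry mandatory_flags[] table; values illustrative. */
    #include <stdio.h>
    #include <stdint.h>

    #define PFEC_write_access  (1U << 1)
    #define PFEC_user_mode     (1U << 2)
    #define PFEC_insn_fetch    (1U << 4)

    #define _PAGE_PRESENT  0x001U
    #define _PAGE_RW       0x002U
    #define _PAGE_USER     0x004U
    #define _PAGE_NX_BIT   0x800U   /* stand-in, not the hardware bit position */

    static uint32_t mandatory_flags(uint32_t pfec)
    {
        uint32_t f = _PAGE_PRESENT;          /* always demanded */
        if ( pfec & PFEC_write_access )
            f |= _PAGE_RW;
        if ( pfec & PFEC_user_mode )
            f |= _PAGE_USER;
        if ( pfec & PFEC_insn_fetch )        /* NX sense inverted: demand "X" */
            f |= _PAGE_NX_BIT;
        return f;
    }

    int main(void)
    {
        /* A user-mode write fault (pfec 0x7) selects table row 3:
         * _PAGE_PRESENT|_PAGE_RW|_PAGE_USER, i.e. 0x7 here. */
        printf("%#x\n", mandatory_flags(0x7));
        return 0;
    }
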
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/private.h Fri Nov 02 16:38:11 2007 +0000
@@ -665,9 +665,10 @@ void shadow_continue_emulation(
#define VTLB_ENTRIES 13
struct shadow_vtlb {
- unsigned long page_number; /* Guest virtual address >> PAGE_SHIFT */
- unsigned long frame_number; /* Guest physical address >> PAGE_SHIFT */
- u32 flags; /* Accumulated guest pte flags, or 0 for an empty slot. */
+ unsigned long page_number; /* Guest virtual address >> PAGE_SHIFT */
+ unsigned long frame_number; /* Guest physical address >> PAGE_SHIFT */
+ uint32_t pfec; /* Pagefault code for the lookup that filled this entry */
+ uint32_t flags; /* Accumulated guest pte flags, or 0 for an empty slot. */
};
/* Call whenever the guest flushes hit actual TLB */
@@ -692,7 +693,7 @@ static inline void vtlb_insert(struct vc
}
/* Look a translation up in the vTLB. Returns 0 if not found. */
-static inline int vtlb_lookup(struct vcpu *v, unsigned long va,
+static inline int vtlb_lookup(struct vcpu *v, unsigned long va, uint32_t pfec,
struct shadow_vtlb *result)
{
unsigned long page_number = va >> PAGE_SHIFT;
@@ -701,7 +702,9 @@ static inline int vtlb_lookup(struct vcp
spin_lock(&v->arch.paging.vtlb_lock);
if ( v->arch.paging.vtlb[i].flags != 0
- && v->arch.paging.vtlb[i].page_number == page_number )
+ && v->arch.paging.vtlb[i].page_number == page_number
+ /* Any successful walk that had at least these pfec bits is OK */
+ && (v->arch.paging.vtlb[i].pfec & pfec) == pfec )
{
rv = 1;
result[0] = v->arch.paging.vtlb[i];
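
The new pfec field makes vTLB hits conservative: a cached translation may
only satisfy a lookup whose required access rights are a subset of those
already verified when the entry was filled. A small sketch of that subset
test (standalone, names illustrative):

    /* The hit rule from vtlb_lookup() above: every bit the new lookup
     * demands must have been present in the cached walk's fault code. */
    #include <assert.h>
    #include <stdint.h>

    #define PFEC_page_present 1U
    #define PFEC_write_access 2U

    static int pfec_covers(uint32_t cached, uint32_t wanted)
    {
        return (cached & wanted) == wanted;
    }

    int main(void)
    {
        /* A cached write-walk can serve a later read lookup... */
        assert(pfec_covers(PFEC_page_present | PFEC_write_access,
                           PFEC_page_present));
        /* ...but a cached read-walk must not serve a write lookup. */
        assert(!pfec_covers(PFEC_page_present,
                            PFEC_page_present | PFEC_write_access));
        return 0;
    }
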
diff -r 838e77a41a3c -r 650cadd1b283 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/arch/x86/mm/shadow/types.h Fri Nov 02 16:38:11 2007 +0000
@@ -251,6 +251,7 @@ TYPE_SAFE(u32,gfn)
/* Types of the guest's page tables */
typedef l1_pgentry_32_t guest_l1e_t;
typedef l2_pgentry_32_t guest_l2e_t;
+typedef intpte_32_t guest_intpte_t;
/* Access functions for them */
static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
@@ -319,6 +320,7 @@ typedef l3_pgentry_t guest_l3e_t;
#if GUEST_PAGING_LEVELS >= 4
typedef l4_pgentry_t guest_l4e_t;
#endif
+typedef intpte_t guest_intpte_t;
/* Access functions for them */
static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
@@ -419,32 +421,27 @@ gfn_to_paddr(gfn_t gfn)
/* Type used for recording a walk through guest pagetables. It is
* filled in by the pagetable walk function, and also used as a cache
- * for later walks.
- * Any non-null pointer in this structure represents a mapping of guest
- * memory. We must always call walk_init() before using a walk_t, and
- * call walk_unmap() when we're done.
- * The "Effective l1e" field is used when there isn't an l1e to point to,
- * but we have fabricated an l1e for propagation to the shadow (e.g.,
- * for splintering guest superpages into many shadow l1 entries). */
+ * for later walks. When we encounter a superpage l2e, we fabricate an
+ * l1e for propagation to the shadow (for splintering guest superpages
+ * into many shadow l1 entries). */
typedef struct shadow_walk_t walk_t;
struct shadow_walk_t
{
unsigned long va; /* Address we were looking for */
#if GUEST_PAGING_LEVELS >= 3
#if GUEST_PAGING_LEVELS >= 4
- guest_l4e_t *l4e; /* Pointer to guest's level 4 entry */
-#endif
- guest_l3e_t *l3e; /* Pointer to guest's level 3 entry */
-#endif
- guest_l2e_t *l2e; /* Pointer to guest's level 2 entry */
- guest_l1e_t *l1e; /* Pointer to guest's level 1 entry */
- guest_l1e_t eff_l1e; /* Effective level 1 entry */
-#if GUEST_PAGING_LEVELS >= 4
- mfn_t l4mfn; /* MFN that the level 4 entry is in */
- mfn_t l3mfn; /* MFN that the level 3 entry is in */
-#endif
- mfn_t l2mfn; /* MFN that the level 2 entry is in */
- mfn_t l1mfn; /* MFN that the level 1 entry is in */
+ guest_l4e_t l4e; /* Guest's level 4 entry */
+#endif
+ guest_l3e_t l3e; /* Guest's level 3 entry */
+#endif
+ guest_l2e_t l2e; /* Guest's level 2 entry */
+ guest_l1e_t l1e; /* Guest's level 1 entry (or fabrication) */
+#if GUEST_PAGING_LEVELS >= 4
+ mfn_t l4mfn; /* MFN that the level 4 entry was in */
+ mfn_t l3mfn; /* MFN that the level 3 entry was in */
+#endif
+ mfn_t l2mfn; /* MFN that the level 2 entry was in */
+ mfn_t l1mfn; /* MFN that the level 1 entry was in */
};
/* macros for dealing with the naming of the internal function names of the
@@ -542,7 +539,7 @@ accumulate_guest_flags(struct vcpu *v, w
{
u32 accumulated_flags;
- if ( unlikely(!(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT)) )
+ if ( unlikely(!(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT)) )
return 0;
// We accumulate the permission flags with bitwise ANDing.
@@ -550,17 +547,17 @@ accumulate_guest_flags(struct vcpu *v, w
// For the NX bit, however, the polarity is wrong, so we accumulate the
// inverse of the NX bit.
//
- accumulated_flags = guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
- accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
+ accumulated_flags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
+ accumulated_flags &= guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
// Note that PAE guests do not have USER or RW or NX bits in their L3s.
//
#if GUEST_PAGING_LEVELS == 3
accumulated_flags &=
- ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
+ ~_PAGE_PRESENT | (guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT);
#elif GUEST_PAGING_LEVELS >= 4
- accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
- accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
+ accumulated_flags &= guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
+ accumulated_flags &= guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
#endif
// Revert the NX bit back to its original polarity
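
accumulate_guest_flags() can AND permissions down the walk only because NX
has the opposite polarity to RW/USER, so each level's flags are XORed with
_PAGE_NX_BIT first: the inverted bit means "executable", which (like RW and
USER) must be granted at every level, and a final XOR restores the hardware
sense. A standalone sketch with illustrative flag values:

    /* The NX-inversion trick from accumulate_guest_flags(); values illustrative. */
    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_RW      0x002U
    #define _PAGE_USER    0x004U
    #define _PAGE_NX_BIT  0x800U

    int main(void)
    {
        uint32_t l1 = _PAGE_RW | _PAGE_USER;                 /* NX clear */
        uint32_t l2 = _PAGE_RW | _PAGE_USER | _PAGE_NX_BIT;  /* NX set */

        uint32_t acc = l1 ^ _PAGE_NX_BIT;   /* invert: bit now means "X allowed" */
        acc &= l2 ^ _PAGE_NX_BIT;           /* accumulate like any other right */
        acc ^= _PAGE_NX_BIT;                /* restore hardware polarity */

        /* One no-exec level makes the whole mapping no-exec. */
        printf("NX in result: %s\n", (acc & _PAGE_NX_BIT) ? "yes" : "no");
        return 0;
    }
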
diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/hvm/support.h Fri Nov 02 16:38:11 2007 +0000
@@ -86,6 +86,7 @@ int hvm_copy_from_guest_phys(void *buf,
int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size);
int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size);
int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size);
+int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size);
void hvm_print_line(struct vcpu *v, const char c);
void hlt_timer_fn(void *data);
diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/paging.h Fri Nov 02 16:38:11 2007 +0000
@@ -105,7 +105,8 @@ struct paging_mode {
int (*page_fault )(struct vcpu *v, unsigned long va,
struct cpu_user_regs *regs);
int (*invlpg )(struct vcpu *v, unsigned long va);
- unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va);
+ unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va,
+ uint32_t *pfec);
void (*update_cr3 )(struct vcpu *v, int do_locking);
void (*update_paging_modes )(struct vcpu *v);
void (*write_p2m_entry )(struct vcpu *v, unsigned long gfn,
@@ -204,12 +205,17 @@ static inline int paging_invlpg(struct v
}
/* Translate a guest virtual address to the frame number that the
- * *guest* pagetables would map it to. Returns INVALID_GFN if the guest
- * tables don't map this address. */
+ * *guest* pagetables would map it to. Returns INVALID_GFN if the guest
+ * tables don't map this address for this kind of access.
+ * pfec[0] is used to determine which kind of access this is when
+ * walking the tables. The caller should set the PFEC_page_present bit
+ * in pfec[0]; in the failure case, that bit will be cleared if appropriate. */
#define INVALID_GFN (-1UL)
-static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va)
-{
- return v->arch.paging.mode->gva_to_gfn(v, va);
+static inline unsigned long paging_gva_to_gfn(struct vcpu *v,
+ unsigned long va,
+ uint32_t *pfec)
+{
+ return v->arch.paging.mode->gva_to_gfn(v, va, pfec);
}
/* Update all the things that are derived from the guest's CR3.
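
A hypothetical caller following the contract described in the comment above:
seed pfec[0] with PFEC_page_present plus the access bits, and on failure
inject exactly what comes back (the mode handler clears the present bit when
the walk failed on a not-present entry, as sh_gva_to_gfn() now does). The
stub below stands in for the per-mode gva_to_gfn hook and is not Xen code:

    /* Standalone illustration of the paging_gva_to_gfn() calling contract. */
    #include <stdio.h>
    #include <stdint.h>

    #define PFEC_page_present 1U
    #define PFEC_write_access 2U
    #define INVALID_GFN (-1UL)

    static unsigned long stub_gva_to_gfn(unsigned long va, uint32_t *pfec)
    {
        /* Pretend the walk hit a not-present entry: fail the translation
         * and clear the present bit in the reported fault code. */
        (void)va;
        *pfec &= ~PFEC_page_present;
        return INVALID_GFN;
    }

    int main(void)
    {
        uint32_t pfec = PFEC_page_present | PFEC_write_access;
        unsigned long gfn = stub_gva_to_gfn(0xb8000UL, &pfec);
        if ( gfn == INVALID_GFN )
            printf("inject #PF, error code %#x\n", pfec);   /* prints 0x2 */
        return 0;
    }
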
diff -r 838e77a41a3c -r 650cadd1b283 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h Fri Nov 02 16:34:54 2007 +0000
+++ b/xen/include/asm-x86/perfc_defn.h Fri Nov 02 16:38:11 2007 +0000
@@ -50,12 +50,8 @@ PERFCOUNTER(shadow_fault_fast_mmio, "sha
PERFCOUNTER(shadow_fault_fast_mmio, "shadow_fault fast path mmio")
PERFCOUNTER(shadow_fault_fast_fail, "shadow_fault fast path error")
PERFCOUNTER(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
-PERFCOUNTER(shadow_fault_bail_not_present,
- "shadow_fault guest not-present")
-PERFCOUNTER(shadow_fault_bail_nx, "shadow_fault guest NX fault")
-PERFCOUNTER(shadow_fault_bail_ro_mapping, "shadow_fault guest R/W fault")
-PERFCOUNTER(shadow_fault_bail_user_supervisor,
- "shadow_fault guest U/S fault")
+PERFCOUNTER(shadow_fault_bail_real_fault,
+ "shadow_fault really guest fault")
PERFCOUNTER(shadow_fault_emulate_read, "shadow_fault emulates a read")
PERFCOUNTER(shadow_fault_emulate_write, "shadow_fault emulates a write")
PERFCOUNTER(shadow_fault_emulate_failed, "shadow_fault emulator fails")