# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1189431750 -3600
# Node ID 4633e9604da9c51f077285465d63db1820e6f574
# Parent 1474db8058b20753eb465273f7dbf5e10662bf0f
[HVM] Add type information to the p2m map.
This is a base for memory tricks like page sharing, copy-on-write, lazy
allocation etc. It should also make pass-through MMIO easier to
implement in the p2m.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
xen/arch/x86/hvm/hvm.c | 33 ++++---
xen/arch/x86/hvm/io.c | 9 -
xen/arch/x86/hvm/svm/svm.c | 32 ++++--
xen/arch/x86/hvm/vmx/vmx.c | 18 ++-
xen/arch/x86/mm/hap/guest_walk.c | 10 +-
xen/arch/x86/mm/hap/hap.c | 10 +-
xen/arch/x86/mm/p2m.c | 122 +++++++++++++++++---------
xen/arch/x86/mm/shadow/common.c | 24 +++--
xen/arch/x86/mm/shadow/multi.c | 138 +++++++++++++++++++-----------
xen/arch/x86/mm/shadow/types.h | 2
xen/include/asm-x86/mm.h | 2
xen/include/asm-x86/p2m.h | 179 ++++++++++++++++++++++++++++-----------
12 files changed, 379 insertions(+), 200 deletions(-)
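
The core of the change is the typed lookup interface: gfn_to_mfn() now returns the
entry's p2m_type_t through an out-parameter, and callers test it with the p2m_is_*()
predicates instead of inferring "emulated MMIO" from mfn_valid()/mmio_space(). A minimal
caller-side sketch of the new pattern follows (illustrative only, assuming the usual
Xen-internal headers; example_lookup is an invented name and is not part of this patch):

    /* Old style:
     *   mfn = gmfn_to_mfn(d, gfn);
     *   if ( !mfn_valid(mfn) )  ...treat as emulated MMIO...
     *
     * New style: the p2m entry says what kind of frame this gfn is. */
    static int example_lookup(struct domain *d, unsigned long gfn)
    {
        p2m_type_t p2mt;
        unsigned long mfn = mfn_x(gfn_to_mfn(d, gfn, &p2mt));

        if ( p2m_is_ram(p2mt) )
        {
            ASSERT(mfn_valid(mfn)); /* RAM types always carry a real MFN */
            return 1;               /* safe to map and use the page */
        }
        if ( p2m_is_mmio(p2mt) )
            return 0;               /* e.g. p2m_mmio_dm: hand to the device model */
        return -EINVAL;             /* p2m_invalid: nothing mapped at this gfn */
    }
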
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/hvm.c Mon Sep 10 14:42:30 2007 +0100
@@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
{
struct page_info *page;
+ p2m_type_t p2mt;
unsigned long mfn;
void *va;
- mfn = gmfn_to_mfn(d, gmfn);
- if ( !mfn_valid(mfn) )
+ mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+ if ( !p2m_is_ram(p2mt) )
return -EINVAL;
+ ASSERT(mfn_valid(mfn));
page = mfn_to_page(mfn);
if ( !get_page_and_type(page, d, PGT_writable_page) )
@@ -517,7 +519,8 @@ int hvm_set_cr0(unsigned long value)
int hvm_set_cr0(unsigned long value)
{
struct vcpu *v = current;
- unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
+ p2m_type_t p2mt;
+ unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
@@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
if ( !paging_mode_hap(v->domain) )
{
/* The guest CR3 must be pointing to the guest physical. */
- mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
+ gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
+ mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+ !get_page(mfn_to_page(mfn), v->domain))
{
gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
v->arch.hvm_vcpu.guest_cr[3], mfn);
@@ -603,16 +608,18 @@ int hvm_set_cr3(unsigned long value)
int hvm_set_cr3(unsigned long value)
{
unsigned long mfn;
+ p2m_type_t p2mt;
struct vcpu *v = current;
if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
(value != v->arch.hvm_vcpu.guest_cr[3]) )
{
- /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
+ /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
- mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
- goto bad_cr3;
+ mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+ !get_page(mfn_to_page(mfn), v->domain) )
+ goto bad_cr3;
put_page(pagetable_get_page(v->arch.guest_table));
v->arch.guest_table = pagetable_from_pfn(mfn);
@@ -677,6 +684,7 @@ static int __hvm_copy(void *buf, paddr_t
static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
{
unsigned long gfn, mfn;
+ p2m_type_t p2mt;
char *p;
int count, todo;
@@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
else
gfn = addr >> PAGE_SHIFT;
- mfn = get_mfn_from_gpfn(gfn);
-
- if ( mfn == INVALID_MFN )
+ mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+
+ if ( !p2m_is_ram(p2mt) )
return todo;
+ ASSERT(mfn_valid(mfn));
p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/io.c Mon Sep 10 14:42:30 2007 +0100
@@ -826,9 +826,7 @@ void hvm_io_assist(void)
ioreq_t *p;
struct cpu_user_regs *regs;
struct hvm_io_op *io_opp;
- unsigned long gmfn;
struct vcpu *v = current;
- struct domain *d = v->domain;
io_opp = &v->arch.hvm_vcpu.io_op;
regs = &io_opp->io_context;
@@ -861,13 +859,6 @@ void hvm_io_assist(void)
regs->eflags &= ~X86_EFLAGS_RF;
hvm_load_cpu_guest_regs(v, regs);
memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
-
- /* Has memory been dirtied? */
- if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
- {
- gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
- paging_mark_dirty(d, gmfn);
- }
out:
vcpu_end_shutdown_deferral(v);
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c Mon Sep 10 14:42:30 2007 +0100
@@ -338,6 +338,7 @@ int svm_vmcb_restore(struct vcpu *v, str
int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
{
unsigned long mfn = 0;
+ p2m_type_t p2mt;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
if ( c->pending_valid &&
@@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
{
if ( c->cr0 & X86_CR0_PG )
{
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+ mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
{
gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
c->cr3);
@@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
return 1;
}
-static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
-{
- if (mmio_space(gpa)) {
+static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+ p2m_type_t p2mt;
+ mfn_t mfn;
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+
+ /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+ mfn = gfn_to_mfn_current(gfn, &p2mt);
+ if ( p2mt == p2m_mmio_dm )
+ {
handle_mmio(gpa);
- return 1;
- }
-
- paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
- return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
+ return;
+ }
+
+ /* Log-dirty: mark the page dirty and let the guest write it again */
+ paging_mark_dirty(current->domain, mfn_x(mfn));
+ p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
}
static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
@@ -2341,8 +2350,7 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_NPF:
regs->error_code = vmcb->exitinfo1;
- if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
- domain_crash(v->domain);
+ svm_do_nested_pgfault(vmcb->exitinfo2, regs);
break;
default:
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Sep 10 14:42:30 2007 +0100
@@ -566,6 +566,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
{
unsigned long mfn = 0;
+ p2m_type_t p2mt;
if ( c->pending_valid &&
((c->pending_type == 1) || (c->pending_type > 6) ||
@@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
if ( c->cr0 & X86_CR0_PG )
{
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+ mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
{
gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
return -EINVAL;
@@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
* Note that this leaf lives at <max-hypervisor-leaf> + 1.
*/
u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
- unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+ p2m_type_t p2mt;
+ unsigned long mfn;
struct vcpu *v = current;
char *p;
+ mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+
gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
/* 8-byte aligned valid pseudophys address from vmxassist, please. */
- if ( (value & 7) || (mfn == INVALID_MFN) ||
+ if ( (value & 7) || !p2m_is_ram(p2mt) ||
!v->arch.hvm_vmx.vmxassist_enabled )
{
domain_crash(v->domain);
return;
}
+ ASSERT(mfn_valid(mfn));
p = map_domain_page(mfn);
value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
@@ -1905,11 +1910,12 @@ static int vmx_world_restore(struct vcpu
static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
{
unsigned long mfn = 0;
+ p2m_type_t p2mt;
if ( c->cr0 & X86_CR0_PG )
{
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+ mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
{
gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
return -EINVAL;
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/hap/guest_walk.c Mon Sep 10 14:42:30 2007 +0100
@@ -28,7 +28,8 @@
#include <xen/sched.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/domain.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
#include <asm/hap.h>
#include "private.h"
@@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
int lev, index;
paddr_t gpa = 0;
unsigned long gpfn, mfn;
+ p2m_type_t p2mt;
int success = 1;
l1_pgentry_t *l1e;
@@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
gpfn = (gcr3 >> PAGE_SHIFT);
for ( lev = mode; lev >= 1; lev-- )
{
- mfn = get_mfn_from_gpfn(gpfn);
- if ( mfn == INVALID_MFN )
+ mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
+ if ( !p2m_is_ram(p2mt) )
{
HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
lev);
success = 0;
break;
}
+ ASSERT(mfn_valid(mfn));
+
index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
#if GUEST_PAGING_LEVELS >= 4
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/hap/hap.c Mon Sep 10 14:42:30 2007 +0100
@@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
d->arch.paging.mode |= PG_log_dirty;
hap_unlock(d);
- /* set l1e entries of P2M table to NOT_WRITABLE. */
- p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+ /* set l1e entries of P2M table to be read-only. */
+ p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
flush_tlb_mask(d->domain_dirty_cpumask);
return 0;
}
@@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain
hap_unlock(d);
/* set l1e entries of P2M table with normal mode */
- p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
+ p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
return 0;
}
void hap_clean_dirty_bitmap(struct domain *d)
{
- /* mark physical memory as NOT_WRITEABLE and flush the TLB */
- p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+ /* set l1e entries of P2M table to be read-only. */
+ p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
flush_tlb_mask(d->domain_dirty_cpumask);
}
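
Taken together with the svm.c hunk above, the HAP log-dirty cycle under the new types
looks roughly like the sketch below (illustrative only; the example_* names are invented,
locking and error handling are omitted, and the real fault path is svm_do_nested_pgfault()):

    static void example_logdirty_enable(struct domain *d)
    {
        /* Every p2m_ram_rw entry becomes read-only p2m_ram_logdirty. */
        p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
        flush_tlb_mask(d->domain_dirty_cpumask);
    }

    static void example_logdirty_write_fault(struct domain *d, unsigned long gfn)
    {
        /* First guest write to a logdirty gfn: record it in the dirty
         * bitmap, then hand write access back for just this one entry. */
        p2m_type_t p2mt;
        mfn_t mfn = gfn_to_mfn(d, gfn, &p2mt);
        paging_mark_dirty(d, mfn_x(mfn));
        p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
    }

hap_clean_dirty_bitmap() then resets the entries to p2m_ram_logdirty so the next round
of writes is caught again.
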
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/p2m.c Mon Sep 10 14:42:30 2007 +0100
@@ -4,7 +4,7 @@
* physical-to-machine mappings for automatically-translated domains.
*
* Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
* Parts of this code are Copyright (c) 2006 by Michael A Fetterman
* Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
*
@@ -93,6 +93,31 @@
#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+/* PTE flags for the various types of p2m entry */
+#define P2M_BASE_FLAGS \
+ (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+static unsigned long p2m_type_to_flags(p2m_type_t t)
+{
+ unsigned long flags = (t & 0x7UL) << 9;
+ switch(t)
+ {
+ case p2m_invalid:
+ default:
+ return flags;
+ case p2m_ram_rw:
+ return flags | P2M_BASE_FLAGS | _PAGE_RW;
+ case p2m_ram_logdirty:
+ return flags | P2M_BASE_FLAGS;
+ case p2m_ram_ro:
+ return flags | P2M_BASE_FLAGS;
+ case p2m_mmio_dm:
+ return flags;
+ case p2m_mmio_direct:
+ return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+ }
+}
+
// Find the next level's P2M entry, checking for out-of-range gfn's...
// Returns NULL on error.
@@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
}
mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
/* Read another domain's p2m entries */
{
mfn_t mfn;
- paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
l2_pgentry_t *l2e;
l1_pgentry_t *l1e;
ASSERT(paging_mode_translate(d));
+
+ /* XXX This is for compatibility with the old model, where anything not
+ * XXX marked as RAM was considered to be emulated MMIO space.
+ * XXX Once we start explicitly registering MMIO regions in the p2m
+ * XXX we will return p2m_invalid for unmapped gfns */
+ *t = p2m_mmio_dm;
+
mfn = pagetable_get_mfn(d->arch.phys_table);
-
- if ( gpfn > d->arch.p2m.max_mapped_pfn )
+ if ( gfn > d->arch.p2m.max_mapped_pfn )
/* This pfn is higher than the highest the p2m map currently holds */
return _mfn(INVALID_MFN);
@@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
return _mfn(INVALID_MFN);
}
mfn = _mfn(l1e_get_pfn(*l1e));
+ *t = p2m_flags_to_type(l1e_get_flags(*l1e));
unmap_domain_page(l1e);
- return mfn;
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
}
#if P2M_AUDIT
@@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
return;
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
- //ASSERT(mfn_to_gfn(d, mfn) == gfn);
-
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
}
@@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
unsigned long mfn)
{
unsigned long ogfn;
+ p2m_type_t ot;
mfn_t omfn;
if ( !paging_mode_translate(d) )
@@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
- omfn = gfn_to_mfn(d, gfn);
- if ( mfn_valid(omfn) )
- {
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+ omfn = gfn_to_mfn(d, gfn, &ot);
+ if ( p2m_is_ram(ot) )
+ {
+ ASSERT(mfn_valid(omfn));
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
@@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
/* This machine frame is already mapped at another physical address */
P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
mfn, ogfn, gfn);
- if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
- {
+ omfn = gfn_to_mfn(d, ogfn, &ot);
+ if ( p2m_is_ram(ot) )
+ {
+ ASSERT(mfn_valid(omfn));
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
@@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
}
}
- set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
- set_gpfn_from_mfn(mfn, gfn);
+ if ( mfn_valid(_mfn(mfn)) )
+ {
+ set_p2m_entry(d, gfn, _mfn(mfn),
+ p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
+ set_gpfn_from_mfn(mfn, gfn);
+ }
+ else
+ {
+ gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
+ gfn, mfn);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ }
audit_p2m(d);
p2m_unlock(d);
}
-/* This function goes through P2M table and modify l1e flags of all pages. Note
- * that physical base address of l1e is intact. This function can be used for
- * special purpose, such as marking physical memory as NOT WRITABLE for
- * tracking dirty pages during live migration.
- */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
-{
- unsigned long mfn, gfn;
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type. This is used in hardware-assisted paging to
+ * quickly enable or disable log-dirty tracking */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
+{
+ unsigned long mfn, gfn, flags;
l1_pgentry_t l1e_content;
l1_pgentry_t *l1e;
l2_pgentry_t *l2e;
@@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
{
- if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+ flags = l1e_get_flags(l1e[i1]);
+ if ( p2m_flags_to_type(flags) != ot )
continue;
mfn = l1e_get_pfn(l1e[i1]);
gfn = get_gpfn_from_mfn(mfn);
- /* create a new 1le entry using l1e_flags */
- l1e_content = l1e_from_pfn(mfn, l1e_flags);
+ /* create a new l1e entry with the new type */
+ flags = p2m_type_to_flags(nt);
+ l1e_content = l1e_from_pfn(mfn, flags);
paging_write_p2m_entry(d, gfn, &l1e[i1],
l1mfn, l1e_content, 1);
}
@@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain
p2m_unlock(d);
}
-/* This function traces through P2M table and modifies l1e flags of a specific
- * gpa.
- */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
-{
- unsigned long gfn;
+/* Modify the p2m type of a single gfn from ot to nt, returning the
+ * entry's previous type */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+ p2m_type_t ot, p2m_type_t nt)
+{
+ p2m_type_t pt;
mfn_t mfn;
p2m_lock(d);
- gfn = gpa >> PAGE_SHIFT;
- mfn = gfn_to_mfn(d, gfn);
- if ( mfn_valid(mfn) )
- set_p2m_entry(d, gfn, mfn, l1e_flags);
+ mfn = gfn_to_mfn(d, gfn, &pt);
+ if ( pt == ot )
+ set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
p2m_unlock(d);
- return 1;
+ return pt;
}
/*
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/common.c Mon Sep 10 14:42:30 2007 +0100
@@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
l1_pgentry_t new, unsigned int level)
{
struct domain *d = v->domain;
- mfn_t mfn;
shadow_lock(d);
- /* handle physmap_add and physmap_remove */
- mfn = gfn_to_mfn(d, gfn);
- if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
- sh_remove_all_shadows_and_parents(v, mfn);
- if ( sh_remove_all_mappings(v, mfn) )
- flush_tlb_mask(d->domain_dirty_cpumask);
- }
-
- /* update the entry with new content */
+ /* If we're removing an MFN from the p2m, remove it from the shadows too */
+ if ( level == 1 )
+ {
+ mfn_t mfn = _mfn(l1e_get_pfn(*p));
+ p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+ if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
+ {
+ sh_remove_all_shadows_and_parents(v, mfn);
+ if ( sh_remove_all_mappings(v, mfn) )
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ }
+ }
+
+ /* Update the entry with new content */
safe_write_pte(p, new);
/* install P2M in monitors for PAE Xen */
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c Mon Sep 10 14:42:30 2007 +0100
@@ -209,6 +209,7 @@ guest_walk_tables(struct vcpu *v, unsign
guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
{
struct domain *d = v->domain;
+ p2m_type_t p2mt;
ASSERT(!guest_op || shadow_locked_by_me(d));
perfc_incr(shadow_guest_walk);
@@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
+ guest_l4_table_offset(va);
/* Walk down to the l3e */
if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
- gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
- if ( !mfn_valid(gw->l3mfn) ) return 1;
+ gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+ if ( !p2m_is_ram(p2mt) ) return 1;
+ ASSERT(mfn_valid(gw->l3mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
flush_tlb_mask(d->domain_dirty_cpumask);
@@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
#endif /* PAE or 64... */
/* Walk down to the l2e */
if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
- gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
- if ( !mfn_valid(gw->l2mfn) ) return 1;
+ gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+ if ( !p2m_is_ram(p2mt) ) return 1;
+ ASSERT(mfn_valid(gw->l2mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
flush_tlb_mask(d->domain_dirty_cpumask);
@@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
else
{
/* Not a superpage: carry on and find the l1e. */
- gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
- if ( !mfn_valid(gw->l1mfn) ) return 1;
+ gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+ if ( !p2m_is_ram(p2mt) ) return 1;
+ ASSERT(mfn_valid(gw->l1mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
if ( guest_op
&& sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
@@ -626,7 +630,7 @@ _sh_propagate(struct vcpu *v,
void *shadow_entry_ptr,
int level,
fetch_type_t ft,
- int mmio)
+ p2m_type_t p2mt)
{
guest_l1e_t *gp = guest_entry_ptr;
shadow_l1e_t *sp = shadow_entry_ptr;
@@ -636,6 +640,13 @@ _sh_propagate(struct vcpu *v,
/* We don't shadow PAE l3s */
ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
+ /* Check there's something for the shadows to map to */
+ if ( !p2m_is_valid(p2mt) )
+ {
+ *sp = shadow_l1e_empty();
+ goto done;
+ }
if ( mfn_valid(guest_table_mfn) )
/* Handle A and D bit propagation into the guest */
@@ -658,19 +669,22 @@ _sh_propagate(struct vcpu *v,
goto done;
}
- if ( level == 1 && mmio )
- {
- /* Guest l1e maps MMIO space */
+ if ( level == 1 && p2mt == p2m_mmio_dm )
+ {
+ /* Guest l1e maps emulated MMIO space */
*sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
if ( !d->arch.paging.shadow.has_fast_mmio_entries )
d->arch.paging.shadow.has_fast_mmio_entries = 1;
goto done;
}
- // Must have a valid target_mfn, unless this is a prefetch. In the
+ // Must have a valid target_mfn unless this is a prefetch. In the
// case of a prefetch, an invalid mfn means that we can not usefully
// shadow anything, and so we return early.
//
+ /* N.B. For pass-through MMIO, either this test needs to be relaxed,
+ * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
+ * MMIO areas need to be added to the frame-table to make them "valid". */
if ( !mfn_valid(target_mfn) )
{
ASSERT((ft == ft_prefetch));
@@ -718,6 +732,8 @@ _sh_propagate(struct vcpu *v,
// Only allow the guest write access to a page a) on a demand fault,
// or b) if the page is already marked as dirty.
//
+ // (We handle log-dirty entirely inside the shadow code, without using the
+ // p2m_ram_logdirty p2m type: only HAP uses that.)
if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
{
if ( ft & FETCH_TYPE_WRITE )
@@ -725,6 +741,10 @@ _sh_propagate(struct vcpu *v,
else if ( !sh_mfn_is_dirty(d, target_mfn) )
sflags &= ~_PAGE_RW;
}
+
+ /* Read-only memory */
+ if ( p2mt == p2m_ram_ro )
+ sflags &= ~_PAGE_RW;
// protect guest page tables
//
@@ -754,7 +774,12 @@ _sh_propagate(struct vcpu *v,
sflags |= _PAGE_USER;
}
+ /* MMIO addresses should never be cached */
+ if ( p2m_is_mmio(p2mt) )
+ sflags |= _PAGE_PCD;
+
*sp = shadow_l1e_from_mfn(target_mfn, sflags);
+
done:
SHADOW_DEBUG(PROPAGATE,
"%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
@@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
shadow_l4e_t *sl4e,
fetch_type_t ft)
{
- _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
+ _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
}
static void
@@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
shadow_l3e_t *sl3e,
fetch_type_t ft)
{
- _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
+ _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
}
#endif // GUEST_PAGING_LEVELS >= 4
@@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
shadow_l2e_t *sl2e,
fetch_type_t ft)
{
- _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
+ _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
}
static void
@@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
mfn_t gmfn,
shadow_l1e_t *sl1e,
fetch_type_t ft,
- int mmio)
-{
- _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
+ p2m_type_t p2mt)
+{
+ _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
}
@@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
shadow_l4e_t *sl4p = se;
mfn_t sl3mfn = _mfn(INVALID_MFN);
struct domain *d = v->domain;
+ p2m_type_t p2mt;
int result = 0;
perfc_incr(shadow_validate_gl4e_calls);
@@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
{
gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
- mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
- if ( mfn_valid(gl3mfn) )
+ mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
else
result |= SHADOW_SET_ERROR;
@@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
guest_l3e_t *new_gl3e = new_ge;
shadow_l3e_t *sl3p = se;
mfn_t sl2mfn = _mfn(INVALID_MFN);
+ p2m_type_t p2mt;
int result = 0;
perfc_incr(shadow_validate_gl3e_calls);
@@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
{
gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
- mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
- if ( mfn_valid(gl2mfn) )
+ mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
else
result |= SHADOW_SET_ERROR;
@@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
guest_l2e_t *new_gl2e = new_ge;
shadow_l2e_t *sl2p = se;
mfn_t sl1mfn = _mfn(INVALID_MFN);
+ p2m_type_t p2mt;
int result = 0;
perfc_incr(shadow_validate_gl2e_calls);
@@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
}
else
{
- mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
- if ( mfn_valid(gl1mfn) )
+ mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
else
result |= SHADOW_SET_ERROR;
@@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
shadow_l1e_t *sl1p = se;
gfn_t gfn;
mfn_t gmfn;
- int result = 0, mmio;
+ p2m_type_t p2mt;
+ int result = 0;
perfc_incr(shadow_validate_gl1e_calls);
gfn = guest_l1e_get_gfn(*new_gl1e);
- gmfn = gfn_to_mfn(v->domain, gfn);
-
- mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
+ gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+
l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e,
- ft_prefetch, mmio);
+ ft_prefetch, p2mt);
result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
return result;
@@ -2554,12 +2582,13 @@ static void sh_prefetch(struct vcpu *v,
static void sh_prefetch(struct vcpu *v, walk_t *gw,
shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
{
- int i, dist, mmio;
+ int i, dist;
gfn_t gfn;
mfn_t gmfn;
guest_l1e_t gl1e;
shadow_l1e_t sl1e;
u32 gflags;
+ p2m_type_t p2mt;
/* Prefetch no further than the end of the _shadow_ l1 MFN */
dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
@@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v,
/* Look at the gfn that the l1e is pointing at */
gfn = guest_l1e_get_gfn(gl1e);
- gmfn = gfn_to_mfn(v->domain, gfn);
- mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
+ gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
/* Propagate the entry. Safe to use a pointer to our local
* gl1e, since this is not a demand-fetch so there will be no
* write-back to the guest. */
l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
- gmfn, &sl1e, ft_prefetch, mmio);
+ gmfn, &sl1e, ft_prefetch, p2mt);
(void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
}
}
@@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
paddr_t gpa;
struct sh_emulate_ctxt emul_ctxt;
struct x86_emulate_ops *emul_ops;
- int r, mmio;
+ int r;
fetch_type_t ft = 0;
+ p2m_type_t p2mt;
SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
v->domain->domain_id, v->vcpu_id, va, regs->error_code);
@@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
/* What mfn is the guest trying to access? */
gfn = guest_l1e_get_gfn(gw.eff_l1e);
- gmfn = gfn_to_mfn(d, gfn);
- mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
-
- if ( !mmio && !mfn_valid(gmfn) )
+ gmfn = gfn_to_mfn(d, gfn, &p2mt);
+
+ if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
{
perfc_incr(shadow_fault_bail_bad_gfn);
SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
@@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
/* Calculate the shadow entry and write it */
l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn,
- gmfn, &sl1e, ft, mmio);
+ gmfn, &sl1e, ft, p2mt);
r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
}
}
- if ( mmio )
+ /* Need to hand off device-model MMIO and writes to read-only
+ * memory to the device model */
+ if ( p2mt == p2m_mmio_dm
+ || (p2mt == p2m_ram_ro && ft == ft_demand_write) )
{
gpa = guest_walk_to_gpa(&gw);
goto mmio;
@@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
int flush = 0;
gfn_t gl2gfn;
mfn_t gl2mfn;
+ p2m_type_t p2mt;
guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
/* First, make all four entries read-only. */
for ( i = 0; i < 4; i++ )
@@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
{
gl2gfn = guest_l3e_get_gfn(gl3e[i]);
- gl2mfn = gfn_to_mfn(d, gl2gfn);
- flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
+ gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
+ flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
}
}
if ( flush )
@@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
{
gl2gfn = guest_l3e_get_gfn(gl3e[i]);
- gl2mfn = gfn_to_mfn(d, gl2gfn);
- sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
- ? SH_type_l2h_shadow
- : SH_type_l2_shadow);
+ gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
+ sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
+ ? SH_type_l2h_shadow
+ : SH_type_l2_shadow);
+ else
+ sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
}
else
- /* The guest is not present: clear out the shadow. */
sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
}
}
@@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
u32 flags, errcode;
gfn_t gfn;
mfn_t mfn;
+ p2m_type_t p2mt;
/* We don't emulate user-mode writes to page tables */
if ( ring_3(sh_ctxt->ctxt.regs) )
@@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
}
}
#endif
- mfn = gfn_to_mfn(v->domain, gfn);
errcode = PFEC_write_access;
if ( !(flags & _PAGE_PRESENT) )
@@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
if ( !(flags & _PAGE_RW) )
goto page_fault;
- if ( mfn_valid(mfn) )
- {
+ mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
+ {
+ ASSERT(mfn_valid(mfn));
*mfnp = mfn;
v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
@@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
/* Convert this gfn to an mfn in the manner appropriate for the
* guest pagetable it's used in (gmfn) */
{
+ p2m_type_t p2mt;
if ( !shadow_mode_translate(v->domain) )
return _mfn(gfn_x(gfn));
@@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
!= PGT_writable_page )
return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
else
- return gfn_to_mfn(v->domain, gfn);
+ return gfn_to_mfn(v->domain, gfn, &p2mt);
}
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/types.h Mon Sep 10 14:42:30 2007 +0100
@@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
/* Override gfn_to_mfn to work with gfn_t */
#undef gfn_to_mfn
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
/* Type used for recording a walk through guest pagetables. It is
diff -r 1474db8058b2 -r 4633e9604da9 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/include/asm-x86/mm.h Mon Sep 10 14:42:30 2007 +0100
@@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
? get_gpfn_from_mfn(mfn) \
: (mfn) )
-#define gmfn_to_mfn(_d, gpfn) mfn_x(gfn_to_mfn(_d, gpfn))
-
#define INVALID_MFN (~0UL)
#ifdef CONFIG_COMPAT
diff -r 1474db8058b2 -r 4633e9604da9 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/include/asm-x86/p2m.h Mon Sep 10 14:42:30 2007 +0100
@@ -4,7 +4,7 @@
* physical-to-machine mappings for automatically-translated domains.
*
* Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
* Parts of this code are Copyright (c) 2006 by Michael A Fetterman
* Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
*
@@ -27,49 +27,141 @@
#define _XEN_P2M_H
-/* The phys_to_machine_mapping is the reversed mapping of MPT for full
- * virtualization. It is only used by shadow_mode_translate()==true
- * guests, so we steal the address space that would have normally
- * been used by the read-only MPT map.
+/*
+ * The phys_to_machine_mapping maps guest physical frame numbers
+ * to machine frame numbers. It only exists for paging_mode_translate
+ * guests. It is organised in page-table format, which:
+ *
+ * (1) allows us to use it directly as the second pagetable in hardware-
+ * assisted paging and (hopefully) iommu support; and
+ * (2) lets us map it directly into the guest vcpus' virtual address space
+ * as a linear pagetable, so we can read and write it easily.
+ *
+ * For (2) we steal the address space that would have normally been used
+ * by the read-only MPT map in a non-translated guest. (For
+ * paging_mode_external() guests this mapping is in the monitor table.)
*/
#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
-
-/* Read the current domain's P2M table. */
-static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
-{
- l1_pgentry_t l1e = l1e_empty();
- int ret;
-
- if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
- return _mfn(INVALID_MFN);
-
- /* Don't read off the end of the p2m table */
- ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
-
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
- return _mfn(l1e_get_pfn(l1e));
-
- return _mfn(INVALID_MFN);
+/*
+ * The upper levels of the p2m pagetable always contain full rights; all
+ * variation in the access control bits is made in the level-1 PTEs.
+ *
+ * In addition to the phys-to-machine translation, each p2m PTE contains
+ * *type* information about the gfn it translates, helping Xen to decide
+ * on the correct course of action when handling a page-fault to that
+ * guest frame. We store the type in the "available" bits of the PTEs
+ * in the table, which gives us 8 possible types on 32-bit systems.
+ * Further expansions of the type system will only be supported on
+ * 64-bit Xen.
+ */
+typedef enum {
+ p2m_invalid = 0, /* Nothing mapped here */
+ p2m_ram_rw = 1, /* Normal read/write guest RAM */
+ p2m_ram_logdirty = 2, /* Temporarily read-only for log-dirty */
+ p2m_ram_ro = 3, /* Read-only; writes go to the device model */
+ p2m_mmio_dm = 4, /* Reads and writes go to the device model */
+ p2m_mmio_direct = 5, /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
+
+/* We use bitmaps and masks to handle groups of types */
+#define p2m_to_mask(_t) (1UL << (_t))
+
+/* RAM types, which map to real machine frames */
+#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw) \
+ | p2m_to_mask(p2m_ram_logdirty) \
+ | p2m_to_mask(p2m_ram_ro))
+
+/* MMIO types, which don't have to map to anything in the frametable */
+#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm) \
+ | p2m_to_mask(p2m_mmio_direct))
+
+/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
+#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty) \
+ | p2m_to_mask(p2m_ram_ro))
+
+/* Useful predicates */
+#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
+#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
+#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+ /* Type is stored in the "available" bits, 9, 10 and 11 */
+ return (flags >> 9) & 0x7;
+}
+
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+ mfn_t mfn = _mfn(INVALID_MFN);
+ p2m_type_t p2mt = p2m_mmio_dm;
+ /* XXX This is for compatibility with the old model, where anything not
+ * XXX marked as RAM was considered to be emulated MMIO space.
+ * XXX Once we start explicitly registering MMIO regions in the p2m
+ * XXX we will return p2m_invalid for unmapped gfns */
+
+ if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
+ {
+ l1_pgentry_t l1e = l1e_empty();
+ int ret;
+
+ ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
+ / sizeof(l1_pgentry_t));
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
+ }
+
+ *t = p2mt;
+ return mfn;
}
/* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
/* General conversion function from gfn to mfn */
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
-static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
+static inline mfn_t _gfn_to_mfn(struct domain *d,
+ unsigned long gfn, p2m_type_t *t)
{
if ( !paging_mode_translate(d) )
+ {
+ /* Not necessarily true, but for non-translated guests, we claim
+ * it's the most generic kind of memory */
+ *t = p2m_ram_rw;
return _mfn(gfn);
+ }
if ( likely(current->domain == d) )
- return gfn_to_mfn_current(gfn);
+ return gfn_to_mfn_current(gfn, t);
else
- return gfn_to_mfn_foreign(d, gfn);
+ return gfn_to_mfn_foreign(d, gfn, t);
+}
+
+/* Compatibility function exporting the old untyped interface */
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
+{
+ mfn_t mfn;
+ p2m_type_t t;
+ mfn = gfn_to_mfn(d, gpfn, &t);
+ if ( p2m_is_valid(t) )
+ return mfn_x(mfn);
+ return INVALID_MFN;
}
/* General conversion function from mfn to gfn */
@@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
return mfn_x(mfn);
}
-/* Compatibility function for HVM code */
-static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
-{
- return mfn_x(gfn_to_mfn_current(pfn));
-}
-
-/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
-static inline int mmio_space(paddr_t gpa)
-{
- unsigned long gfn = gpa >> PAGE_SHIFT;
- return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
-}
-
/* Translate the frame number held in an l1e from guest to machine */
static inline l1_pgentry_t
gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
@@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
}
-
/* Init the datastructures for later use by the p2m code */
void p2m_init(struct domain *d);
@@ -130,11 +208,12 @@ void guest_physmap_remove_page(struct do
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
unsigned long mfn);
-/* set P2M table l1e flags */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
-
-/* set P2M table l1e flags for a gpa */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
+/* Change types across all p2m entries in a domain */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
+
+/* Compare-exchange the type of a single p2m entry */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+ p2m_type_t ot, p2m_type_t nt);
#endif /* _XEN_P2M_H */
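
As a quick sanity check on the encoding above, here is a small standalone sketch (plain
userspace C, not Xen code; the constants and arithmetic are copied from
p2m_type_to_flags()/p2m_flags_to_type() above, and the helper names are invented for
illustration) showing a type round-tripping through PTE bits 9-11 and the group predicates:

    #include <assert.h>
    #include <stdio.h>

    /* Mirrors the enum and masks added to p2m.h above. */
    typedef enum {
        p2m_invalid = 0, p2m_ram_rw = 1, p2m_ram_logdirty = 2,
        p2m_ram_ro = 3, p2m_mmio_dm = 4, p2m_mmio_direct = 5,
    } p2m_type_t;

    #define p2m_to_mask(t)  (1UL << (t))
    #define P2M_RAM_TYPES   (p2m_to_mask(p2m_ram_rw) | \
                             p2m_to_mask(p2m_ram_logdirty) | \
                             p2m_to_mask(p2m_ram_ro))
    #define p2m_is_ram(t)   (p2m_to_mask(t) & P2M_RAM_TYPES)

    /* Same arithmetic as p2m_type_to_flags() / p2m_flags_to_type(). */
    static unsigned long type_to_avail_bits(p2m_type_t t) { return (t & 0x7UL) << 9; }
    static p2m_type_t avail_bits_to_type(unsigned long f) { return (p2m_type_t)((f >> 9) & 0x7); }

    int main(void)
    {
        /* A log-dirty RAM entry: present (bit 0) plus type 2 in bits 9-11. */
        unsigned long flags = 0x1UL | type_to_avail_bits(p2m_ram_logdirty);

        assert(avail_bits_to_type(flags) == p2m_ram_logdirty);
        assert(p2m_is_ram(avail_bits_to_type(flags)));
        printf("log-dirty PTE flags: %#lx\n", flags); /* prints 0x401 */
        return 0;
    }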