# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1189454336 21600
# Node ID 42b925c00d8aff2bf2abdd0d23c11fa75271ea58
# Parent 7d9b20d91102803532be2cc1d1b00733ceab12cc
# Parent 154769114a82c4051b196bdfe7fdea4b159d339f
merge with xen-unstable.hg (staging)
---
tools/libxen/src/xen_xspolicy.c | 2
xen/Makefile | 3
xen/arch/ia64/xen/domain.c | 2
xen/arch/powerpc/domain.c | 2
xen/arch/x86/acpi/power.c | 15 ---
xen/arch/x86/hvm/hvm.c | 33 ++++---
xen/arch/x86/hvm/io.c | 9 -
xen/arch/x86/hvm/svm/intr.c | 35 +++++++
xen/arch/x86/hvm/svm/svm.c | 42 ++++++---
xen/arch/x86/hvm/svm/vmcb.c | 30 +++---
xen/arch/x86/hvm/vmx/intr.c | 29 ++++--
xen/arch/x86/hvm/vmx/vmx.c | 18 ++-
xen/arch/x86/machine_kexec.c | 3
xen/arch/x86/mm/hap/guest_walk.c | 10 +-
xen/arch/x86/mm/hap/hap.c | 10 +-
xen/arch/x86/mm/p2m.c | 122 +++++++++++++++++---------
xen/arch/x86/mm/shadow/common.c | 24 +++--
xen/arch/x86/mm/shadow/multi.c | 138 +++++++++++++++++++-----------
xen/arch/x86/mm/shadow/types.h | 2
xen/arch/x86/shutdown.c | 10 --
xen/arch/x86/smp.c | 47 +++++++---
xen/common/keyhandler.c | 2
xen/common/shutdown.c | 4
xen/drivers/char/console.c | 2
xen/include/asm-x86/mm.h | 2
xen/include/asm-x86/p2m.h | 179 ++++++++++++++++++++++++++++-----------
xen/include/xen/shutdown.h | 2
27 files changed, 509 insertions(+), 268 deletions(-)
diff -r 7d9b20d91102 -r 42b925c00d8a tools/libxen/src/xen_xspolicy.c
--- a/tools/libxen/src/xen_xspolicy.c Mon Sep 10 13:56:34 2007 -0600
+++ b/tools/libxen/src/xen_xspolicy.c Mon Sep 10 13:58:56 2007 -0600
@@ -21,8 +21,8 @@
#include <stddef.h>
#include <stdlib.h>
+#include "xen_internal.h"
#include "xen/api/xen_common.h"
-#include "xen/api/xen_internal.h"
#include "xen/api/xen_xspolicy.h"
diff -r 7d9b20d91102 -r 42b925c00d8a xen/Makefile
--- a/xen/Makefile Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/Makefile Mon Sep 10 13:58:56 2007 -0600
@@ -35,12 +35,15 @@ _install: $(TARGET).gz build-headers
$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/hvm
[ -d $(DESTDIR)/usr/include/xen/io ] || \
$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/io
+ [ -d $(DESTDIR)/usr/include/xen/xsm ] || \
+ $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/xsm
[ -d $(DESTDIR)/usr/include/xen/foreign ] || \
$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/foreign
$(INSTALL_DATA) include/public/*.h $(DESTDIR)/usr/include/xen
$(INSTALL_DATA) include/public/arch-x86/*.h
$(DESTDIR)/usr/include/xen/arch-x86
$(INSTALL_DATA) include/public/hvm/*.h $(DESTDIR)/usr/include/xen/hvm
$(INSTALL_DATA) include/public/io/*.h $(DESTDIR)/usr/include/xen/io
+ $(INSTALL_DATA) include/public/xsm/*.h $(DESTDIR)/usr/include/xen/xsm
$(INSTALL_DATA) include/public/foreign/*.h
$(DESTDIR)/usr/include/xen/foreign
$(INSTALL_DATA) include/public/COPYING $(DESTDIR)/usr/include/xen
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/ia64/xen/domain.c Mon Sep 10 13:58:56 2007 -0600
@@ -1487,7 +1487,7 @@ int __init construct_dom0(struct domain
return 0;
}
-void machine_restart(char * __unused)
+void machine_restart(void)
{
console_start_sync();
if (running_on_sim)
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/powerpc/domain.c Mon Sep 10 13:58:56 2007 -0600
@@ -119,7 +119,7 @@ void machine_halt(void)
machine_fail(__func__);
}
-void machine_restart(char * __unused)
+void machine_restart(void)
{
console_start_sync();
printk("%s called\n", __func__);
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/acpi/power.c Mon Sep 10 13:58:56 2007 -0600
@@ -181,11 +181,6 @@ static long enter_state_helper(void *dat
/*
* Dom0 issues this hypercall in place of writing pm1a_cnt. Xen then
* takes over the control and put the system into sleep state really.
- *
- * Guest may issue a two-phases write to PM1x_CNT, to work
- * around poorly implemented hardware. It's better to keep
- * this logic here. Two writes can be differentiated by
- * enable bit setting.
*/
int acpi_enter_sleep(struct xenpf_enter_acpi_sleep *sleep)
{
@@ -204,16 +199,6 @@ int acpi_enter_sleep(struct xenpf_enter_
if ( sleep->flags )
return -EINVAL;
- /* Write #1 */
- if ( !(sleep->pm1a_cnt_val & ACPI_BITMASK_SLEEP_ENABLE) )
- {
- outw((u16)sleep->pm1a_cnt_val, acpi_sinfo.pm1a_cnt);
- if ( acpi_sinfo.pm1b_cnt )
- outw((u16)sleep->pm1b_cnt_val, acpi_sinfo.pm1b_cnt);
- return 0;
- }
-
- /* Write #2 */
acpi_sinfo.pm1a_cnt_val = sleep->pm1a_cnt_val;
acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
acpi_sinfo.sleep_state = sleep->sleep_state;
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c Mon Sep 10 13:58:56 2007 -0600
@@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
{
struct page_info *page;
+ p2m_type_t p2mt;
unsigned long mfn;
void *va;
- mfn = gmfn_to_mfn(d, gmfn);
- if ( !mfn_valid(mfn) )
+ mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+ if ( !p2m_is_ram(p2mt) )
return -EINVAL;
+ ASSERT(mfn_valid(mfn));
page = mfn_to_page(mfn);
if ( !get_page_and_type(page, d, PGT_writable_page) )
@@ -517,7 +519,8 @@ int hvm_set_cr0(unsigned long value)
int hvm_set_cr0(unsigned long value)
{
struct vcpu *v = current;
- unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
+ p2m_type_t p2mt;
+ unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
@@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
if ( !paging_mode_hap(v->domain) )
{
/* The guest CR3 must be pointing to the guest physical. */
- mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
+ gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
+ mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+ !get_page(mfn_to_page(mfn), v->domain))
{
gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
v->arch.hvm_vcpu.guest_cr[3], mfn);
@@ -603,16 +608,18 @@ int hvm_set_cr3(unsigned long value)
int hvm_set_cr3(unsigned long value)
{
unsigned long mfn;
+ p2m_type_t p2mt;
struct vcpu *v = current;
if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
(value != v->arch.hvm_vcpu.guest_cr[3]) )
{
- /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
+ /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
- mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
- goto bad_cr3;
+ mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+ !get_page(mfn_to_page(mfn), v->domain) )
+ goto bad_cr3;
put_page(pagetable_get_page(v->arch.guest_table));
v->arch.guest_table = pagetable_from_pfn(mfn);
@@ -677,6 +684,7 @@ static int __hvm_copy(void *buf, paddr_t
static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
{
unsigned long gfn, mfn;
+ p2m_type_t p2mt;
char *p;
int count, todo;
@@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
else
gfn = addr >> PAGE_SHIFT;
- mfn = get_mfn_from_gpfn(gfn);
-
- if ( mfn == INVALID_MFN )
+ mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+
+ if ( !p2m_is_ram(p2mt) )
return todo;
+ ASSERT(mfn_valid(mfn));
p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/io.c Mon Sep 10 13:58:56 2007 -0600
@@ -826,9 +826,7 @@ void hvm_io_assist(void)
ioreq_t *p;
struct cpu_user_regs *regs;
struct hvm_io_op *io_opp;
- unsigned long gmfn;
struct vcpu *v = current;
- struct domain *d = v->domain;
io_opp = &v->arch.hvm_vcpu.io_op;
regs = &io_opp->io_context;
@@ -861,13 +859,6 @@ void hvm_io_assist(void)
regs->eflags &= ~X86_EFLAGS_RF;
hvm_load_cpu_guest_regs(v, regs);
memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
-
- /* Has memory been dirtied? */
- if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
- {
- gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
- paging_mark_dirty(d, gmfn);
- }
out:
vcpu_end_shutdown_deferral(v);
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c Mon Sep 10 13:58:56 2007 -0600
@@ -30,6 +30,7 @@
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
+#include <asm/hvm/vlapic.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/intr.h>
#include <xen/event.h>
@@ -99,6 +100,33 @@ static void enable_intr_window(struct vc
svm_inject_dummy_vintr(v);
}
+static void update_cr8_intercept(
+ struct vcpu *v, enum hvm_intack masked_intr_source)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+ int max_irr;
+
+ vmcb->cr_intercepts &= ~CR_INTERCEPT_CR8_WRITE;
+
+ /*
+ * If ExtInts are masked then that dominates the TPR --- the 'interrupt
+ * window' has already been enabled in this case.
+ */
+ if ( (masked_intr_source == hvm_intack_lapic) ||
+ (masked_intr_source == hvm_intack_pic) )
+ return;
+
+ /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
+ if ( !vlapic_enabled(vlapic) ||
+ ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
+ return;
+
+ /* Highest-priority pending interrupt is masked by the TPR? */
+ if ( (vmcb->vintr.fields.tpr & 0xf) >= (max_irr >> 4) )
+ vmcb->cr_intercepts |= CR_INTERCEPT_CR8_WRITE;
+}
+
asmlinkage void svm_intr_assist(void)
{
struct vcpu *v = current;
@@ -113,7 +141,7 @@ asmlinkage void svm_intr_assist(void)
do {
intr_source = hvm_vcpu_has_pending_irq(v);
if ( likely(intr_source == hvm_intack_none) )
- return;
+ goto out;
/*
* Pending IRQs must be delayed if:
@@ -133,7 +161,7 @@ asmlinkage void svm_intr_assist(void)
!hvm_interrupts_enabled(v, intr_source) )
{
enable_intr_window(v, intr_source);
- return;
+ goto out;
}
} while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
@@ -152,6 +180,9 @@ asmlinkage void svm_intr_assist(void)
intr_source = hvm_vcpu_has_pending_irq(v);
if ( unlikely(intr_source != hvm_intack_none) )
enable_intr_window(v, intr_source);
+
+ out:
+ update_cr8_intercept(v, intr_source);
}
/*
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c Mon Sep 10 13:58:56 2007 -0600
@@ -338,6 +338,7 @@ int svm_vmcb_restore(struct vcpu *v, str
int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
{
unsigned long mfn = 0;
+ p2m_type_t p2mt;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
if ( c->pending_valid &&
@@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
{
if ( c->cr0 & X86_CR0_PG )
{
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+ mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
{
gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
c->cr3);
@@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
return 1;
}
-static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
-{
- if (mmio_space(gpa)) {
+static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+ p2m_type_t p2mt;
+ mfn_t mfn;
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+
+ /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+ mfn = gfn_to_mfn_current(gfn, &p2mt);
+ if ( p2mt == p2m_mmio_dm )
+ {
handle_mmio(gpa);
- return 1;
- }
-
- paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
- return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
+ return;
+ }
+
+ /* Log-dirty: mark the page dirty and let the guest write it again */
+ paging_mark_dirty(current->domain, mfn_x(mfn));
+ p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
}
static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
@@ -2144,6 +2153,16 @@ asmlinkage void svm_vmexit_handler(struc
eventinj_t eventinj;
int inst_len, rc;
+ /*
+ * Before doing anything else, we need to sync up the VLAPIC's TPR with
+ * SVM's vTPR if CR8 writes are currently disabled. It's OK if the
+ * guest doesn't touch the CR8 (e.g. 32-bit Windows) because we update
+ * the vTPR on MMIO writes to the TPR
+ */
+ if ( !(vmcb->cr_intercepts & CR_INTERCEPT_CR8_WRITE) )
+ vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
+ (vmcb->vintr.fields.tpr & 0x0F) << 4);
+
exit_reason = vmcb->exitcode;
HVMTRACE_2D(VMEXIT, v, vmcb->rip, exit_reason);
@@ -2341,8 +2360,7 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_NPF:
regs->error_code = vmcb->exitinfo1;
- if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
- domain_crash(v->domain);
+ svm_do_nested_pgfault(vmcb->exitinfo2, regs);
break;
default:
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c Mon Sep 10 13:58:56 2007 -0600
@@ -114,23 +114,29 @@ static int construct_vmcb(struct vcpu *v
svm_asid_init_vcpu(v);
vmcb->general1_intercepts =
- GENERAL1_INTERCEPT_INTR | GENERAL1_INTERCEPT_NMI |
- GENERAL1_INTERCEPT_SMI | GENERAL1_INTERCEPT_INIT |
- GENERAL1_INTERCEPT_CPUID | GENERAL1_INTERCEPT_INVD |
- GENERAL1_INTERCEPT_HLT | GENERAL1_INTERCEPT_INVLPG |
- GENERAL1_INTERCEPT_INVLPGA | GENERAL1_INTERCEPT_IOIO_PROT |
- GENERAL1_INTERCEPT_MSR_PROT | GENERAL1_INTERCEPT_SHUTDOWN_EVT;
+ GENERAL1_INTERCEPT_INTR | GENERAL1_INTERCEPT_NMI |
+ GENERAL1_INTERCEPT_SMI | GENERAL1_INTERCEPT_INIT |
+ GENERAL1_INTERCEPT_CPUID | GENERAL1_INTERCEPT_INVD |
+ GENERAL1_INTERCEPT_HLT | GENERAL1_INTERCEPT_INVLPG |
+ GENERAL1_INTERCEPT_INVLPGA | GENERAL1_INTERCEPT_IOIO_PROT |
+ GENERAL1_INTERCEPT_MSR_PROT | GENERAL1_INTERCEPT_SHUTDOWN_EVT;
vmcb->general2_intercepts =
- GENERAL2_INTERCEPT_VMRUN | GENERAL2_INTERCEPT_VMMCALL |
- GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE |
- GENERAL2_INTERCEPT_STGI | GENERAL2_INTERCEPT_CLGI |
- GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;
+ GENERAL2_INTERCEPT_VMRUN | GENERAL2_INTERCEPT_VMMCALL |
+ GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE |
+ GENERAL2_INTERCEPT_STGI | GENERAL2_INTERCEPT_CLGI |
+ GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;
/* Intercept all debug-register writes. */
vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
- /* Intercept all control-register accesses, except to CR2. */
- vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ | CR_INTERCEPT_CR2_WRITE);
+ /*
+ * Intercept all control-register accesses except for CR2 reads/writes
+ * and CR8 reads (and actually CR8 writes, but that's a special case
+ * that's handled in svm/intr.c).
+ */
+ vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ |
+ CR_INTERCEPT_CR2_WRITE |
+ CR_INTERCEPT_CR8_READ);
/* I/O and MSR permission bitmaps. */
arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c Mon Sep 10 13:58:56 2007 -0600
@@ -107,22 +107,35 @@ static void enable_intr_window(struct vc
}
}
-static void update_tpr_threshold(struct vlapic *vlapic)
+static void update_tpr_threshold(
+ struct vcpu *v, enum hvm_intack masked_intr_source)
{
- int max_irr, tpr;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+ int max_irr, tpr, threshold = 0;
if ( !cpu_has_vmx_tpr_shadow )
return;
+ /*
+ * If ExtInts are masked then that dominates the TPR --- the 'interrupt
+ * window' has already been enabled in this case.
+ */
+ if ( (masked_intr_source == hvm_intack_lapic) ||
+ (masked_intr_source == hvm_intack_pic) )
+ goto out;
+
+ /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
if ( !vlapic_enabled(vlapic) ||
((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
- {
- __vmwrite(TPR_THRESHOLD, 0);
- return;
- }
+ goto out;
+ /* Highest-priority pending interrupt is masked by the TPR? */
tpr = vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xF0;
- __vmwrite(TPR_THRESHOLD, (max_irr > tpr) ? (tpr >> 4) : (max_irr >> 4));
+ if ( (tpr >> 4) >= (max_irr >> 4) )
+ threshold = max_irr >> 4;
+
+ out:
+ __vmwrite(TPR_THRESHOLD, threshold);
}
asmlinkage void vmx_intr_assist(void)
@@ -171,7 +184,7 @@ asmlinkage void vmx_intr_assist(void)
enable_intr_window(v, intr_source);
out:
- update_tpr_threshold(vcpu_vlapic(v));
+ update_tpr_threshold(v, intr_source);
}
/*
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Sep 10 13:58:56 2007 -0600
@@ -566,6 +566,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
{
unsigned long mfn = 0;
+ p2m_type_t p2mt;
if ( c->pending_valid &&
((c->pending_type == 1) || (c->pending_type > 6) ||
@@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
if ( c->cr0 & X86_CR0_PG )
{
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+ mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
{
gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
return -EINVAL;
@@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
* Note that this leaf lives at <max-hypervisor-leaf> + 1.
*/
u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
- unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+ p2m_type_t p2mt;
+ unsigned long mfn;
struct vcpu *v = current;
char *p;
+ mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+
gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
/* 8-byte aligned valid pseudophys address from vmxassist, please. */
- if ( (value & 7) || (mfn == INVALID_MFN) ||
+ if ( (value & 7) || !p2m_is_ram(p2mt) ||
!v->arch.hvm_vmx.vmxassist_enabled )
{
domain_crash(v->domain);
return;
}
+ ASSERT(mfn_valid(mfn));
p = map_domain_page(mfn);
value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
@@ -1905,11 +1910,12 @@ static int vmx_world_restore(struct vcpu
static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
{
unsigned long mfn = 0;
+ p2m_type_t p2mt;
if ( c->cr0 & X86_CR0_PG )
{
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+ mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
{
gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
return -EINVAL;
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/machine_kexec.c
--- a/xen/arch/x86/machine_kexec.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/machine_kexec.c Mon Sep 10 13:58:56 2007 -0600
@@ -82,9 +82,6 @@ static void __machine_reboot_kexec(void
smp_send_stop();
- disable_IO_APIC();
- hvm_cpu_down();
-
machine_kexec(image);
}
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/hap/guest_walk.c Mon Sep 10 13:58:56 2007 -0600
@@ -28,7 +28,8 @@
#include <xen/sched.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/domain.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
#include <asm/hap.h>
#include "private.h"
@@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
int lev, index;
paddr_t gpa = 0;
unsigned long gpfn, mfn;
+ p2m_type_t p2mt;
int success = 1;
l1_pgentry_t *l1e;
@@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
gpfn = (gcr3 >> PAGE_SHIFT);
for ( lev = mode; lev >= 1; lev-- )
{
- mfn = get_mfn_from_gpfn(gpfn);
- if ( mfn == INVALID_MFN )
+ mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
+ if ( !p2m_is_ram(p2mt) )
{
HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
lev);
success = 0;
break;
}
+ ASSERT(mfn_valid(mfn));
+
index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
#if GUEST_PAGING_LEVELS >= 4
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/hap/hap.c Mon Sep 10 13:58:56 2007 -0600
@@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
d->arch.paging.mode |= PG_log_dirty;
hap_unlock(d);
- /* set l1e entries of P2M table to NOT_WRITABLE. */
- p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+ /* set l1e entries of P2M table to be read-only. */
+ p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
flush_tlb_mask(d->domain_dirty_cpumask);
return 0;
}
@@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain
hap_unlock(d);
/* set l1e entries of P2M table with normal mode */
- p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
+ p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
return 0;
}
void hap_clean_dirty_bitmap(struct domain *d)
{
- /* mark physical memory as NOT_WRITEABLE and flush the TLB */
- p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+ /* set l1e entries of P2M table to be read-only. */
+ p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
flush_tlb_mask(d->domain_dirty_cpumask);
}
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/p2m.c Mon Sep 10 13:58:56 2007 -0600
@@ -4,7 +4,7 @@
* physical-to-machine mappings for automatically-translated domains.
*
* Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
* Parts of this code are Copyright (c) 2006 by Michael A Fetterman
* Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
*
@@ -93,6 +93,31 @@
#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+/* PTE flags for the various types of p2m entry */
+#define P2M_BASE_FLAGS \
+ (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+static unsigned long p2m_type_to_flags(p2m_type_t t)
+{
+ unsigned long flags = (t & 0x7UL) << 9;
+ switch(t)
+ {
+ case p2m_invalid:
+ default:
+ return flags;
+ case p2m_ram_rw:
+ return flags | P2M_BASE_FLAGS | _PAGE_RW;
+ case p2m_ram_logdirty:
+ return flags | P2M_BASE_FLAGS;
+ case p2m_ram_ro:
+ return flags | P2M_BASE_FLAGS;
+ case p2m_mmio_dm:
+ return flags;
+ case p2m_mmio_direct:
+ return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+ }
+}
+
// Find the next level's P2M entry, checking for out-of-range gfn's...
// Returns NULL on error.
@@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
}
mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
/* Read another domain's p2m entries */
{
mfn_t mfn;
- paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
l2_pgentry_t *l2e;
l1_pgentry_t *l1e;
ASSERT(paging_mode_translate(d));
+
+ /* XXX This is for compatibility with the old model, where anything not
+ * XXX marked as RAM was considered to be emulated MMIO space.
+ * XXX Once we start explicitly registering MMIO regions in the p2m
+ * XXX we will return p2m_invalid for unmapped gfns */
+ *t = p2m_mmio_dm;
+
mfn = pagetable_get_mfn(d->arch.phys_table);
-
- if ( gpfn > d->arch.p2m.max_mapped_pfn )
+ if ( gfn > d->arch.p2m.max_mapped_pfn )
/* This pfn is higher than the highest the p2m map currently holds */
return _mfn(INVALID_MFN);
@@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
return _mfn(INVALID_MFN);
}
mfn = _mfn(l1e_get_pfn(*l1e));
+ *t = p2m_flags_to_type(l1e_get_flags(*l1e));
unmap_domain_page(l1e);
- return mfn;
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
}
#if P2M_AUDIT
@@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
return;
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
- //ASSERT(mfn_to_gfn(d, mfn) == gfn);
-
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
}
@@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
unsigned long mfn)
{
unsigned long ogfn;
+ p2m_type_t ot;
mfn_t omfn;
if ( !paging_mode_translate(d) )
@@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
- omfn = gfn_to_mfn(d, gfn);
- if ( mfn_valid(omfn) )
- {
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+ omfn = gfn_to_mfn(d, gfn, &ot);
+ if ( p2m_is_ram(ot) )
+ {
+ ASSERT(mfn_valid(omfn));
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
@@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
/* This machine frame is already mapped at another physical address */
P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
mfn, ogfn, gfn);
- if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
- {
+ omfn = gfn_to_mfn(d, ogfn, &ot);
+ if ( p2m_is_ram(ot) )
+ {
+ ASSERT(mfn_valid(omfn));
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
@@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
}
}
- set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
- set_gpfn_from_mfn(mfn, gfn);
+ if ( mfn_valid(_mfn(mfn)) )
+ {
+ set_p2m_entry(d, gfn, _mfn(mfn),
+ p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
+ set_gpfn_from_mfn(mfn, gfn);
+ }
+ else
+ {
+ gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
+ gfn, mfn);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ }
audit_p2m(d);
p2m_unlock(d);
}
-/* This function goes through P2M table and modify l1e flags of all pages. Note
- * that physical base address of l1e is intact. This function can be used for
- * special purpose, such as marking physical memory as NOT WRITABLE for
- * tracking dirty pages during live migration.
- */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
-{
- unsigned long mfn, gfn;
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type. This is used in hardware-assisted paging to
+ * quickly enable or diable log-dirty tracking */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
+{
+ unsigned long mfn, gfn, flags;
l1_pgentry_t l1e_content;
l1_pgentry_t *l1e;
l2_pgentry_t *l2e;
@@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
{
- if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+ flags = l1e_get_flags(l1e[i1]);
+ if ( p2m_flags_to_type(flags) != ot )
continue;
mfn = l1e_get_pfn(l1e[i1]);
gfn = get_gpfn_from_mfn(mfn);
- /* create a new 1le entry using l1e_flags */
- l1e_content = l1e_from_pfn(mfn, l1e_flags);
+ /* create a new 1le entry with the new type */
+ flags = p2m_flags_to_type(nt);
+ l1e_content = l1e_from_pfn(mfn, flags);
paging_write_p2m_entry(d, gfn, &l1e[i1],
l1mfn, l1e_content, 1);
}
@@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain
p2m_unlock(d);
}
-/* This function traces through P2M table and modifies l1e flags of a specific
- * gpa.
- */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
-{
- unsigned long gfn;
+/* Modify the p2m type of a single gfn from ot to nt, returning the
+ * entry's previous type */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+ p2m_type_t ot, p2m_type_t nt)
+{
+ p2m_type_t pt;
mfn_t mfn;
p2m_lock(d);
- gfn = gpa >> PAGE_SHIFT;
- mfn = gfn_to_mfn(d, gfn);
- if ( mfn_valid(mfn) )
- set_p2m_entry(d, gfn, mfn, l1e_flags);
+ mfn = gfn_to_mfn(d, gfn, &pt);
+ if ( pt == ot )
+ set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
p2m_unlock(d);
- return 1;
+ return pt;
}
/*
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/common.c Mon Sep 10 13:58:56 2007 -0600
@@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
l1_pgentry_t new, unsigned int level)
{
struct domain *d = v->domain;
- mfn_t mfn;
shadow_lock(d);
- /* handle physmap_add and physmap_remove */
- mfn = gfn_to_mfn(d, gfn);
- if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
- sh_remove_all_shadows_and_parents(v, mfn);
- if ( sh_remove_all_mappings(v, mfn) )
- flush_tlb_mask(d->domain_dirty_cpumask);
- }
-
- /* update the entry with new content */
+ /* If we're removing an MFN from the p2m, remove it from the shadows too */
+ if ( level == 1 )
+ {
+ mfn_t mfn = _mfn(l1e_get_pfn(*p));
+ p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+ if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
+ {
+ sh_remove_all_shadows_and_parents(v, mfn);
+ if ( sh_remove_all_mappings(v, mfn) )
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ }
+ }
+
+ /* Update the entry with new content */
safe_write_pte(p, new);
/* install P2M in monitors for PAE Xen */
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c Mon Sep 10 13:58:56 2007 -0600
@@ -209,6 +209,7 @@ guest_walk_tables(struct vcpu *v, unsign
guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
{
struct domain *d = v->domain;
+ p2m_type_t p2mt;
ASSERT(!guest_op || shadow_locked_by_me(d));
perfc_incr(shadow_guest_walk);
@@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
+ guest_l4_table_offset(va);
/* Walk down to the l3e */
if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
- gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
- if ( !mfn_valid(gw->l3mfn) ) return 1;
+ gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+ if ( !p2m_is_ram(p2mt) ) return 1;
+ ASSERT(mfn_valid(gw->l3mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
flush_tlb_mask(d->domain_dirty_cpumask);
@@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
#endif /* PAE or 64... */
/* Walk down to the l2e */
if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
- gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
- if ( !mfn_valid(gw->l2mfn) ) return 1;
+ gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+ if ( !p2m_is_ram(p2mt) ) return 1;
+ ASSERT(mfn_valid(gw->l2mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
flush_tlb_mask(d->domain_dirty_cpumask);
@@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
else
{
/* Not a superpage: carry on and find the l1e. */
- gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
- if ( !mfn_valid(gw->l1mfn) ) return 1;
+ gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+ if ( !p2m_is_ram(p2mt) ) return 1;
+ ASSERT(mfn_valid(gw->l1mfn));
/* This mfn is a pagetable: make sure the guest can't write to it. */
if ( guest_op
&& sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
@@ -626,7 +630,7 @@ _sh_propagate(struct vcpu *v,
void *shadow_entry_ptr,
int level,
fetch_type_t ft,
- int mmio)
+ p2m_type_t p2mt)
{
guest_l1e_t *gp = guest_entry_ptr;
shadow_l1e_t *sp = shadow_entry_ptr;
@@ -636,6 +640,13 @@ _sh_propagate(struct vcpu *v,
/* We don't shadow PAE l3s */
ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
+ /* Check there's something for the shadows to map to */
+ if ( !p2m_is_valid(p2mt) )
+ {
+ *sp = shadow_l1e_empty();
+ goto done;
+ }
if ( mfn_valid(guest_table_mfn) )
/* Handle A and D bit propagation into the guest */
@@ -658,19 +669,22 @@ _sh_propagate(struct vcpu *v,
goto done;
}
- if ( level == 1 && mmio )
- {
- /* Guest l1e maps MMIO space */
+ if ( level == 1 && p2mt == p2m_mmio_dm )
+ {
+ /* Guest l1e maps emulated MMIO space */
*sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
if ( !d->arch.paging.shadow.has_fast_mmio_entries )
d->arch.paging.shadow.has_fast_mmio_entries = 1;
goto done;
}
- // Must have a valid target_mfn, unless this is a prefetch. In the
+ // Must have a valid target_mfn unless this is a prefetch. In the
// case of a prefetch, an invalid mfn means that we can not usefully
// shadow anything, and so we return early.
//
+ /* N.B. For pass-through MMIO, either this test needs to be relaxed,
+ * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
+ * MMIO areas need to be added to the frame-table to make them "valid". */
if ( !mfn_valid(target_mfn) )
{
ASSERT((ft == ft_prefetch));
@@ -718,6 +732,8 @@ _sh_propagate(struct vcpu *v,
// Only allow the guest write access to a page a) on a demand fault,
// or b) if the page is already marked as dirty.
//
+ // (We handle log-dirty entirely inside the shadow code, without using the
+ // p2m_ram_logdirty p2m type: only HAP uses that.)
if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
{
if ( ft & FETCH_TYPE_WRITE )
@@ -725,6 +741,10 @@ _sh_propagate(struct vcpu *v,
else if ( !sh_mfn_is_dirty(d, target_mfn) )
sflags &= ~_PAGE_RW;
}
+
+ /* Read-only memory */
+ if ( p2mt == p2m_ram_ro )
+ sflags &= ~_PAGE_RW;
// protect guest page tables
//
@@ -754,7 +774,12 @@ _sh_propagate(struct vcpu *v,
sflags |= _PAGE_USER;
}
+ /* MMIO addresses should never be cached */
+ if ( p2m_is_mmio(p2mt) )
+ sflags |= _PAGE_PCD;
+
*sp = shadow_l1e_from_mfn(target_mfn, sflags);
+
done:
SHADOW_DEBUG(PROPAGATE,
"%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
@@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
shadow_l4e_t *sl4e,
fetch_type_t ft)
{
- _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
+ _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
}
static void
@@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
shadow_l3e_t *sl3e,
fetch_type_t ft)
{
- _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
+ _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
}
#endif // GUEST_PAGING_LEVELS >= 4
@@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
shadow_l2e_t *sl2e,
fetch_type_t ft)
{
- _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
+ _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
}
static void
@@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
mfn_t gmfn,
shadow_l1e_t *sl1e,
fetch_type_t ft,
- int mmio)
-{
- _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
+ p2m_type_t p2mt)
+{
+ _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
}
@@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
shadow_l4e_t *sl4p = se;
mfn_t sl3mfn = _mfn(INVALID_MFN);
struct domain *d = v->domain;
+ p2m_type_t p2mt;
int result = 0;
perfc_incr(shadow_validate_gl4e_calls);
@@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
{
gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
- mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
- if ( mfn_valid(gl3mfn) )
+ mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
else
result |= SHADOW_SET_ERROR;
@@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
guest_l3e_t *new_gl3e = new_ge;
shadow_l3e_t *sl3p = se;
mfn_t sl2mfn = _mfn(INVALID_MFN);
+ p2m_type_t p2mt;
int result = 0;
perfc_incr(shadow_validate_gl3e_calls);
@@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
{
gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
- mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
- if ( mfn_valid(gl2mfn) )
+ mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
else
result |= SHADOW_SET_ERROR;
@@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
guest_l2e_t *new_gl2e = new_ge;
shadow_l2e_t *sl2p = se;
mfn_t sl1mfn = _mfn(INVALID_MFN);
+ p2m_type_t p2mt;
int result = 0;
perfc_incr(shadow_validate_gl2e_calls);
@@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
}
else
{
- mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
- if ( mfn_valid(gl1mfn) )
+ mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
else
result |= SHADOW_SET_ERROR;
@@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
shadow_l1e_t *sl1p = se;
gfn_t gfn;
mfn_t gmfn;
- int result = 0, mmio;
+ p2m_type_t p2mt;
+ int result = 0;
perfc_incr(shadow_validate_gl1e_calls);
gfn = guest_l1e_get_gfn(*new_gl1e);
- gmfn = gfn_to_mfn(v->domain, gfn);
-
- mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
+ gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+
l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e,
- ft_prefetch, mmio);
+ ft_prefetch, p2mt);
result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
return result;
@@ -2554,12 +2582,13 @@ static void sh_prefetch(struct vcpu *v,
static void sh_prefetch(struct vcpu *v, walk_t *gw,
shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
{
- int i, dist, mmio;
+ int i, dist;
gfn_t gfn;
mfn_t gmfn;
guest_l1e_t gl1e;
shadow_l1e_t sl1e;
u32 gflags;
+ p2m_type_t p2mt;
/* Prefetch no further than the end of the _shadow_ l1 MFN */
dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
@@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v,
/* Look at the gfn that the l1e is pointing at */
gfn = guest_l1e_get_gfn(gl1e);
- gmfn = gfn_to_mfn(v->domain, gfn);
- mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
+ gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
/* Propagate the entry. Safe to use a pointer to our local
* gl1e, since this is not a demand-fetch so there will be no
* write-back to the guest. */
l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
- gmfn, &sl1e, ft_prefetch, mmio);
+ gmfn, &sl1e, ft_prefetch, p2mt);
(void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
}
}
@@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
paddr_t gpa;
struct sh_emulate_ctxt emul_ctxt;
struct x86_emulate_ops *emul_ops;
- int r, mmio;
+ int r;
fetch_type_t ft = 0;
+ p2m_type_t p2mt;
SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
v->domain->domain_id, v->vcpu_id, va, regs->error_code);
@@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
/* What mfn is the guest trying to access? */
gfn = guest_l1e_get_gfn(gw.eff_l1e);
- gmfn = gfn_to_mfn(d, gfn);
- mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
-
- if ( !mmio && !mfn_valid(gmfn) )
+ gmfn = gfn_to_mfn(d, gfn, &p2mt);
+
+ if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
{
perfc_incr(shadow_fault_bail_bad_gfn);
SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
@@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
/* Calculate the shadow entry and write it */
l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn,
- gmfn, &sl1e, ft, mmio);
+ gmfn, &sl1e, ft, p2mt);
r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
#if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
}
}
- if ( mmio )
+ /* Need to hand off device-model MMIO and writes to read-only
+ * memory to the device model */
+ if ( p2mt == p2m_mmio_dm
+ || (p2mt == p2m_ram_ro && ft == ft_demand_write) )
{
gpa = guest_walk_to_gpa(&gw);
goto mmio;
@@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
int flush = 0;
gfn_t gl2gfn;
mfn_t gl2mfn;
+ p2m_type_t p2mt;
guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
/* First, make all four entries read-only. */
for ( i = 0; i < 4; i++ )
@@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
{
gl2gfn = guest_l3e_get_gfn(gl3e[i]);
- gl2mfn = gfn_to_mfn(d, gl2gfn);
- flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
+ gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
+ flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
}
}
if ( flush )
@@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
{
gl2gfn = guest_l3e_get_gfn(gl3e[i]);
- gl2mfn = gfn_to_mfn(d, gl2gfn);
- sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
- ? SH_type_l2h_shadow
- : SH_type_l2_shadow);
+ gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
+ sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
+ ? SH_type_l2h_shadow
+ : SH_type_l2_shadow);
+ else
+ sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
}
else
- /* The guest is not present: clear out the shadow. */
sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
}
}
@@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
u32 flags, errcode;
gfn_t gfn;
mfn_t mfn;
+ p2m_type_t p2mt;
/* We don't emulate user-mode writes to page tables */
if ( ring_3(sh_ctxt->ctxt.regs) )
@@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
}
}
#endif
- mfn = gfn_to_mfn(v->domain, gfn);
errcode = PFEC_write_access;
if ( !(flags & _PAGE_PRESENT) )
@@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
if ( !(flags & _PAGE_RW) )
goto page_fault;
- if ( mfn_valid(mfn) )
- {
+ mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+ if ( p2m_is_ram(p2mt) )
+ {
+ ASSERT(mfn_valid(mfn));
*mfnp = mfn;
v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
@@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
/* Convert this gfn to an mfn in the manner appropriate for the
* guest pagetable it's used in (gmfn) */
{
+ p2m_type_t p2mt;
if ( !shadow_mode_translate(v->domain) )
return _mfn(gfn_x(gfn));
@@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
!= PGT_writable_page )
return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
else
- return gfn_to_mfn(v->domain, gfn);
+ return gfn_to_mfn(v->domain, gfn, &p2mt);
}
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/types.h Mon Sep 10 13:58:56 2007 -0600
@@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
/* Override gfn_to_mfn to work with gfn_t */
#undef gfn_to_mfn
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
/* Type used for recording a walk through guest pagetables. It is
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/shutdown.c
--- a/xen/arch/x86/shutdown.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/shutdown.c Mon Sep 10 13:58:56 2007 -0600
@@ -197,7 +197,7 @@ static void machine_real_restart(const u
#endif
-void machine_restart(char *cmd)
+void machine_restart(void)
{
int i;
@@ -216,18 +216,12 @@ void machine_restart(char *cmd)
safe_halt();
}
- /*
- * Stop all CPUs and turn off local APICs and the IO-APIC, so
- * other OSs see a clean IRQ state.
- */
smp_send_stop();
- disable_IO_APIC();
- hvm_cpu_down();
/* Rebooting needs to touch the page at absolute address 0. */
*((unsigned short *)__va(0x472)) = reboot_mode;
- if (reboot_thru_bios <= 0)
+ if ( reboot_thru_bios <= 0 )
{
for ( ; ; )
{
diff -r 7d9b20d91102 -r 42b925c00d8a xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/smp.c Mon Sep 10 13:58:56 2007 -0600
@@ -279,6 +279,19 @@ int on_selected_cpus(
ASSERT(local_irq_is_enabled());
+ /* Legacy UP system with no APIC to deliver IPIs? */
+ if ( unlikely(!cpu_has_apic) )
+ {
+ ASSERT(num_online_cpus() == 1);
+ if ( cpu_isset(0, selected) )
+ {
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ }
+ return 0;
+ }
+
if ( nr_cpus == 0 )
return 0;
@@ -306,23 +319,33 @@ int on_selected_cpus(
static void stop_this_cpu (void *dummy)
{
+ disable_local_APIC();
+ hvm_cpu_down();
+
cpu_clear(smp_processor_id(), cpu_online_map);
+
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+}
+
+/*
+ * Stop all CPUs and turn off local APICs and the IO-APIC, so other OSs see a
+ * clean IRQ state.
+ */
+void smp_send_stop(void)
+{
+ int timeout = 10;
+
+ smp_call_function(stop_this_cpu, NULL, 1, 0);
+
+ /* Wait 10ms for all other CPUs to go offline. */
+ while ( (num_online_cpus() > 1) && (timeout-- > 0) )
+ mdelay(1);
local_irq_disable();
disable_local_APIC();
+ disable_IO_APIC();
hvm_cpu_down();
-
- for ( ; ; )
- __asm__ __volatile__ ( "hlt" );
-}
-
-void smp_send_stop(void)
-{
- /* Stop all other CPUs in the system. */
- smp_call_function(stop_this_cpu, NULL, 1, 0);
-
- local_irq_disable();
- disable_local_APIC();
local_irq_enable();
}
diff -r 7d9b20d91102 -r 42b925c00d8a xen/common/keyhandler.c
--- a/xen/common/keyhandler.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/common/keyhandler.c Mon Sep 10 13:58:56 2007 -0600
@@ -123,7 +123,7 @@ static void halt_machine(unsigned char k
static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
{
printk("'%c' pressed -> rebooting machine\n", key);
- machine_restart(NULL);
+ machine_restart();
}
static void cpuset_print(char *set, int size, cpumask_t mask)
diff -r 7d9b20d91102 -r 42b925c00d8a xen/common/shutdown.c
--- a/xen/common/shutdown.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/common/shutdown.c Mon Sep 10 13:58:56 2007 -0600
@@ -24,7 +24,7 @@ static void maybe_reboot(void)
printk("rebooting machine in 5 seconds.\n");
watchdog_disable();
mdelay(5000);
- machine_restart(NULL);
+ machine_restart();
}
}
@@ -50,7 +50,7 @@ void dom0_shutdown(u8 reason)
case SHUTDOWN_reboot:
{
printk("Domain 0 shutdown: rebooting machine.\n");
- machine_restart(NULL);
+ machine_restart();
break; /* not reached */
}
diff -r 7d9b20d91102 -r 42b925c00d8a xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/drivers/char/console.c Mon Sep 10 13:58:56 2007 -0600
@@ -895,7 +895,7 @@ void panic(const char *fmt, ...)
{
watchdog_disable();
mdelay(5000);
- machine_restart(NULL);
+ machine_restart();
}
}
diff -r 7d9b20d91102 -r 42b925c00d8a xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/asm-x86/mm.h Mon Sep 10 13:58:56 2007 -0600
@@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
? get_gpfn_from_mfn(mfn) \
: (mfn) )
-#define gmfn_to_mfn(_d, gpfn) mfn_x(gfn_to_mfn(_d, gpfn))
-
#define INVALID_MFN (~0UL)
#ifdef CONFIG_COMPAT
diff -r 7d9b20d91102 -r 42b925c00d8a xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/asm-x86/p2m.h Mon Sep 10 13:58:56 2007 -0600
@@ -4,7 +4,7 @@
* physical-to-machine mappings for automatically-translated domains.
*
* Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
* Parts of this code are Copyright (c) 2006 by Michael A Fetterman
* Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
*
@@ -27,49 +27,141 @@
#define _XEN_P2M_H
-/* The phys_to_machine_mapping is the reversed mapping of MPT for full
- * virtualization. It is only used by shadow_mode_translate()==true
- * guests, so we steal the address space that would have normally
- * been used by the read-only MPT map.
+/*
+ * The phys_to_machine_mapping maps guest physical frame numbers
+ * to machine frame numbers. It only exists for paging_mode_translate
+ * guests. It is organised in page-table format, which:
+ *
+ * (1) allows us to use it directly as the second pagetable in hardware-
+ * assisted paging and (hopefully) iommu support; and
+ * (2) lets us map it directly into the guest vcpus' virtual address space
+ * as a linear pagetable, so we can read and write it easily.
+ *
+ * For (2) we steal the address space that would have normally been used
+ * by the read-only MPT map in a non-translated guest. (For
+ * paging_mode_external() guests this mapping is in the monitor table.)
*/
#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
-
-/* Read the current domain's P2M table. */
-static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
-{
- l1_pgentry_t l1e = l1e_empty();
- int ret;
-
- if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
- return _mfn(INVALID_MFN);
-
- /* Don't read off the end of the p2m table */
- ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
-
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
- return _mfn(l1e_get_pfn(l1e));
-
- return _mfn(INVALID_MFN);
+/*
+ * The upper levels of the p2m pagetable always contain full rights; all
+ * variation in the access control bits is made in the level-1 PTEs.
+ *
+ * In addition to the phys-to-machine translation, each p2m PTE contains
+ * *type* information about the gfn it translates, helping Xen to decide
+ * on the correct course of action when handling a page-fault to that
+ * guest frame. We store the type in the "available" bits of the PTEs
+ * in the table, which gives us 8 possible types on 32-bit systems.
+ * Further expansions of the type system will only be supported on
+ * 64-bit Xen.
+ */
+typedef enum {
+ p2m_invalid = 0, /* Nothing mapped here */
+ p2m_ram_rw = 1, /* Normal read/write guest RAM */
+ p2m_ram_logdirty = 2, /* Temporarily read-only for log-dirty */
+ p2m_ram_ro = 3, /* Read-only; writes go to the device model */
+ p2m_mmio_dm = 4, /* Reads and writes go to the device model */
+ p2m_mmio_direct = 5, /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
+
+/* We use bitmaps and masks to handle groups of types */
+#define p2m_to_mask(_t) (1UL << (_t))
+
+/* RAM types, which map to real machine frames */
+#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw) \
+ | p2m_to_mask(p2m_ram_logdirty) \
+ | p2m_to_mask(p2m_ram_ro))
+
+/* MMIO types, which don't have to map to anything in the frametable */
+#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm) \
+ | p2m_to_mask(p2m_mmio_direct))
+
+/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
+#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty) \
+ | p2m_to_mask(p2m_ram_ro))
+
+/* Useful predicates */
+#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
+#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
+#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+ /* Type is stored in the "available" bits, 9, 10 and 11 */
+ return (flags >> 9) & 0x7;
+}
+
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+ mfn_t mfn = _mfn(INVALID_MFN);
+ p2m_type_t p2mt = p2m_mmio_dm;
+ /* XXX This is for compatibility with the old model, where anything not
+ * XXX marked as RAM was considered to be emulated MMIO space.
+ * XXX Once we start explicitly registering MMIO regions in the p2m
+ * XXX we will return p2m_invalid for unmapped gfns */
+
+ if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
+ {
+ l1_pgentry_t l1e = l1e_empty();
+ int ret;
+
+ ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
+ / sizeof(l1_pgentry_t));
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(*t));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
+ }
+
+ *t = p2mt;
+ return mfn;
}
/* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
/* General conversion function from gfn to mfn */
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
-static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
+static inline mfn_t _gfn_to_mfn(struct domain *d,
+ unsigned long gfn, p2m_type_t *t)
{
if ( !paging_mode_translate(d) )
+ {
+ /* Not necessarily true, but for non-translated guests, we claim
+ * it's the most generic kind of memory */
+ *t = p2m_ram_rw;
return _mfn(gfn);
+ }
if ( likely(current->domain == d) )
- return gfn_to_mfn_current(gfn);
+ return gfn_to_mfn_current(gfn, t);
else
- return gfn_to_mfn_foreign(d, gfn);
+ return gfn_to_mfn_foreign(d, gfn, t);
+}
+
+/* Compatibility function exporting the old untyped interface */
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
+{
+ mfn_t mfn;
+ p2m_type_t t;
+ mfn = gfn_to_mfn(d, gpfn, &t);
+ if ( p2m_is_valid(t) )
+ return mfn_x(mfn);
+ return INVALID_MFN;
}
/* General conversion function from mfn to gfn */
@@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
return mfn_x(mfn);
}
-/* Compatibility function for HVM code */
-static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
-{
- return mfn_x(gfn_to_mfn_current(pfn));
-}
-
-/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
-static inline int mmio_space(paddr_t gpa)
-{
- unsigned long gfn = gpa >> PAGE_SHIFT;
- return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
-}
-
/* Translate the frame number held in an l1e from guest to machine */
static inline l1_pgentry_t
gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
@@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
}
-
/* Init the datastructures for later use by the p2m code */
void p2m_init(struct domain *d);
@@ -130,11 +208,12 @@ void guest_physmap_remove_page(struct do
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
unsigned long mfn);
-/* set P2M table l1e flags */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
-
-/* set P2M table l1e flags for a gpa */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
+/* Change types across all p2m entries in a domain */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
+
+/* Compare-exchange the type of a single p2m entry */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+ p2m_type_t ot, p2m_type_t nt);
#endif /* _XEN_P2M_H */
diff -r 7d9b20d91102 -r 42b925c00d8a xen/include/xen/shutdown.h
--- a/xen/include/xen/shutdown.h Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/xen/shutdown.h Mon Sep 10 13:58:56 2007 -0600
@@ -6,7 +6,7 @@ extern int opt_noreboot;
void dom0_shutdown(u8 reason);
-void machine_restart(char *cmd);
+void machine_restart(void);
void machine_halt(void);
void machine_power_off(void);
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|