# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1234491748 -32400
# Node ID c7cba853583da45ee4478237047fdd5d6bed68cd
# Parent af992824b5cfa3b81dbe68293216a5df3ec0bdf4
# Parent 32b15413749255e0cd518f25d9202759586dcb27
merge with xen-unstable.hg
---
.hgignore | 10
Config.mk | 4
docs/man/xm.pod.1 | 2
extras/mini-os/arch/x86/mm.c | 6
tools/blktap/drivers/Makefile | 2
tools/firmware/rombios/rombios.c | 15 -
tools/libxc/xc_domain.c | 58 ++++
tools/libxc/xc_pagetab.c | 36 +-
tools/libxc/xenctrl.h | 25 +
tools/python/xen/xend/XendAPIStore.py | 3
tools/python/xen/xend/image.py | 34 +-
tools/python/xen/xend/server/pciquirk.py | 3
tools/python/xen/xm/create.py | 2
tools/python/xen/xm/main.py | 8
tools/xentrace/xenctx.c | 58 +++-
xen/arch/ia64/linux-xen/iosapic.c | 10
xen/arch/ia64/linux-xen/irq_ia64.c | 26 +-
xen/arch/ia64/linux-xen/mca.c | 9
xen/arch/ia64/xen/hypercall.c | 2
xen/arch/ia64/xen/irq.c | 24 -
xen/arch/x86/Makefile | 1
xen/arch/x86/acpi/cpufreq/cpufreq.c | 16 -
xen/arch/x86/acpi/suspend.c | 3
xen/arch/x86/boot/Makefile | 6
xen/arch/x86/boot/build32.mk | 24 +
xen/arch/x86/boot/head.S | 10
xen/arch/x86/boot/reloc.c | 89 ++++++
xen/arch/x86/cpu/mcheck/amd_k8.c | 14 -
xen/arch/x86/cpu/mcheck/amd_nonfatal.c | 13 -
xen/arch/x86/cpu/mcheck/mce.c | 130 ++++++++++
xen/arch/x86/cpu/mcheck/mce.h | 3
xen/arch/x86/cpu/mcheck/mce_intel.c | 8
xen/arch/x86/domain.c | 38 +-
xen/arch/x86/domain_build.c | 2
xen/arch/x86/domctl.c | 42 ++-
xen/arch/x86/e820.c | 2
xen/arch/x86/i8259.c | 4
xen/arch/x86/io_apic.c | 54 ----
xen/arch/x86/irq.c | 77 +++++
xen/arch/x86/mm.c | 158 ++++++------
xen/arch/x86/mm/hap/hap.c | 23 -
xen/arch/x86/mm/hap/p2m-ept.c | 88 ++++--
xen/arch/x86/mm/p2m.c | 95 ++-----
xen/arch/x86/mm/paging.c | 6
xen/arch/x86/mm/shadow/common.c | 296 +++++++++++------------
xen/arch/x86/mm/shadow/multi.c | 131 ++++------
xen/arch/x86/mm/shadow/private.h | 123 ++-------
xen/arch/x86/numa.c | 2
xen/arch/x86/physdev.c | 4
xen/arch/x86/setup.c | 16 -
xen/arch/x86/smpboot.c | 4
xen/arch/x86/x86_32/xen.lds.S | 1
xen/arch/x86/x86_64/entry.S | 1
xen/arch/x86/x86_64/xen.lds.S | 1
xen/common/domain.c | 18 -
xen/common/grant_table.c | 2
xen/common/hvm/save.c | 48 +++
xen/common/memory.c | 32 --
xen/common/page_alloc.c | 158 +++++++-----
xen/drivers/char/serial.c | 2
xen/drivers/cpufreq/cpufreq.c | 73 +++++
xen/drivers/cpufreq/cpufreq_misc_governors.c | 25 +
xen/drivers/cpufreq/cpufreq_ondemand.c | 95 +++----
xen/drivers/passthrough/amd/iommu_init.c | 23 -
xen/drivers/passthrough/amd/iommu_map.c | 22 -
xen/drivers/passthrough/amd/pci_amd_iommu.c | 1
xen/drivers/passthrough/io.c | 2
xen/drivers/passthrough/iommu.c | 9
xen/drivers/passthrough/vtd/dmar.c | 18 -
xen/drivers/passthrough/vtd/ia64/vtd.c | 13 -
xen/drivers/passthrough/vtd/intremap.c | 2
xen/drivers/passthrough/vtd/iommu.c | 61 +++-
xen/drivers/passthrough/vtd/iommu.h | 4
xen/drivers/passthrough/vtd/qinval.c | 4
xen/drivers/passthrough/vtd/vtd.h | 4
xen/drivers/passthrough/vtd/x86/vtd.c | 9
xen/include/acpi/cpufreq/cpufreq.h | 1
xen/include/asm-ia64/hardirq.h | 1
xen/include/asm-ia64/hvm/iommu.h | 1
xen/include/asm-ia64/hvm/irq.h | 14 -
xen/include/asm-ia64/linux-xen/asm/smp.h | 1
xen/include/asm-ia64/linux-xen/linux/interrupt.h | 4
xen/include/asm-ia64/linux/asm/hw_irq.h | 2
xen/include/asm-ia64/mm.h | 12
xen/include/asm-ia64/tlbflush.h | 3
xen/include/asm-x86/domain.h | 13 -
xen/include/asm-x86/hvm/vmx/vmx.h | 6
xen/include/asm-x86/iocap.h | 5
xen/include/asm-x86/irq.h | 3
xen/include/asm-x86/mm.h | 84 +++++-
xen/include/asm-x86/p2m.h | 4
xen/include/asm-x86/page.h | 38 ++
xen/include/asm-x86/perfc.h | 1
xen/include/asm-x86/processor.h | 1
xen/include/public/arch-ia64/hvm/save.h | 4
xen/include/public/arch-x86/hvm/save.h | 4
xen/include/public/arch-x86/xen-mca.h | 48 +++
xen/include/public/domctl.h | 12
xen/include/public/io/pciif.h | 2
xen/include/xen/hvm/save.h | 2
xen/include/xen/iocap.h | 3
xen/include/xen/irq.h | 20 +
xen/include/xen/mm.h | 186 +++++++++++++-
xen/include/xen/sched.h | 5
xen/xsm/flask/hooks.c | 1
105 files changed, 1880 insertions(+), 1048 deletions(-)
diff -r af992824b5cf -r c7cba853583d .hgignore
--- a/.hgignore Fri Feb 13 10:56:01 2009 +0900
+++ b/.hgignore Fri Feb 13 11:22:28 2009 +0900
@@ -256,6 +256,7 @@
^xen/arch/x86/asm-offsets\.s$
^xen/arch/x86/boot/mkelf32$
^xen/arch/x86/xen\.lds$
+^xen/arch/x86/boot/reloc.S$
^xen/ddb/.*$
^xen/include/asm$
^xen/include/asm-.*/asm-offsets\.h$
@@ -279,15 +280,6 @@
^xen/arch/ia64/asm-xsi-offsets\.s$
^xen/arch/ia64/map\.out$
^xen/arch/ia64/xen\.lds\.s$
-^xen/arch/powerpc/dom0\.bin$
-^xen/arch/powerpc/asm-offsets\.s$
-^xen/arch/powerpc/firmware$
-^xen/arch/powerpc/firmware.dbg$
-^xen/arch/powerpc/firmware_image.bin$
-^xen/arch/powerpc/xen\.lds$
-^xen/arch/powerpc/\.xen-syms$
-^xen/arch/powerpc/xen-syms\.S$
-^xen/arch/powerpc/cmdline.dep$
^unmodified_drivers/linux-2.6/\.tmp_versions
^unmodified_drivers/linux-2.6/.*\.cmd$
^unmodified_drivers/linux-2.6/.*\.ko$
diff -r af992824b5cf -r c7cba853583d Config.mk
--- a/Config.mk Fri Feb 13 10:56:01 2009 +0900
+++ b/Config.mk Fri Feb 13 11:22:28 2009 +0900
@@ -1,7 +1,7 @@
# -*- mode: Makefile; -*-
-# A debug build of Xen and tools?
-debug ?= n
+# A debug build of Xen and tools? TEMPORARILY ENABLED
+debug ?= y
XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
-e s/i86pc/x86_32/ -e s/amd64/x86_64/)
diff -r af992824b5cf -r c7cba853583d docs/man/xm.pod.1
--- a/docs/man/xm.pod.1 Fri Feb 13 10:56:01 2009 +0900
+++ b/docs/man/xm.pod.1 Fri Feb 13 11:22:28 2009 +0900
@@ -66,6 +66,8 @@ The attached console will perform much l
The attached console will perform much like a standard serial console,
so running curses based interfaces over the console B<is not
advised>. Vi tends to get very odd when using it over this interface.
+
+Use the key combination Ctrl+] to detach the domain console.
=item B<create> I<configfile> [I<OPTIONS>] [I<vars>]..
diff -r af992824b5cf -r c7cba853583d extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c Fri Feb 13 10:56:01 2009 +0900
+++ b/extras/mini-os/arch/x86/mm.c Fri Feb 13 11:22:28 2009 +0900
@@ -550,9 +550,15 @@ static void clear_bootstrap(void)
void arch_init_p2m(unsigned long max_pfn)
{
+#ifdef __x86_64__
#define L1_P2M_SHIFT 9
#define L2_P2M_SHIFT 18
#define L3_P2M_SHIFT 27
+#else
+#define L1_P2M_SHIFT 10
+#define L2_P2M_SHIFT 20
+#define L3_P2M_SHIFT 30
+#endif
#define L1_P2M_ENTRIES (1 << L1_P2M_SHIFT)
#define L2_P2M_ENTRIES (1 << (L2_P2M_SHIFT - L1_P2M_SHIFT))
#define L3_P2M_ENTRIES (1 << (L3_P2M_SHIFT - L2_P2M_SHIFT))
diff -r af992824b5cf -r c7cba853583d tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/blktap/drivers/Makefile Fri Feb 13 11:22:28 2009 +0900
@@ -13,7 +13,7 @@ CFLAGS += -I $(LIBAIO_DIR)
CFLAGS += -I $(LIBAIO_DIR)
CFLAGS += -D_GNU_SOURCE
-ifeq ($(shell . ./check_gcrypt),"yes")
+ifeq ($(shell . ./check_gcrypt $(CC)),yes)
CFLAGS += -DUSE_GCRYPT
CRYPT_LIB := -lgcrypt
else
diff -r af992824b5cf -r c7cba853583d tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/firmware/rombios/rombios.c Fri Feb 13 11:22:28 2009 +0900
@@ -4609,6 +4609,10 @@ int15_function32(regs, ES, DS, FLAGS)
{
Bit32u extended_memory_size=0; // 64bits long
Bit16u CX,DX;
+#ifdef HVMASSIST
+ Bit16u off, e820_table_size;
+ Bit32u base, type, size;
+#endif
BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
@@ -4625,8 +4629,10 @@ ASM_START
;; Get the count in eax
mov bx, sp
+SEG SS
mov ax, _int15_function32.CX [bx]
shl eax, #16
+SEG SS
mov ax, _int15_function32.DX [bx]
;; convert to numbers of 15usec ticks
@@ -4660,8 +4666,7 @@ ASM_END
{
#ifdef HVMASSIST
case 0x20: {
- Bit16u e820_table_size =
- read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
+ e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
if (regs.u.r32.edx != 0x534D4150) /* SMAP */
goto int15_unimplemented;
@@ -4674,8 +4679,6 @@ ASM_END
if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size)
regs.u.r32.ebx = 0;
} else if (regs.u.r16.bx == 1) {
- Bit32u base, type;
- Bit16u off;
for (off = 0; off < e820_table_size; off += 0x14) {
base = read_dword(E820_SEG, E820_OFFSET + off);
type = read_dword(E820_SEG, E820_OFFSET + 0x10 + off);
@@ -4699,9 +4702,7 @@ ASM_END
}
case 0x01: {
- Bit16u off, e820_table_size =
- read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
- Bit32u base, type, size;
+ e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
// do we have any reason to fail here ?
CLEAR_CF();
diff -r af992824b5cf -r c7cba853583d tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xc_domain.c Fri Feb 13 11:22:28 2009 +0900
@@ -269,6 +269,38 @@ int xc_domain_hvm_getcontext(int xc_hand
unlock_pages(ctxt_buf, size);
return (ret < 0 ? -1 : domctl.u.hvmcontext.size);
+}
+
+/* Get just one element of the HVM guest context.
+ * size must be >= HVM_SAVE_LENGTH(type) */
+int xc_domain_hvm_getcontext_partial(int xc_handle,
+ uint32_t domid,
+ uint16_t typecode,
+ uint16_t instance,
+ void *ctxt_buf,
+ uint32_t size)
+{
+ int ret;
+ DECLARE_DOMCTL;
+
+ if ( !ctxt_buf )
+ return -EINVAL;
+
+ domctl.cmd = XEN_DOMCTL_gethvmcontext_partial;
+ domctl.domain = (domid_t) domid;
+ domctl.u.hvmcontext_partial.type = typecode;
+ domctl.u.hvmcontext_partial.instance = instance;
+ set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, ctxt_buf);
+
+ if ( (ret = lock_pages(ctxt_buf, size)) != 0 )
+ return ret;
+
+ ret = do_domctl(xc_handle, &domctl);
+
+ if ( ctxt_buf )
+ unlock_pages(ctxt_buf, size);
+
+ return ret ? -1 : 0;
}
/* set info to hvm guest for restore */
@@ -909,6 +941,32 @@ int xc_domain_update_msi_irq(
return rc;
}
+int xc_domain_unbind_msi_irq(
+ int xc_handle,
+ uint32_t domid,
+ uint32_t gvec,
+ uint32_t pirq,
+ uint32_t gflags)
+{
+ int rc;
+ xen_domctl_bind_pt_irq_t *bind;
+
+ DECLARE_DOMCTL;
+
+ domctl.cmd = XEN_DOMCTL_unbind_pt_irq;
+ domctl.domain = (domid_t)domid;
+
+ bind = &(domctl.u.bind_pt_irq);
+ bind->hvm_domid = domid;
+ bind->irq_type = PT_IRQ_TYPE_MSI;
+ bind->machine_irq = pirq;
+ bind->u.msi.gvec = gvec;
+ bind->u.msi.gflags = gflags;
+
+ rc = do_domctl(xc_handle, &domctl);
+ return rc;
+}
+
/* Pass-through: binds machine irq to guests irq */
int xc_domain_bind_pt_irq(
int xc_handle,
diff -r af992824b5cf -r c7cba853583d tools/libxc/xc_pagetab.c
--- a/tools/libxc/xc_pagetab.c Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xc_pagetab.c Fri Feb 13 11:22:28 2009 +0900
@@ -4,50 +4,42 @@
* Function to translate virtual to physical addresses.
*/
#include "xc_private.h"
+#include <xen/hvm/save.h>
#define CR0_PG 0x80000000
#define CR4_PAE 0x20
#define PTE_PSE 0x80
+#define EFER_LMA 0x400
+
unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
int vcpu, unsigned long long virt)
{
xc_dominfo_t dominfo;
- vcpu_guest_context_any_t ctx;
uint64_t paddr, mask, pte = 0;
int size, level, pt_levels = 2;
void *map;
if (xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1
- || dominfo.domid != dom
- || xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
+ || dominfo.domid != dom)
return 0;
/* What kind of paging are we dealing with? */
if (dominfo.hvm) {
- unsigned long cr0, cr3, cr4;
- xen_capabilities_info_t xen_caps = "";
- if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
+ struct hvm_hw_cpu ctx;
+ if (xc_domain_hvm_getcontext_partial(xc_handle, dom,
+ HVM_SAVE_CODE(CPU), vcpu,
+ &ctx, sizeof ctx) != 0)
return 0;
- /* HVM context records are always host-sized */
- if (strstr(xen_caps, "xen-3.0-x86_64")) {
- cr0 = ctx.x64.ctrlreg[0];
- cr3 = ctx.x64.ctrlreg[3];
- cr4 = ctx.x64.ctrlreg[4];
- } else {
- cr0 = ctx.x32.ctrlreg[0];
- cr3 = ctx.x32.ctrlreg[3];
- cr4 = ctx.x32.ctrlreg[4];
- }
- if (!(cr0 & CR0_PG))
+ if (!(ctx.cr0 & CR0_PG))
return virt;
- if (0 /* XXX how to get EFER.LMA? */)
- pt_levels = 4;
- else
- pt_levels = (cr4 & CR4_PAE) ? 3 : 2;
- paddr = cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
+ pt_levels = (ctx.msr_efer&EFER_LMA) ? 4 : (ctx.cr4&CR4_PAE) ? 3 : 2;
+ paddr = ctx.cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
} else {
DECLARE_DOMCTL;
+ vcpu_guest_context_any_t ctx;
+ if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
+ return 0;
domctl.domain = dom;
domctl.cmd = XEN_DOMCTL_get_address_size;
if ( do_domctl(xc_handle, &domctl) != 0 )
diff -r af992824b5cf -r c7cba853583d tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/libxc/xenctrl.h Fri Feb 13 11:22:28 2009 +0900
@@ -375,6 +375,25 @@ int xc_domain_hvm_getcontext(int xc_hand
uint8_t *ctxt_buf,
uint32_t size);
+
+/**
+ * This function returns one element of the context of a hvm domain
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm typecode which type of element is required
+ * @parm instance which instance of the type
+ * @parm ctxt_buf a pointer to a structure to store the execution context of
+ * the hvm domain
+ * @parm size the size of ctxt_buf (must be >= HVM_SAVE_LENGTH(typecode))
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_getcontext_partial(int xc_handle,
+ uint32_t domid,
+ uint16_t typecode,
+ uint16_t instance,
+ void *ctxt_buf,
+ uint32_t size);
+
/**
* This function will set the context for hvm domain
*
@@ -1074,6 +1093,12 @@ int xc_domain_update_msi_irq(
uint32_t gvec,
uint32_t pirq,
uint32_t gflags);
+
+int xc_domain_unbind_msi_irq(int xc_handle,
+ uint32_t domid,
+ uint32_t gvec,
+ uint32_t pirq,
+ uint32_t gflags);
int xc_domain_bind_pt_irq(int xc_handle,
uint32_t domid,
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/XendAPIStore.py
--- a/tools/python/xen/xend/XendAPIStore.py Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/XendAPIStore.py Fri Feb 13 11:22:28 2009 +0900
@@ -33,7 +33,8 @@ def register(uuid, type, inst):
def deregister(uuid, type):
old = get(uuid, type)
- del __classes[(uuid, type)]
+ if old is not None:
+ del __classes[(uuid, type)]
return old
def get(uuid, type):
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/image.py Fri Feb 13 11:22:28 2009 +0900
@@ -372,8 +372,6 @@ class ImageHandler:
env['DISPLAY'] = self.display
if self.xauthority:
env['XAUTHORITY'] = self.xauthority
- if self.vncconsole:
- args = args + ([ "-vncviewer" ])
unique_id = "%i-%i" % (self.vm.getDomid(), time.time())
sentinel_path = sentinel_path_prefix + unique_id
sentinel_path_fifo = sentinel_path + '.fifo'
@@ -558,24 +556,30 @@ class ImageHandler:
os.kill(self.pid, signal.SIGHUP)
except OSError, exn:
log.exception(exn)
- try:
- # Try to reap the child every 100ms for 10s. Then SIGKILL it.
- for i in xrange(100):
+ # Try to reap the child every 100ms for 10s. Then SIGKILL it.
+ for i in xrange(100):
+ try:
(p, rv) = os.waitpid(self.pid, os.WNOHANG)
if p == self.pid:
break
- time.sleep(0.1)
- else:
- log.warning("DeviceModel %d took more than 10s "
- "to terminate: sending SIGKILL" % self.pid)
+ except OSError:
+ # This is expected if Xend has been restarted within
+ # the life of this domain. In this case, we can kill
+ # the process, but we can't wait for it because it's
+ # not our child. We continue this loop, and after it is
+ # terminated make really sure the process is going away
+ # (SIGKILL).
+ pass
+ time.sleep(0.1)
+ else:
+ log.warning("DeviceModel %d took more than 10s "
+ "to terminate: sending SIGKILL" % self.pid)
+ try:
os.kill(self.pid, signal.SIGKILL)
os.waitpid(self.pid, 0)
- except OSError, exn:
- # This is expected if Xend has been restarted within the
- # life of this domain. In this case, we can kill the process,
- # but we can't wait for it because it's not our child.
- # We just make really sure it's going away (SIGKILL) first.
- os.kill(self.pid, signal.SIGKILL)
+ except OSError:
+ # This happens if the process doesn't exist.
+ pass
state = xstransact.Remove("/local/domain/0/device-model/%i"
% self.vm.getDomid())
finally:
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xend/server/pciquirk.py
--- a/tools/python/xen/xend/server/pciquirk.py Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xend/server/pciquirk.py Fri Feb 13 11:22:28 2009 +0900
@@ -123,7 +123,8 @@ class PCIQuirk:
log.info("Config file does not exist: %s" % PERMISSIVE_CONFIG_FILE)
self.pci_perm_dev_config = ['xend-pci-perm-devs']
- devices = child_at(child(pci_perm_dev_config, 'unconstrained_dev_ids'),0)
+ devices = child_at(child(self.pci_perm_dev_config,
+ 'unconstrained_dev_ids'),0)
if self.__matchPCIdev( devices ):
log.debug("Permissive mode enabled for PCI device [%s]" %
self.devid)
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xm/create.py Fri Feb 13 11:22:28 2009 +0900
@@ -1337,7 +1337,7 @@ def main(argv):
elif not opts.is_xml:
dom = make_domain(opts, config)
- if opts.vals.vncviewer:
+ if opts.vals.vncconsole:
domid = domain_name_to_domid(sxp.child_value(config, 'name', -1))
vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False)
console.runVncViewer(domid, vncviewer_autopass, True)
diff -r af992824b5cf -r c7cba853583d tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/python/xen/xm/main.py Fri Feb 13 11:22:28 2009 +0900
@@ -59,7 +59,11 @@ import XenAPI
import XenAPI
import xen.lowlevel.xc
-xc = xen.lowlevel.xc.xc()
+try:
+ xc = xen.lowlevel.xc.xc()
+except Exception, ex:
+ print >>sys.stderr, ("Is xen kernel running?")
+ sys.exit(1)
import inspect
from xen.xend import XendOptions
@@ -735,7 +739,7 @@ def xm_save(args):
(options, params) = getopt.gnu_getopt(args, 'c', ['checkpoint'])
except getopt.GetoptError, opterr:
err(opterr)
- sys.exit(1)
+ usage('save')
checkpoint = False
for (k, v) in options:
diff -r af992824b5cf -r c7cba853583d tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c Fri Feb 13 10:56:01 2009 +0900
+++ b/tools/xentrace/xenctx.c Fri Feb 13 11:22:28 2009 +0900
@@ -26,6 +26,7 @@
#include "xenctrl.h"
#include <xen/foreign/x86_32.h>
#include <xen/foreign/x86_64.h>
+#include <xen/hvm/save.h>
int xc_handle = 0;
int domid = 0;
@@ -287,6 +288,35 @@ static void print_ctx_32(vcpu_guest_cont
}
}
+static void print_ctx_32on64(vcpu_guest_context_x86_64_t *ctx)
+{
+ struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
+
+ printf("cs:eip: %04x:%08x ", regs->cs, (uint32_t)regs->eip);
+ print_symbol((uint32_t)regs->eip);
+ print_flags((uint32_t)regs->eflags);
+ printf("ss:esp: %04x:%08x\n", regs->ss, (uint32_t)regs->esp);
+
+ printf("eax: %08x\t", (uint32_t)regs->eax);
+ printf("ebx: %08x\t", (uint32_t)regs->ebx);
+ printf("ecx: %08x\t", (uint32_t)regs->ecx);
+ printf("edx: %08x\n", (uint32_t)regs->edx);
+
+ printf("esi: %08x\t", (uint32_t)regs->esi);
+ printf("edi: %08x\t", (uint32_t)regs->edi);
+ printf("ebp: %08x\n", (uint32_t)regs->ebp);
+
+ printf(" ds: %04x\t", regs->ds);
+ printf(" es: %04x\t", regs->es);
+ printf(" fs: %04x\t", regs->fs);
+ printf(" gs: %04x\n", regs->gs);
+
+ if (disp_all) {
+ print_special(ctx->ctrlreg, "cr", 0x1d, 4);
+ print_special(ctx->debugreg, "dr", 0xcf, 4);
+ }
+}
+
static void print_ctx_64(vcpu_guest_context_x86_64_t *ctx)
{
struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
@@ -335,6 +365,8 @@ static void print_ctx(vcpu_guest_context
{
if (ctxt_word_size == 4)
print_ctx_32(&ctx->x32);
+ else if (guest_word_size == 4)
+ print_ctx_32on64(&ctx->x64);
else
print_ctx_64(&ctx->x64);
}
@@ -788,23 +820,29 @@ static void dump_ctx(int vcpu)
#if defined(__i386__) || defined(__x86_64__)
{
- struct xen_domctl domctl;
- memset(&domctl, 0, sizeof domctl);
- domctl.domain = domid;
- domctl.cmd = XEN_DOMCTL_get_address_size;
- if (xc_domctl(xc_handle, &domctl) == 0)
- ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
if (dominfo.hvm) {
+ struct hvm_hw_cpu cpuctx;
xen_capabilities_info_t xen_caps = "";
+ if (xc_domain_hvm_getcontext_partial(
+ xc_handle, domid, HVM_SAVE_CODE(CPU),
+ vcpu, &cpuctx, sizeof cpuctx) != 0) {
+ perror("xc_domain_hvm_getcontext_partial");
+ exit(-1);
+ }
+ guest_word_size = (cpuctx.msr_efer & 0x400) ? 8 : 4;
+ /* HVM guest context records are always host-sized */
if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) {
perror("xc_version");
exit(-1);
}
- /* HVM guest context records are always host-sized */
ctxt_word_size = (strstr(xen_caps, "xen-3.0-x86_64")) ? 8 : 4;
- /* XXX For now we can't tell whether a HVM guest is in long
- * XXX mode; eventually fix this here and in xc_pagetab.c */
- guest_word_size = 4;
+ } else {
+ struct xen_domctl domctl;
+ memset(&domctl, 0, sizeof domctl);
+ domctl.domain = domid;
+ domctl.cmd = XEN_DOMCTL_get_address_size;
+ if (xc_domctl(xc_handle, &domctl) == 0)
+ ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
}
}
#endif
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/iosapic.c
--- a/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 11:22:28 2009 +0900
@@ -93,6 +93,16 @@
#include <asm/ptrace.h>
#include <asm/system.h>
+#ifdef XEN
+static inline int iosapic_irq_to_vector (int irq)
+{
+ return irq;
+}
+
+#undef irq_to_vector
+#define irq_to_vector(irq) iosapic_irq_to_vector(irq)
+#define AUTO_ASSIGN AUTO_ASSIGN_IRQ
+#endif
#undef DEBUG_INTERRUPT_ROUTING
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/irq_ia64.c
--- a/xen/arch/ia64/linux-xen/irq_ia64.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/irq_ia64.c Fri Feb 13 11:22:28 2009 +0900
@@ -250,6 +250,7 @@ register_percpu_irq (ia64_vector vec, st
register_percpu_irq (ia64_vector vec, struct irqaction *action)
{
irq_desc_t *desc;
+#ifndef XEN
unsigned int irq;
for (irq = 0; irq < NR_IRQS; ++irq)
@@ -258,16 +259,19 @@ register_percpu_irq (ia64_vector vec, st
desc->status |= IRQ_PER_CPU;
desc->handler = &irq_type_ia64_lsapic;
if (action)
-#ifdef XEN
- setup_vector(irq, action);
-#else
setup_irq(irq, action);
-#endif
- }
-}
-
-#ifdef XEN
-int request_irq(unsigned int irq,
+ }
+#else
+ desc = irq_descp(vec);
+ desc->status |= IRQ_PER_CPU;
+ desc->handler = &irq_type_ia64_lsapic;
+ if (action)
+ setup_vector(vec, action);
+#endif
+}
+
+#ifdef XEN
+int request_irq_vector(unsigned int vector,
void (*handler)(int, void *, struct cpu_user_regs *),
unsigned long irqflags, const char * devname, void *dev_id)
{
@@ -279,7 +283,7 @@ int request_irq(unsigned int irq,
* otherwise we'll have trouble later trying to figure out
* which interrupt is which (messes up the interrupt freeing logic etc).
* */
- if (irq >= NR_IRQS)
+ if (vector >= NR_VECTORS)
return -EINVAL;
if (!handler)
return -EINVAL;
@@ -291,7 +295,7 @@ int request_irq(unsigned int irq,
action->handler = handler;
action->name = devname;
action->dev_id = dev_id;
- setup_vector(irq, action);
+ setup_vector(vector, action);
if (retval)
xfree(action);
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/linux-xen/mca.c
--- a/xen/arch/ia64/linux-xen/mca.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/linux-xen/mca.c Fri Feb 13 11:22:28 2009 +0900
@@ -114,7 +114,6 @@ extern void ia64_slave_init_handler (v
extern void ia64_slave_init_handler (void);
#ifdef XEN
extern void setup_vector (unsigned int vec, struct irqaction *action);
-#define setup_irq(irq, action) setup_vector(irq, action)
#endif
static ia64_mc_info_t ia64_mc_info;
@@ -1931,12 +1930,18 @@ ia64_mca_late_init(void)
if (cpe_vector >= 0) {
/* If platform supports CPEI, enable the irq. */
cpe_poll_enabled = 0;
+#ifndef XEN
for (irq = 0; irq < NR_IRQS; ++irq)
if (irq_to_vector(irq) == cpe_vector) {
desc = irq_descp(irq);
desc->status |= IRQ_PER_CPU;
- setup_irq(irq, &mca_cpe_irqaction);
+ setup_vector(irq, &mca_cpe_irqaction);
}
+#else
+ desc = irq_descp(cpe_vector);
+ desc->status |= IRQ_PER_CPU;
+ setup_vector(cpe_vector, &mca_cpe_irqaction);
+#endif
ia64_mca_register_cpev(cpe_vector);
IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n",
__FUNCTION__);
} else {
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/xen/hypercall.c Fri Feb 13 11:22:28 2009 +0900
@@ -543,7 +543,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
break;
irq_status_query.flags = 0;
/* Edge-triggered interrupts don't need an explicit unmask downcall. */
- if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
+ if ( !strstr(irq_descp(irq)->handler->typename, "edge") )
irq_status_query.flags |= XENIRQSTAT_needs_eoi;
ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
break;
diff -r af992824b5cf -r c7cba853583d xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/ia64/xen/irq.c Fri Feb 13 11:22:28 2009 +0900
@@ -228,11 +228,11 @@ out:
* disabled.
*/
-int setup_vector(unsigned int irq, struct irqaction * new)
+int setup_vector(unsigned int vector, struct irqaction * new)
{
unsigned long flags;
struct irqaction *old, **p;
- irq_desc_t *desc = irq_descp(irq);
+ irq_desc_t *desc = irq_descp(vector);
/*
* The following block of code has to be executed atomically
@@ -248,8 +248,8 @@ int setup_vector(unsigned int irq, struc
desc->depth = 0;
desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_GUEST);
- desc->handler->startup(irq);
- desc->handler->enable(irq);
+ desc->handler->startup(vector);
+ desc->handler->enable(vector);
spin_unlock_irqrestore(&desc->lock,flags);
return 0;
@@ -258,13 +258,11 @@ int setup_vector(unsigned int irq, struc
/* Vectors reserved by xen (and thus not sharable with domains). */
unsigned long ia64_xen_vector[BITS_TO_LONGS(NR_IRQS)];
-int setup_irq(unsigned int irq, struct irqaction * new)
-{
- unsigned int vec;
+int setup_irq_vector(unsigned int vec, struct irqaction * new)
+{
int res;
- /* Get vector for IRQ. */
- if (acpi_gsi_to_irq (irq, &vec) < 0)
+ if ( vec == IA64_INVALID_VECTOR )
return -ENOSYS;
/* Reserve the vector (and thus the irq). */
if (test_and_set_bit(vec, ia64_xen_vector))
@@ -273,14 +271,12 @@ int setup_irq(unsigned int irq, struct i
return res;
}
-void free_irq(unsigned int irq)
-{
- unsigned int vec;
+void release_irq_vector(unsigned int vec)
+{
unsigned long flags;
irq_desc_t *desc;
- /* Get vector for IRQ. */
- if (acpi_gsi_to_irq(irq, &vec) < 0)
+ if ( vec == IA64_INVALID_VECTOR )
return;
desc = irq_descp(vec);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/Makefile Fri Feb 13 11:22:28 2009 +0900
@@ -92,3 +92,4 @@ clean::
clean::
rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
+ rm -f boot/reloc.S boot/reloc.lnk boot/reloc.bin
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Feb 13 11:22:28 2009 +0900
@@ -58,6 +58,9 @@ static struct acpi_cpufreq_data *drv_dat
static struct cpufreq_driver acpi_cpufreq_driver;
+static unsigned int __read_mostly acpi_pstate_strict;
+integer_param("acpi_pstate_strict", acpi_pstate_strict);
+
static int check_est_cpu(unsigned int cpuid)
{
struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
@@ -180,7 +183,7 @@ static void drv_read(struct drv_cmd *cmd
ASSERT(cpus_weight(cmd->mask) == 1);
/* to reduce IPI for the sake of performance */
- if (cpu_isset(smp_processor_id(), cmd->mask))
+ if (likely(cpu_isset(smp_processor_id(), cmd->mask)))
do_drv_read((void *)cmd);
else
on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1);
@@ -196,15 +199,16 @@ static u32 get_cur_val(cpumask_t mask)
struct cpufreq_policy *policy;
struct processor_performance *perf;
struct drv_cmd cmd;
- unsigned int cpu;
+ unsigned int cpu = smp_processor_id();
if (unlikely(cpus_empty(mask)))
return 0;
- cpu = first_cpu(mask);
+ if (!cpu_isset(cpu, mask))
+ cpu = first_cpu(mask);
policy = cpufreq_cpu_policy[cpu];
- if (!policy)
+ if (cpu >= NR_CPUS || !policy || !drv_data[policy->cpu])
return 0;
switch (drv_data[policy->cpu]->cpu_feature) {
@@ -214,7 +218,7 @@ static u32 get_cur_val(cpumask_t mask)
break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
- perf = drv_data[first_cpu(mask)]->acpi_data;
+ perf = drv_data[policy->cpu]->acpi_data;
cmd.addr.io.port = perf->control_register.address;
cmd.addr.io.bit_width = perf->control_register.bit_width;
break;
@@ -393,7 +397,7 @@ static int acpi_cpufreq_target(struct cp
drv_write(&cmd);
- if (!check_freqs(cmd.mask, freqs.new, data)) {
+ if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) {
printk(KERN_WARNING "Fail transfer to new freq %d\n", freqs.new);
return -EAGAIN;
}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/acpi/suspend.c
--- a/xen/arch/x86/acpi/suspend.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/acpi/suspend.c Fri Feb 13 11:22:28 2009 +0900
@@ -65,6 +65,9 @@ void restore_rest_processor_state(void)
/* Reload FPU state on next FPU use. */
stts();
+ if (cpu_has_pat)
+ wrmsrl(MSR_IA32_CR_PAT, host_pat);
+
mtrr_ap_init();
mcheck_init(&boot_cpu_data);
}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/Makefile
--- a/xen/arch/x86/boot/Makefile Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/boot/Makefile Fri Feb 13 11:22:28 2009 +0900
@@ -1,1 +1,7 @@ obj-y += head.o
obj-y += head.o
+
+head.o: reloc.S
+
+# NB. BOOT_TRAMPOLINE == 0x8c000
+%.S: %.c
+ RELOC=0x8c000 $(MAKE) -f build32.mk $@
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/build32.mk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/build32.mk Fri Feb 13 11:22:28 2009 +0900
@@ -0,0 +1,24 @@
+XEN_ROOT=../../../..
+override XEN_TARGET_ARCH=x86_32
+CFLAGS =
+include $(XEN_ROOT)/Config.mk
+
+# Disable PIE/SSP if GCC supports them. They can break us.
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
+
+CFLAGS += -Werror -fno-builtin -msoft-float
+
+%.S: %.bin
+ (od -v -t x $< | head -n -1 | \
+ sed 's/ /,0x/g' | sed 's/^[0-9]*,/ .long /') >$@
+
+%.bin: %.lnk
+ $(OBJCOPY) -O binary $< $@
+
+%.lnk: %.o
+ $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x8c000 -o $@ $<
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $< -o $@
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/head.S
--- a/xen/arch/x86/boot/head.S Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/boot/head.S Fri Feb 13 11:22:28 2009 +0900
@@ -79,8 +79,11 @@ __start:
cmp $0x2BADB002,%eax
jne not_multiboot
- /* Save the Multiboot info structure for later use. */
- mov %ebx,sym_phys(multiboot_ptr)
+ /* Save the Multiboot info struct (after relocation) for later use. */
+ mov $sym_phys(cpu0_stack)+1024,%esp
+ push %ebx
+ call reloc
+ mov %eax,sym_phys(multiboot_ptr)
/* Initialize BSS (no nasty surprises!) */
mov $sym_phys(__bss_start),%edi
@@ -192,6 +195,9 @@ 2: cmp $L1_PAGETABLE_ENTRIES,%e
#include "cmdline.S"
+reloc:
+#include "reloc.S"
+
.align 16
.globl trampoline_start, trampoline_end
trampoline_start:
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/boot/reloc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/reloc.c Fri Feb 13 11:22:28 2009 +0900
@@ -0,0 +1,89 @@
+/******************************************************************************
+ * reloc.c
+ *
+ * 32-bit flat memory-map routines for relocating Multiboot structures
+ * and modules. This is most easily done early with paging disabled.
+ *
+ * Copyright (c) 2009, Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser <keir.fraser@xxxxxxxxxx>
+ */
+
+asm (
+ " .text \n"
+ " .globl _start \n"
+ "_start: \n"
+ " mov $_start,%edi \n"
+ " call 1f \n"
+ "1: pop %esi \n"
+ " sub $1b-_start,%esi \n"
+ " mov $__bss_start-_start,%ecx \n"
+ " rep movsb \n"
+ " xor %eax,%eax \n"
+ " mov $_end,%ecx \n"
+ " sub %edi,%ecx \n"
+ " rep stosb \n"
+ " mov $reloc,%eax \n"
+ " jmp *%eax \n"
+ );
+
+typedef unsigned int u32;
+#include "../../../include/xen/multiboot.h"
+
+extern char _start[];
+
+static void *memcpy(void *dest, const void *src, unsigned int n)
+{
+ char *s = (char *)src, *d = dest;
+ while ( n-- )
+ *d++ = *s++;
+ return dest;
+}
+
+static void *reloc_mbi_struct(void *old, unsigned int bytes)
+{
+ static void *alloc = &_start;
+ alloc = (void *)(((unsigned long)alloc - bytes) & ~15ul);
+ return memcpy(alloc, old, bytes);
+}
+
+static char *reloc_mbi_string(char *old)
+{
+ char *p;
+ for ( p = old; *p != '\0'; p++ )
+ continue;
+ return reloc_mbi_struct(old, p - old + 1);
+}
+
+multiboot_info_t *reloc(multiboot_info_t *mbi_old)
+{
+ multiboot_info_t *mbi = reloc_mbi_struct(mbi_old, sizeof(*mbi));
+ int i;
+
+ if ( mbi->flags & MBI_CMDLINE )
+ mbi->cmdline = (u32)reloc_mbi_string((char *)mbi->cmdline);
+
+ if ( mbi->flags & MBI_MODULES )
+ {
+ module_t *mods = reloc_mbi_struct(
+ (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
+ mbi->mods_addr = (u32)mods;
+ for ( i = 0; i < mbi->mods_count; i++ )
+ if ( mods[i].string )
+ mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
+ }
+
+ if ( mbi->flags & MBI_MEMMAP )
+ mbi->mmap_addr = (u32)reloc_mbi_struct(
+ (memory_map_t *)mbi->mmap_addr, mbi->mmap_length);
+
+ /* Mask features we don't understand or don't relocate. */
+ mbi->flags &= (MBI_MEMLIMITS |
+ MBI_DRIVES |
+ MBI_CMDLINE |
+ MBI_MODULES |
+ MBI_MEMMAP);
+
+ return mbi;
+}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/amd_k8.c
--- a/xen/arch/x86/cpu/mcheck/amd_k8.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c Fri Feb 13 11:22:28 2009 +0900
@@ -99,6 +99,8 @@ void k8_machine_check(struct cpu_user_re
mc_data = x86_mcinfo_getptr();
cpu_nr = smp_processor_id();
+ BUG_ON(cpu_nr != vcpu->processor);
+
curdom = vcpu->domain;
memset(&mc_global, 0, sizeof(mc_global));
@@ -106,14 +108,12 @@ void k8_machine_check(struct cpu_user_re
mc_global.common.size = sizeof(mc_global);
mc_global.mc_domid = curdom->domain_id; /* impacted domain */
- mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
- BUG_ON(cpu_nr != vcpu->processor);
- mc_global.mc_core_threadid = 0;
+
+ x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid,
+ &mc_global.mc_coreid, &mc_global.mc_core_threadid,
+ &mc_global.mc_apicid, NULL, NULL, NULL);
+
mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
- It's not clear to me how to figure this out. */
- mc_global.mc_socketid = ???;
-#endif
mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/amd_nonfatal.c
--- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Fri Feb 13 11:22:28 2009 +0900
@@ -95,6 +95,7 @@ void mce_amd_checkregs(void *info)
mc_data = NULL;
cpu_nr = smp_processor_id();
+ BUG_ON(cpu_nr != vcpu->processor);
event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
error_found = 0;
@@ -103,14 +104,12 @@ void mce_amd_checkregs(void *info)
mc_global.common.size = sizeof(mc_global);
mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
- mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
- BUG_ON(cpu_nr != vcpu->processor);
- mc_global.mc_core_threadid = 0;
mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
- It's not clear to me how to figure this out. */
- mc_global.mc_socketid = ???;
-#endif
+
+ x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid,
+ &mc_global.mc_coreid, &mc_global.mc_core_threadid,
+ &mc_global.mc_apicid, NULL, NULL, NULL);
+
mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Feb 13 11:22:28 2009 +0900
@@ -443,6 +443,96 @@ next:
+static void do_mc_get_cpu_info(void *v)
+{
+ int cpu = smp_processor_id();
+ int cindex, cpn;
+ struct cpuinfo_x86 *c;
+ xen_mc_logical_cpu_t *log_cpus, *xcp;
+ uint32_t junk, ebx;
+
+ log_cpus = v;
+ c = &cpu_data[cpu];
+ cindex = 0;
+ cpn = cpu - 1;
+
+ /*
+ * Deal with sparse masks, condensed into a contig array.
+ */
+ while (cpn >= 0) {
+ if (cpu_isset(cpn, cpu_online_map))
+ cindex++;
+ cpn--;
+ }
+
+ xcp = &log_cpus[cindex];
+ c = &cpu_data[cpu];
+ xcp->mc_cpunr = cpu;
+ x86_mc_get_cpu_info(cpu, &xcp->mc_chipid,
+ &xcp->mc_coreid, &xcp->mc_threadid,
+ &xcp->mc_apicid, &xcp->mc_ncores,
+ &xcp->mc_ncores_active, &xcp->mc_nthreads);
+ xcp->mc_cpuid_level = c->cpuid_level;
+ xcp->mc_family = c->x86;
+ xcp->mc_vendor = c->x86_vendor;
+ xcp->mc_model = c->x86_model;
+ xcp->mc_step = c->x86_mask;
+ xcp->mc_cache_size = c->x86_cache_size;
+ xcp->mc_cache_alignment = c->x86_cache_alignment;
+ memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid);
+ memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid);
+ memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps);
+
+ /*
+ * This part needs to run on the CPU itself.
+ */
+ xcp->mc_nmsrvals = __MC_NMSRS;
+ xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
+ rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
+
+ if (c->cpuid_level >= 1) {
+ cpuid(1, &junk, &ebx, &junk, &junk);
+ xcp->mc_clusterid = (ebx >> 24) & 0xff;
+ } else
+ xcp->mc_clusterid = hard_smp_processor_id();
+}
+
+
+void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid,
+ uint16_t *threadid, uint32_t *apicid,
+ unsigned *ncores, unsigned *ncores_active,
+ unsigned *nthreads)
+{
+ struct cpuinfo_x86 *c;
+
+ *apicid = cpu_physical_id(cpu);
+ c = &cpu_data[cpu];
+ if (c->apicid == BAD_APICID) {
+ *chipid = cpu;
+ *coreid = 0;
+ *threadid = 0;
+ if (ncores != NULL)
+ *ncores = 1;
+ if (ncores_active != NULL)
+ *ncores_active = 1;
+ if (nthreads != NULL)
+ *nthreads = 1;
+ } else {
+ *chipid = phys_proc_id[cpu];
+ if (c->x86_max_cores > 1)
+ *coreid = cpu_core_id[cpu];
+ else
+ *coreid = 0;
+ *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
+ if (ncores != NULL)
+ *ncores = c->x86_max_cores;
+ if (ncores_active != NULL)
+ *ncores_active = c->booted_cores;
+ if (nthreads != NULL)
+ *nthreads = c->x86_num_siblings;
+ }
+}
+
/* Machine Check Architecture Hypercall */
long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc)
{
@@ -452,6 +542,7 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
struct domain *domU;
struct xen_mc_fetch *mc_fetch;
struct xen_mc_notifydomain *mc_notifydomain;
+ struct xen_mc_physcpuinfo *mc_physcpuinfo;
struct mc_info *mi;
uint32_t flags;
uint32_t fetch_idx;
@@ -460,6 +551,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
* a DomU to fetch mc data while Dom0 notifies another DomU. */
static DEFINE_SPINLOCK(mc_lock);
static DEFINE_SPINLOCK(mc_notify_lock);
+ int nlcpu;
+ xen_mc_logical_cpu_t *log_cpus = NULL;
if ( copy_from_guest(op, u_xen_mc, 1) )
return -EFAULT;
@@ -580,6 +673,43 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
spin_unlock(&mc_notify_lock);
break;
+
+ case XEN_MC_physcpuinfo:
+ if ( !IS_PRIV(v->domain) )
+ return -EPERM;
+
+ mc_physcpuinfo = &op->u.mc_physcpuinfo;
+ nlcpu = num_online_cpus();
+
+ if (!guest_handle_is_null(mc_physcpuinfo->info)) {
+ if (mc_physcpuinfo->ncpus <= 0)
+ return -EINVAL;
+ nlcpu = min(nlcpu, (int)mc_physcpuinfo->ncpus);
+ log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu);
+ if (log_cpus == NULL)
+ return -ENOMEM;
+
+ if (on_each_cpu(do_mc_get_cpu_info, log_cpus,
+ 1, 1) != 0) {
+ xfree(log_cpus);
+ return -EIO;
+ }
+ }
+
+ mc_physcpuinfo->ncpus = nlcpu;
+
+ if (copy_to_guest(u_xen_mc, op, 1)) {
+ if (log_cpus != NULL)
+ xfree(log_cpus);
+ return -EFAULT;
+ }
+
+ if (!guest_handle_is_null(mc_physcpuinfo->info)) {
+ if (copy_to_guest(mc_physcpuinfo->info,
+ log_cpus, nlcpu))
+ ret = -EFAULT;
+ xfree(log_cpus);
+ }
}
return ret;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Feb 13 11:22:28 2009 +0900
@@ -34,4 +34,5 @@ int x86_mcinfo_add(struct mc_info *mi, v
int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
void x86_mcinfo_dump(struct mc_info *mi);
void mc_panic(char *s);
-
+void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
+ uint32_t *, uint32_t *, uint32_t *, uint32_t *);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 13 11:22:28 2009 +0900
@@ -182,11 +182,9 @@ static struct mc_info *machine_check_pol
mcg.mc_flags = MC_FLAG_POLLED;
else if (calltype == MC_FLAG_CMCI)
mcg.mc_flags = MC_FLAG_CMCI;
- mcg.mc_socketid = phys_proc_id[cpu];
- mcg.mc_coreid = cpu_core_id[cpu];
- mcg.mc_apicid = cpu_physical_id(cpu);
- mcg.mc_core_threadid =
- mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1));
+ x86_mc_get_cpu_info(
+ cpu, &mcg.mc_socketid, &mcg.mc_coreid,
+ &mcg.mc_core_threadid, &mcg.mc_apicid, NULL, NULL, NULL);
rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
for ( i = 0; i < nr_mce_banks; i++ ) {
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domain.c Fri Feb 13 11:22:28 2009 +0900
@@ -141,7 +141,7 @@ void dump_pageframe_info(struct domain *
}
else
{
- list_for_each_entry ( page, &d->page_list, list )
+ page_list_for_each ( page, &d->page_list )
{
printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
@@ -154,7 +154,7 @@ void dump_pageframe_info(struct domain *
p2m_pod_dump_data(d);
}
- list_for_each_entry ( page, &d->xenpage_list, list )
+ page_list_for_each ( page, &d->xenpage_list )
{
printk(" XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
@@ -352,6 +352,8 @@ int vcpu_initialise(struct vcpu *v)
v->arch.perdomain_ptes =
d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
+ spin_lock_init(&v->arch.shadow_ldt_lock);
+
return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
}
@@ -380,7 +382,7 @@ int arch_domain_create(struct domain *d,
INIT_LIST_HEAD(&d->arch.pdev_list);
d->arch.relmem = RELMEM_not_started;
- INIT_LIST_HEAD(&d->arch.relmem_list);
+ INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order, 0);
@@ -1655,9 +1657,8 @@ int hypercall_xlat_continuation(unsigned
#endif
static int relinquish_memory(
- struct domain *d, struct list_head *list, unsigned long type)
-{
- struct list_head *ent;
+ struct domain *d, struct page_list_head *list, unsigned long type)
+{
struct page_info *page;
unsigned long x, y;
int ret = 0;
@@ -1665,17 +1666,13 @@ static int relinquish_memory(
/* Use a recursive lock, as we may enter 'free_domheap_page'. */
spin_lock_recursive(&d->page_alloc_lock);
- ent = list->next;
- while ( ent != list )
- {
- page = list_entry(ent, struct page_info, list);
-
+ while ( (page = page_list_remove_head(list)) )
+ {
/* Grab a reference to the page so it won't disappear from under us. */
if ( unlikely(!get_page(page, d)) )
{
/* Couldn't get a reference -- someone is freeing this page. */
- ent = ent->next;
- list_move_tail(&page->list, &d->arch.relmem_list);
+ page_list_add_tail(page, &d->arch.relmem_list);
continue;
}
@@ -1687,6 +1684,7 @@ static int relinquish_memory(
break;
case -EAGAIN:
case -EINTR:
+ page_list_add(page, list);
set_bit(_PGT_pinned, &page->u.inuse.type_info);
put_page(page);
goto out;
@@ -1723,6 +1721,7 @@ static int relinquish_memory(
case 0:
break;
case -EINTR:
+ page_list_add(page, list);
page->u.inuse.type_info |= PGT_validated;
if ( x & PGT_partial )
put_page(page);
@@ -1730,6 +1729,7 @@ static int relinquish_memory(
ret = -EAGAIN;
goto out;
case -EAGAIN:
+ page_list_add(page, list);
page->u.inuse.type_info |= PGT_partial;
if ( x & PGT_partial )
put_page(page);
@@ -1746,9 +1746,8 @@ static int relinquish_memory(
}
}
- /* Follow the list chain and /then/ potentially free the page. */
- ent = ent->next;
- list_move_tail(&page->list, &d->arch.relmem_list);
+ /* Put the page on the list and /then/ potentially free it. */
+ page_list_add_tail(page, &d->arch.relmem_list);
put_page(page);
if ( hypercall_preempt_check() )
@@ -1758,7 +1757,12 @@ static int relinquish_memory(
}
}
- list_splice_init(&d->arch.relmem_list, list);
+ /* list is empty at this point. */
+ if ( !page_list_empty(&d->arch.relmem_list) )
+ {
+ *list = d->arch.relmem_list;
+ INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
+ }
out:
spin_unlock_recursive(&d->page_alloc_lock);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domain_build.c Fri Feb 13 11:22:28 2009 +0900
@@ -880,7 +880,7 @@ int __init construct_dom0(
}
si->first_p2m_pfn = pfn;
si->nr_p2m_frames = d->tot_pages - count;
- list_for_each_entry ( page, &d->page_list, list )
+ page_list_for_each ( page, &d->page_list )
{
mfn = page_to_mfn(page);
if ( get_gpfn_from_mfn(mfn) >= count )
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/domctl.c Fri Feb 13 11:22:28 2009 +0900
@@ -240,7 +240,7 @@ long arch_do_domctl(
struct domain *d = rcu_lock_domain_by_id(domctl->domain);
unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
uint64_t mfn;
- struct list_head *list_ent;
+ struct page_info *page;
ret = -EINVAL;
if ( d != NULL )
@@ -259,19 +259,19 @@ long arch_do_domctl(
goto getmemlist_out;
}
- ret = 0;
- list_ent = d->page_list.next;
- for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
+ ret = i = 0;
+ page_list_for_each(page, &d->page_list)
{
- mfn = page_to_mfn(list_entry(
- list_ent, struct page_info, list));
+ if ( i >= max_pfns )
+ break;
+ mfn = page_to_mfn(page);
if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
i, &mfn, 1) )
{
ret = -EFAULT;
break;
}
- list_ent = mfn_to_page(mfn)->list.next;
+ ++i;
}
spin_unlock(&d->page_alloc_lock);
@@ -416,6 +416,34 @@ long arch_do_domctl(
rcu_unlock_domain(d);
}
break;
+
+ case XEN_DOMCTL_gethvmcontext_partial:
+ {
+ struct domain *d;
+
+ ret = -ESRCH;
+ if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
+ break;
+
+ ret = xsm_hvmcontext(d, domctl->cmd);
+ if ( ret )
+ goto gethvmcontext_partial_out;
+
+ ret = -EINVAL;
+ if ( !is_hvm_domain(d) )
+ goto gethvmcontext_partial_out;
+
+ domain_pause(d);
+ ret = hvm_save_one(d, domctl->u.hvmcontext_partial.type,
+ domctl->u.hvmcontext_partial.instance,
+ domctl->u.hvmcontext_partial.buffer);
+ domain_unpause(d);
+
+ gethvmcontext_partial_out:
+ rcu_unlock_domain(d);
+ }
+ break;
+
case XEN_DOMCTL_set_address_size:
{
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/e820.c
--- a/xen/arch/x86/e820.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/e820.c Fri Feb 13 11:22:28 2009 +0900
@@ -1,10 +1,10 @@
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
+#include <xen/mm.h>
#include <xen/compat.h>
#include <xen/dmi.h>
#include <asm/e820.h>
-#include <asm/mm.h>
#include <asm/page.h>
/* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/i8259.c Fri Feb 13 11:22:28 2009 +0900
@@ -410,8 +410,8 @@ void __init init_IRQ(void)
}
/* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
- vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN;
- vector_irq[0x80] = NEVER_ASSIGN;
+ vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN_IRQ;
+ vector_irq[0x80] = NEVER_ASSIGN_IRQ;
apic_intr_init();
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/io_apic.c Fri Feb 13 11:22:28 2009 +0900
@@ -49,7 +49,6 @@ static struct { int pin, apic; } ioapic_
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
int skip_ioapic_setup;
@@ -88,9 +87,6 @@ static struct irq_pin_list {
[0 ... PIN_MAP_SIZE-1].pin = -1
};
static int irq_2_pin_free_entry = NR_IRQS;
-
-int vector_irq[NR_VECTORS] __read_mostly = {
- [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN};
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -668,56 +664,6 @@ static inline int IO_APIC_irq_trigger(in
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
u8 irq_vector[NR_IRQS] __read_mostly;
-
-int free_irq_vector(int vector)
-{
- int irq;
-
- BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
-
- spin_lock(&vector_lock);
- if ((irq = vector_irq[vector]) == AUTO_ASSIGN)
- vector_irq[vector] = FREE_TO_ASSIGN;
- spin_unlock(&vector_lock);
-
- return (irq == AUTO_ASSIGN) ? 0 : -EINVAL;
-}
-
-int assign_irq_vector(int irq)
-{
- static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
- unsigned vector;
-
- BUG_ON(irq >= NR_IRQS);
-
- spin_lock(&vector_lock);
-
- if ((irq != AUTO_ASSIGN) && (IO_APIC_VECTOR(irq) > 0)) {
- spin_unlock(&vector_lock);
- return IO_APIC_VECTOR(irq);
- }
-
- vector = current_vector;
- while (vector_irq[vector] != FREE_TO_ASSIGN) {
- vector += 8;
- if (vector > LAST_DYNAMIC_VECTOR)
- vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
-
- if (vector == current_vector) {
- spin_unlock(&vector_lock);
- return -ENOSPC;
- }
- }
-
- current_vector = vector;
- vector_irq[vector] = irq;
- if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = vector;
-
- spin_unlock(&vector_lock);
-
- return vector;
-}
static struct hw_interrupt_type ioapic_level_type;
static struct hw_interrupt_type ioapic_edge_type;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/irq.c Fri Feb 13 11:22:28 2009 +0900
@@ -27,6 +27,11 @@ boolean_param("noirqbalance", opt_noirqb
irq_desc_t irq_desc[NR_VECTORS];
+static DEFINE_SPINLOCK(vector_lock);
+int vector_irq[NR_VECTORS] __read_mostly = {
+ [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+};
+
static void __do_IRQ_guest(int vector);
void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
@@ -54,6 +59,56 @@ struct hw_interrupt_type no_irq_type = {
atomic_t irq_err_count;
+int free_irq_vector(int vector)
+{
+ int irq;
+
+ BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
+
+ spin_lock(&vector_lock);
+ if ((irq = vector_irq[vector]) == AUTO_ASSIGN_IRQ)
+ vector_irq[vector] = FREE_TO_ASSIGN_IRQ;
+ spin_unlock(&vector_lock);
+
+ return (irq == AUTO_ASSIGN_IRQ) ? 0 : -EINVAL;
+}
+
+int assign_irq_vector(int irq)
+{
+ static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
+ unsigned vector;
+
+ BUG_ON(irq >= NR_IRQS);
+
+ spin_lock(&vector_lock);
+
+ if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) {
+ spin_unlock(&vector_lock);
+ return IO_APIC_VECTOR(irq);
+ }
+
+ vector = current_vector;
+ while (vector_irq[vector] != FREE_TO_ASSIGN_IRQ) {
+ vector += 8;
+ if (vector > LAST_DYNAMIC_VECTOR)
+ vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
+
+ if (vector == current_vector) {
+ spin_unlock(&vector_lock);
+ return -ENOSPC;
+ }
+ }
+
+ current_vector = vector;
+ vector_irq[vector] = irq;
+ if (irq != AUTO_ASSIGN_IRQ)
+ IO_APIC_VECTOR(irq) = vector;
+
+ spin_unlock(&vector_lock);
+
+ return vector;
+}
+
asmlinkage void do_IRQ(struct cpu_user_regs *regs)
{
unsigned int vector = regs->entry_vector;
@@ -104,7 +159,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
spin_unlock(&desc->lock);
}
-int request_irq(unsigned int irq,
+int request_irq_vector(unsigned int vector,
void (*handler)(int, void *, struct cpu_user_regs *),
unsigned long irqflags, const char * devname, void *dev_id)
{
@@ -117,7 +172,7 @@ int request_irq(unsigned int irq,
* which interrupt is which (messes up the interrupt freeing
* logic etc).
*/
- if (irq >= NR_IRQS)
+ if (vector >= NR_VECTORS)
return -EINVAL;
if (!handler)
return -EINVAL;
@@ -130,34 +185,32 @@ int request_irq(unsigned int irq,
action->name = devname;
action->dev_id = dev_id;
- retval = setup_irq(irq, action);
+ retval = setup_irq_vector(vector, action);
if (retval)
xfree(action);
return retval;
}
-void free_irq(unsigned int irq)
-{
- unsigned int vector = irq_to_vector(irq);
- irq_desc_t *desc = &irq_desc[vector];
+void release_irq_vector(unsigned int vector)
+{
+ irq_desc_t *desc = &irq_desc[vector];
unsigned long flags;
spin_lock_irqsave(&desc->lock,flags);
desc->action = NULL;
desc->depth = 1;
desc->status |= IRQ_DISABLED;
- desc->handler->shutdown(irq);
+ desc->handler->shutdown(vector);
spin_unlock_irqrestore(&desc->lock,flags);
/* Wait to make sure it's not being used on another CPU */
do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
}
-int setup_irq(unsigned int irq, struct irqaction *new)
-{
- unsigned int vector = irq_to_vector(irq);
- irq_desc_t *desc = &irq_desc[vector];
+int setup_irq_vector(unsigned int vector, struct irqaction *new)
+{
+ irq_desc_t *desc = &irq_desc[vector];
unsigned long flags;
spin_lock_irqsave(&desc->lock,flags);
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm.c Fri Feb 13 11:22:28 2009 +0900
@@ -179,12 +179,6 @@ l2_pgentry_t *compat_idle_pg_table_l2 =
#define l3_disallow_mask(d) L3_DISALLOW_MASK
#endif
-static void queue_deferred_ops(struct domain *d, unsigned int ops)
-{
- ASSERT(d == current->domain);
- this_cpu(percpu_mm_info).deferred_ops |= ops;
-}
-
void __init init_frametable(void)
{
unsigned long nr_pages, page_step, i, mfn;
@@ -333,7 +327,7 @@ void share_xen_page_with_guest(
page->count_info |= PGC_allocated | 1;
if ( unlikely(d->xenheap_pages++ == 0) )
get_knownalive_domain(d);
- list_add_tail(&page->list, &d->xenpage_list);
+ page_list_add_tail(page, &d->xenpage_list);
}
spin_unlock(&d->page_alloc_lock);
@@ -464,14 +458,18 @@ void update_cr3(struct vcpu *v)
}
-static void invalidate_shadow_ldt(struct vcpu *v)
+static void invalidate_shadow_ldt(struct vcpu *v, int flush)
{
int i;
unsigned long pfn;
struct page_info *page;
-
+
+ BUG_ON(unlikely(in_irq()));
+
+ spin_lock(&v->arch.shadow_ldt_lock);
+
if ( v->arch.shadow_ldt_mapcnt == 0 )
- return;
+ goto out;
v->arch.shadow_ldt_mapcnt = 0;
@@ -486,11 +484,12 @@ static void invalidate_shadow_ldt(struct
put_page_and_type(page);
}
- /* Dispose of the (now possibly invalid) mappings from the TLB. */
- if ( v == current )
- queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
- else
- flush_tlb_mask(v->domain->domain_dirty_cpumask);
+ /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */
+ if ( flush )
+ flush_tlb_mask(v->vcpu_dirty_cpumask);
+
+ out:
+ spin_unlock(&v->arch.shadow_ldt_lock);
}
@@ -541,8 +540,10 @@ int map_ldt_shadow_page(unsigned int off
nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
+ spin_lock(&v->arch.shadow_ldt_lock);
l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
v->arch.shadow_ldt_mapcnt++;
+ spin_unlock(&v->arch.shadow_ldt_lock);
return 1;
}
@@ -989,7 +990,7 @@ void put_page_from_l1e(l1_pgentry_t l1e,
(d == e) )
{
for_each_vcpu ( d, v )
- invalidate_shadow_ldt(v);
+ invalidate_shadow_ldt(v, 1);
}
put_page(page);
}
@@ -2023,30 +2024,17 @@ int free_page_type(struct page_info *pag
unsigned long gmfn;
int rc;
- if ( likely(owner != NULL) )
- {
- /*
- * We have to flush before the next use of the linear mapping
- * (e.g., update_va_mapping()) or we could end up modifying a page
- * that is no longer a page table (and hence screw up ref counts).
- */
- if ( current->domain == owner )
- queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
- else
- flush_tlb_mask(owner->domain_dirty_cpumask);
-
- if ( unlikely(paging_mode_enabled(owner)) )
- {
- /* A page table is dirtied when its type count becomes zero. */
- paging_mark_dirty(owner, page_to_mfn(page));
-
- if ( shadow_mode_refcounts(owner) )
- return 0;
-
- gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
- ASSERT(VALID_M2P(gmfn));
- shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
- }
+ if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) )
+ {
+ /* A page table is dirtied when its type count becomes zero. */
+ paging_mark_dirty(owner, page_to_mfn(page));
+
+ if ( shadow_mode_refcounts(owner) )
+ return 0;
+
+ gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
+ ASSERT(VALID_M2P(gmfn));
+ shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
}
if ( !(type & PGT_partial) )
@@ -2366,8 +2354,8 @@ void cleanup_page_cacheattr(struct page_
int new_guest_cr3(unsigned long mfn)
{
- struct vcpu *v = current;
- struct domain *d = v->domain;
+ struct vcpu *curr = current;
+ struct domain *d = curr->domain;
int okay;
unsigned long old_base_mfn;
@@ -2377,19 +2365,19 @@ int new_guest_cr3(unsigned long mfn)
okay = paging_mode_refcounts(d)
? 0 /* Old code was broken, but what should it be? */
: mod_l4_entry(
- __va(pagetable_get_paddr(v->arch.guest_table)),
+ __va(pagetable_get_paddr(curr->arch.guest_table)),
l4e_from_pfn(
mfn,
(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
- pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
+ pagetable_get_pfn(curr->arch.guest_table), 0, 0) == 0;
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new compat baseptr %lx", mfn);
return 0;
}
- invalidate_shadow_ldt(v);
- write_ptbase(v);
+ invalidate_shadow_ldt(curr, 0);
+ write_ptbase(curr);
return 1;
}
@@ -2403,14 +2391,14 @@ int new_guest_cr3(unsigned long mfn)
return 0;
}
- invalidate_shadow_ldt(v);
-
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-
- v->arch.guest_table = pagetable_from_pfn(mfn);
- update_cr3(v);
-
- write_ptbase(v);
+ invalidate_shadow_ldt(curr, 0);
+
+ old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
+
+ curr->arch.guest_table = pagetable_from_pfn(mfn);
+ update_cr3(curr);
+
+ write_ptbase(curr);
if ( likely(old_base_mfn != 0) )
{
@@ -2440,6 +2428,10 @@ static void process_deferred_ops(void)
flush_tlb_local();
}
+ /*
+ * Do this after flushing TLBs, to ensure we see fresh LDT mappings
+ * via the linear pagetable mapping.
+ */
if ( deferred_ops & DOP_RELOAD_LDT )
(void)map_ldt_shadow_page(0);
@@ -2565,8 +2557,8 @@ int do_mmuext_op(
unsigned long mfn = 0, gmfn = 0, type;
unsigned int done = 0;
struct page_info *page;
- struct vcpu *v = current;
- struct domain *d = v->domain;
+ struct vcpu *curr = current;
+ struct domain *d = curr->domain;
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
{
@@ -2729,8 +2721,8 @@ int do_mmuext_op(
}
}
- old_mfn = pagetable_get_pfn(v->arch.guest_table_user);
- v->arch.guest_table_user = pagetable_from_pfn(mfn);
+ old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+ curr->arch.guest_table_user = pagetable_from_pfn(mfn);
if ( old_mfn != 0 )
{
@@ -2750,7 +2742,7 @@ int do_mmuext_op(
case MMUEXT_INVLPG_LOCAL:
if ( !paging_mode_enabled(d)
- || paging_invlpg(v, op.arg1.linear_addr) != 0 )
+ || paging_invlpg(curr, op.arg1.linear_addr) != 0 )
flush_tlb_one_local(op.arg1.linear_addr);
break;
@@ -2773,7 +2765,7 @@ int do_mmuext_op(
}
case MMUEXT_TLB_FLUSH_ALL:
- flush_tlb_mask(d->domain_dirty_cpumask);
+ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
break;
case MMUEXT_INVLPG_ALL:
@@ -2809,13 +2801,14 @@ int do_mmuext_op(
okay = 0;
MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
}
- else if ( (v->arch.guest_context.ldt_ents != ents) ||
- (v->arch.guest_context.ldt_base != ptr) )
+ else if ( (curr->arch.guest_context.ldt_ents != ents) ||
+ (curr->arch.guest_context.ldt_base != ptr) )
{
- invalidate_shadow_ldt(v);
- v->arch.guest_context.ldt_base = ptr;
- v->arch.guest_context.ldt_ents = ents;
- load_LDT(v);
+ invalidate_shadow_ldt(curr, 0);
+ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
+ curr->arch.guest_context.ldt_base = ptr;
+ curr->arch.guest_context.ldt_ents = ents;
+ load_LDT(curr);
this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
if ( ents != 0 )
this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
@@ -2931,8 +2924,7 @@ int do_mmu_update(
struct page_info *page;
int rc = 0, okay = 1, i = 0;
unsigned int cmd, done = 0;
- struct vcpu *v = current;
- struct domain *d = v->domain;
+ struct domain *d = current->domain;
struct domain_mmap_cache mapcache;
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -3042,7 +3034,8 @@ int do_mmu_update(
#endif
case PGT_writable_page:
perfc_incr(writable_mmu_updates);
- okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+ okay = paging_write_guest_entry(
+ current, va, req.val, _mfn(mfn));
break;
}
page_unlock(page);
@@ -3052,7 +3045,8 @@ int do_mmu_update(
else if ( get_page_type(page, PGT_writable_page) )
{
perfc_incr(writable_mmu_updates);
- okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+ okay = paging_write_guest_entry(
+ current, va, req.val, _mfn(mfn));
put_page_type(page);
}
@@ -3508,7 +3502,7 @@ int steal_page(
/* Unlink from original owner. */
if ( !(memflags & MEMF_no_refcount) )
d->tot_pages--;
- list_del(&page->list);
+ page_list_del(page, &d->page_list);
spin_unlock(&d->page_alloc_lock);
return 0;
@@ -3567,34 +3561,40 @@ int do_update_va_mapping(unsigned long v
if ( pl1e )
guest_unmap_l1e(v, pl1e);
- process_deferred_ops();
-
switch ( flags & UVMF_FLUSHTYPE_MASK )
{
case UVMF_TLB_FLUSH:
switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
{
case UVMF_LOCAL:
- flush_tlb_local();
+ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
break;
case UVMF_ALL:
- flush_tlb_mask(d->domain_dirty_cpumask);
+ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
break;
default:
+ if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
+ break;
if ( unlikely(!is_pv_32on64_domain(d) ?
get_user(vmask, (unsigned long *)bmap_ptr) :
get_user(vmask, (unsigned int *)bmap_ptr)) )
- rc = -EFAULT;
+ rc = -EFAULT, vmask = 0;
pmask = vcpumask_to_pcpumask(d, vmask);
+ if ( cpu_isset(smp_processor_id(), pmask) )
+ this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
flush_tlb_mask(pmask);
break;
}
break;
case UVMF_INVLPG:
+ if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
+ break;
switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
{
case UVMF_LOCAL:
+ if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
+ break;
if ( !paging_mode_enabled(d) ||
(paging_invlpg(v, va) != 0) )
flush_tlb_one_local(va);
@@ -3606,13 +3606,17 @@ int do_update_va_mapping(unsigned long v
if ( unlikely(!is_pv_32on64_domain(d) ?
get_user(vmask, (unsigned long *)bmap_ptr) :
get_user(vmask, (unsigned int *)bmap_ptr)) )
- rc = -EFAULT;
+ rc = -EFAULT, vmask = 0;
pmask = vcpumask_to_pcpumask(d, vmask);
+ if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
+ cpu_clear(smp_processor_id(), pmask);
flush_tlb_one_mask(pmask, va);
break;
}
break;
}
+
+ process_deferred_ops();
return rc;
}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/hap/hap.c Fri Feb 13 11:22:28 2009 +0900
@@ -45,11 +45,11 @@
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/************************************************/
/* HAP LOG DIRTY SUPPORT */
@@ -96,11 +96,10 @@ static struct page_info *hap_alloc(struc
ASSERT(hap_locked_by_me(d));
- if ( unlikely(list_empty(&d->arch.paging.hap.freelist)) )
+ pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+ if ( unlikely(!pg) )
return NULL;
- pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list);
- list_del(&pg->list);
d->arch.paging.hap.free_pages--;
p = hap_map_domain_page(page_to_mfn(pg));
@@ -118,7 +117,7 @@ static void hap_free(struct domain *d, m
ASSERT(hap_locked_by_me(d));
d->arch.paging.hap.free_pages++;
- list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
+ page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}
static struct page_info *hap_alloc_p2m_page(struct domain *d)
@@ -210,15 +209,13 @@ hap_set_allocation(struct domain *d, uns
}
d->arch.paging.hap.free_pages++;
d->arch.paging.hap.total_pages++;
- list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
+ page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}
else if ( d->arch.paging.hap.total_pages > pages )
{
/* Need to return memory to domheap */
- ASSERT(!list_empty(&d->arch.paging.hap.freelist));
- pg = list_entry(d->arch.paging.hap.freelist.next,
- struct page_info, list);
- list_del(&pg->list);
+ pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+ ASSERT(pg);
d->arch.paging.hap.free_pages--;
d->arch.paging.hap.total_pages--;
pg->count_info = 0;
@@ -393,7 +390,7 @@ void hap_domain_init(struct domain *d)
void hap_domain_init(struct domain *d)
{
hap_lock_init(d);
- INIT_LIST_HEAD(&d->arch.paging.hap.freelist);
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
/* This domain will use HAP for log-dirty mode */
paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Fri Feb 13 11:22:28 2009 +0900
@@ -63,7 +63,7 @@ static int ept_set_middle_entry(struct d
pg->count_info = 1;
pg->u.inuse.type_info = 1 | PGT_validated;
- list_add_tail(&pg->list, &d->arch.p2m->pages);
+ page_list_add_tail(pg, &d->arch.p2m->pages);
ept_entry->emt = 0;
ept_entry->igmt = 0;
@@ -116,12 +116,12 @@ static int ept_next_level(struct domain
}
/*
- * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * ept_set_entry() computes 'need_modify_vtd_table' for itself,
* by observing whether any gfn->mfn translations are modified.
*/
static int
-_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
- unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
{
ept_entry_t *table = NULL;
unsigned long gfn_remainder = gfn, offset = 0;
@@ -131,6 +131,7 @@ _ept_set_entry(struct domain *d, unsigne
int walk_level = order / EPT_TABLE_ORDER;
int direct_mmio = (p2mt == p2m_mmio_direct);
uint8_t igmt = 0;
+ int need_modify_vtd_table = 1;
/* we only support 4k and 2m pages now */
@@ -171,14 +172,23 @@ _ept_set_entry(struct domain *d, unsigne
if ( ret == GUEST_TABLE_SUPER_PAGE )
{
- ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->mfn == (mfn_x(mfn) - offset) )
+ need_modify_vtd_table = 0;
+ else
+ ept_entry->mfn = mfn_x(mfn) - offset;
+
if ( ept_entry->avail1 == p2m_ram_logdirty &&
p2mt == p2m_ram_rw )
for ( i = 0; i < 512; i++ )
paging_mark_dirty(d, mfn_x(mfn)-offset+i);
}
else
- ept_entry->mfn = mfn_x(mfn);
+ {
+ if ( ept_entry->mfn == mfn_x(mfn) )
+ need_modify_vtd_table = 0;
+ else
+ ept_entry->mfn = mfn_x(mfn);
+ }
ept_entry->avail1 = p2mt;
ept_entry->rsvd = 0;
@@ -239,7 +249,10 @@ _ept_set_entry(struct domain *d, unsigne
&igmt, direct_mmio);
split_ept_entry->igmt = igmt;
- split_ept_entry->mfn = mfn_x(mfn);
+ if ( split_ept_entry->mfn == mfn_x(mfn) )
+ need_modify_vtd_table = 0;
+ else
+ split_ept_entry->mfn = mfn_x(mfn);
split_ept_entry->avail1 = p2mt;
ept_p2m_type_to_flags(split_ept_entry, p2mt);
@@ -287,17 +300,6 @@ out:
}
return rv;
-}
-
-static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
- unsigned int order, p2m_type_t p2mt)
-{
- /* ept_set_entry() are called from set_entry(),
- * We should always create VT-d page table acording
- * to the gfn to mfn translations changes.
- */
- return _ept_set_entry(d, gfn, mfn, order, p2mt, 1);
}
/* Read ept p2m entries */
@@ -393,6 +395,21 @@ static mfn_t ept_get_entry_current(unsig
return ept_get_entry(current->domain, gfn, t, q);
}
+/* Test whether the new emt type is the same as the old one; return 0
+ * if so (no need to reset the EPT entry), 1 otherwise.
+ */
+static int need_modify_ept_entry(struct domain *d, unsigned long gfn,
+ unsigned long mfn, uint8_t o_igmt,
+ uint8_t o_emt, p2m_type_t p2mt)
+{
+ uint8_t igmt, emt;
+ emt = epte_get_entry_emt(d, gfn, mfn, &igmt,
+ (p2mt == p2m_mmio_direct));
+ if ( (emt == o_emt) && (igmt == o_igmt) )
+ return 0;
+ return 1;
+}
+
void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
unsigned long end_gfn)
{
@@ -401,6 +418,7 @@ void ept_change_entry_emt_with_range(str
uint64_t epte;
int order = 0;
unsigned long mfn;
+ uint8_t o_igmt, o_emt;
for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
{
@@ -410,7 +428,9 @@ void ept_change_entry_emt_with_range(str
mfn = (epte & EPTE_MFN_MASK) >> PAGE_SHIFT;
if ( !mfn_valid(mfn) )
continue;
- p2mt = (epte & EPTE_AVAIL1_MASK) >> 8;
+ p2mt = (epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT;
+ o_igmt = (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT;
+ o_emt = (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT;
order = 0;
if ( epte & EPTE_SUPER_PAGE_MASK )
@@ -422,30 +442,26 @@ void ept_change_entry_emt_with_range(str
* Set emt for super page.
*/
order = EPT_TABLE_ORDER;
- /* vmx_set_uc_mode() dont' touch the gfn to mfn
- * translations, only modify the emt field of the EPT entries.
- * so we need not modify the current VT-d page tables.
- */
- _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
+ if ( need_modify_ept_entry(d, gfn, mfn,
+ o_igmt, o_emt, p2mt) )
+ ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
gfn += 0x1FF;
}
else
{
- /* 1)change emt for partial entries of the 2m area.
- * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
- * translations, only modify the emt field of the EPT entries.
- * so we need not modify the current VT-d page tables.
- */
- _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0);
+ /* change emt for partial entries of the 2m area. */
+ if ( need_modify_ept_entry(d, gfn, mfn,
+ o_igmt, o_emt, p2mt) )
+ ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
}
}
- else /* 1)gfn assigned with 4k
- * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
- * translations, only modify the emt field of the EPT entries.
- * so we need not modify the current VT-d page tables.
- */
- _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
+ else /* gfn assigned with 4k */
+ {
+ if ( need_modify_ept_entry(d, gfn, mfn,
+ o_igmt, o_emt, p2mt) )
+ ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ }
}
}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/p2m.c Fri Feb 13 11:22:28 2009 +0900
@@ -89,11 +89,11 @@
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/* PTE flags for the various types of p2m entry */
@@ -175,7 +175,7 @@ p2m_next_level(struct domain *d, mfn_t *
struct page_info *pg = d->arch.p2m->alloc_page(d);
if ( pg == NULL )
return 0;
- list_add_tail(&pg->list, &d->arch.p2m->pages);
+ page_list_add_tail(pg, &d->arch.p2m->pages);
pg->u.inuse.type_info = type | 1 | PGT_validated;
pg->count_info = 1;
@@ -214,7 +214,7 @@ p2m_next_level(struct domain *d, mfn_t *
struct page_info *pg = d->arch.p2m->alloc_page(d);
if ( pg == NULL )
return 0;
- list_add_tail(&pg->list, &d->arch.p2m->pages);
+ page_list_add_tail(pg, &d->arch.p2m->pages);
pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
pg->count_info = 1;
@@ -300,18 +300,18 @@ p2m_pod_cache_add(struct domain *d,
for(i=0; i < 1 << order ; i++)
{
p = page + i;
- list_del(&p->list);
+ page_list_del(p, &d->page_list);
}
/* Then add the first one to the appropriate populate-on-demand list */
switch(order)
{
case 9:
- list_add_tail(&page->list, &p2md->pod.super); /* lock: page_alloc */
+ page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
p2md->pod.count += 1 << order;
break;
case 0:
- list_add_tail(&page->list, &p2md->pod.single); /* lock: page_alloc */
+ page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
p2md->pod.count += 1 ;
break;
default:
@@ -334,54 +334,51 @@ static struct page_info * p2m_pod_cache_
struct page_info *p = NULL;
int i;
- if ( order == 9 && list_empty(&p2md->pod.super) )
+ if ( order == 9 && page_list_empty(&p2md->pod.super) )
{
return NULL;
}
- else if ( order == 0 && list_empty(&p2md->pod.single) )
+ else if ( order == 0 && page_list_empty(&p2md->pod.single) )
{
unsigned long mfn;
struct page_info *q;
- BUG_ON( list_empty(&p2md->pod.super) );
+ BUG_ON( page_list_empty(&p2md->pod.super) );
/* Break up a superpage to make single pages. NB count doesn't
* need to be adjusted. */
printk("%s: Breaking up superpage.\n", __func__);
- p = list_entry(p2md->pod.super.next, struct page_info, list);
- list_del(&p->list);
+ p = page_list_remove_head(&p2md->pod.super);
mfn = mfn_x(page_to_mfn(p));
for ( i=0; i<(1<<9); i++ )
{
q = mfn_to_page(_mfn(mfn+i));
- list_add_tail(&q->list, &p2md->pod.single);
+ page_list_add_tail(q, &p2md->pod.single);
}
}
switch ( order )
{
case 9:
- BUG_ON( list_empty(&p2md->pod.super) );
- p = list_entry(p2md->pod.super.next, struct page_info, list);
+ BUG_ON( page_list_empty(&p2md->pod.super) );
+ p = page_list_remove_head(&p2md->pod.super);
p2md->pod.count -= 1 << order; /* Lock: page_alloc */
break;
case 0:
- BUG_ON( list_empty(&p2md->pod.single) );
- p = list_entry(p2md->pod.single.next, struct page_info, list);
+ BUG_ON( page_list_empty(&p2md->pod.single) );
+ p = page_list_remove_head(&p2md->pod.single);
p2md->pod.count -= 1;
break;
default:
BUG();
}
- list_del(&p->list);
-
/* Put the pages back on the domain page_list */
for ( i = 0 ; i < (1 << order) ; i++ )
{
BUG_ON(page_get_owner(p + i) != d);
- list_add_tail(&p[i].list, &d->page_list);
+ page_list_add_tail(p + i, &d->page_list);
}
return p;
@@ -425,7 +422,7 @@ p2m_pod_set_cache_target(struct domain *
spin_lock(&d->page_alloc_lock);
if ( (p2md->pod.count - pod_target) > (1>>9)
- && !list_empty(&p2md->pod.super) )
+ && !page_list_empty(&p2md->pod.super) )
order = 9;
else
order = 0;
@@ -535,38 +532,27 @@ p2m_pod_empty_cache(struct domain *d)
p2m_pod_empty_cache(struct domain *d)
{
struct p2m_domain *p2md = d->arch.p2m;
- struct list_head *q, *p;
+ struct page_info *page;
spin_lock(&d->page_alloc_lock);
- list_for_each_safe(p, q, &p2md->pod.super) /* lock: page_alloc */
+ while ( (page = page_list_remove_head(&p2md->pod.super)) )
{
int i;
- struct page_info *page;
- list_del(p);
-
- page = list_entry(p, struct page_info, list);
-
for ( i = 0 ; i < (1 << 9) ; i++ )
{
BUG_ON(page_get_owner(page + i) != d);
- list_add_tail(&page[i].list, &d->page_list);
+ page_list_add_tail(page + i, &d->page_list);
}
p2md->pod.count -= 1<<9;
}
- list_for_each_safe(p, q, &p2md->pod.single)
- {
- struct page_info *page;
-
- list_del(p);
-
- page = list_entry(p, struct page_info, list);
-
+ while ( (page = page_list_remove_head(&p2md->pod.single)) )
+ {
BUG_ON(page_get_owner(page) != d);
- list_add_tail(&page->list, &d->page_list);
+ page_list_add_tail(page, &d->page_list);
p2md->pod.count -= 1;
}
@@ -952,7 +938,7 @@ p2m_pod_emergency_sweep_super(struct dom
* NB that this is a zero-sum game; we're increasing our cache size
* by increasing our 'debt'. Since we hold the p2m lock,
* (entry_count - count) must remain the same. */
- if ( !list_empty(&p2md->pod.super) && i < limit )
+ if ( !page_list_empty(&p2md->pod.super) && i < limit )
break;
}
@@ -1035,12 +1021,12 @@ p2m_pod_demand_populate(struct domain *d
}
/* If we're low, start a sweep */
- if ( order == 9 && list_empty(&p2md->pod.super) )
+ if ( order == 9 && page_list_empty(&p2md->pod.super) )
p2m_pod_emergency_sweep_super(d);
- if ( list_empty(&p2md->pod.single) &&
+ if ( page_list_empty(&p2md->pod.single) &&
( ( order == 0 )
- || (order == 9 && list_empty(&p2md->pod.super) ) ) )
+ || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
p2m_pod_emergency_sweep(d);
/* Keep track of the highest gfn demand-populated by a guest fault */
@@ -1477,9 +1463,9 @@ int p2m_init(struct domain *d)
memset(p2m, 0, sizeof(*p2m));
p2m_lock_init(p2m);
- INIT_LIST_HEAD(&p2m->pages);
- INIT_LIST_HEAD(&p2m->pod.super);
- INIT_LIST_HEAD(&p2m->pod.single);
+ INIT_PAGE_LIST_HEAD(&p2m->pages);
+ INIT_PAGE_LIST_HEAD(&p2m->pod.super);
+ INIT_PAGE_LIST_HEAD(&p2m->pod.single);
p2m->set_entry = p2m_set_entry;
p2m->get_entry = p2m_gfn_to_mfn;
@@ -1540,7 +1526,6 @@ int p2m_alloc_table(struct domain *d,
{
mfn_t mfn = _mfn(INVALID_MFN);
- struct list_head *entry;
struct page_info *page, *p2m_top;
unsigned int page_count = 0;
unsigned long gfn = -1UL;
@@ -1566,7 +1551,7 @@ int p2m_alloc_table(struct domain *d,
p2m_unlock(p2m);
return -ENOMEM;
}
- list_add_tail(&p2m_top->list, &p2m->pages);
+ page_list_add_tail(p2m_top, &p2m->pages);
p2m_top->count_info = 1;
p2m_top->u.inuse.type_info =
@@ -1587,11 +1572,8 @@ int p2m_alloc_table(struct domain *d,
goto error;
/* Copy all existing mappings from the page list and m2p */
- for ( entry = d->page_list.next;
- entry != &d->page_list;
- entry = entry->next )
- {
- page = list_entry(entry, struct page_info, list);
+ page_list_for_each(page, &d->page_list)
+ {
mfn = page_to_mfn(page);
gfn = get_gpfn_from_mfn(mfn_x(mfn));
page_count++;
@@ -1621,19 +1603,14 @@ void p2m_teardown(struct domain *d)
/* Return all the p2m pages to Xen.
* We know we don't have any extra mappings to these pages */
{
- struct list_head *entry, *n;
struct page_info *pg;
struct p2m_domain *p2m = d->arch.p2m;
p2m_lock(p2m);
d->arch.phys_table = pagetable_null();
- list_for_each_safe(entry, n, &p2m->pages)
- {
- pg = list_entry(entry, struct page_info, list);
- list_del(entry);
+ while ( (pg = page_list_remove_head(&p2m->pages)) )
p2m->free_page(d, pg);
- }
p2m_unlock(p2m);
}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/paging.c Fri Feb 13 11:22:28 2009 +0900
@@ -47,11 +47,11 @@
/************************************************/
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/* The log-dirty lock. This protects the log-dirty bitmap from
* concurrent accesses (and teardowns, etc).
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/common.c Fri Feb 13 11:22:28 2009 +0900
@@ -48,9 +48,9 @@ void shadow_domain_init(struct domain *d
int i;
shadow_lock_init(d);
for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
- INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
- INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
- INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
/* Use shadow pagetables for log-dirty support */
paging_log_dirty_init(d, shadow_enable_log_dirty,
@@ -1291,9 +1291,9 @@ static inline int space_is_available(
for ( ; order <= shadow_max_order(d); ++order )
{
unsigned int n = count;
- const struct list_head *p;
-
- list_for_each ( p, &d->arch.paging.shadow.freelists[order] )
+ const struct page_info *sp;
+
+ page_list_for_each ( sp, &d->arch.paging.shadow.freelists[order] )
if ( --n == 0 )
return 1;
count = (count + 1) >> 1;
@@ -1306,8 +1306,8 @@ static inline int space_is_available(
* non-Xen mappings in this top-level shadow mfn */
static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
- switch ( sp->type )
+ struct page_info *sp = mfn_to_page(smfn);
+ switch ( sp->u.sh.type )
{
case SH_type_l2_32_shadow:
SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn);
@@ -1322,7 +1322,7 @@ static void shadow_unhook_mappings(struc
break;
#endif
default:
- SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->type);
+ SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->u.sh.type);
BUG();
}
}
@@ -1334,7 +1334,7 @@ static inline void trace_shadow_prealloc
/* Convert smfn to gfn */
unsigned long gfn;
ASSERT(mfn_valid(smfn));
- gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
+ gfn = mfn_to_gfn(d, _mfn(mfn_to_page(smfn)->v.sh.back));
__trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
sizeof(gfn), (unsigned char*)&gfn);
}
@@ -1350,8 +1350,7 @@ static void _shadow_prealloc(
/* Need a vpcu for calling unpins; for now, since we don't have
* per-vcpu shadows, any will do */
struct vcpu *v, *v2;
- struct list_head *l, *t;
- struct shadow_page_info *sp;
+ struct page_info *sp, *t;
mfn_t smfn;
int i;
@@ -1365,10 +1364,9 @@ static void _shadow_prealloc(
/* Stage one: walk the list of pinned pages, unpinning them */
perfc_incr(shadow_prealloc_1);
- list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows)
- {
- sp = list_entry(l, struct shadow_page_info, list);
- smfn = shadow_page_to_mfn(sp);
+    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
+ {
+ smfn = page_to_mfn(sp);
/* Unpin this top-level shadow */
trace_shadow_prealloc_unpin(d, smfn);
@@ -1427,8 +1425,7 @@ void shadow_prealloc(struct domain *d, u
* this domain's shadows */
static void shadow_blow_tables(struct domain *d)
{
- struct list_head *l, *t;
- struct shadow_page_info *sp;
+ struct page_info *sp, *t;
struct vcpu *v = d->vcpu[0];
mfn_t smfn;
int i;
@@ -1436,10 +1433,9 @@ static void shadow_blow_tables(struct do
ASSERT(v != NULL);
/* Pass one: unpin all pinned pages */
- list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows)
- {
- sp = list_entry(l, struct shadow_page_info, list);
- smfn = shadow_page_to_mfn(sp);
+    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
+ {
+ smfn = page_to_mfn(sp);
sh_unpin(v, smfn);
}
@@ -1493,6 +1489,18 @@ __initcall(shadow_blow_tables_keyhandler
__initcall(shadow_blow_tables_keyhandler_init);
#endif /* !NDEBUG */
+static inline struct page_info *
+next_shadow(const struct page_info *sp)
+{
+ return sp->next_shadow ? mfn_to_page(_mfn(sp->next_shadow)) : NULL;
+}
+
+static inline void
+set_next_shadow(struct page_info *sp, struct page_info *next)
+{
+ sp->next_shadow = next ? mfn_x(page_to_mfn(next)) : 0;
+}
+
/* Allocate another shadow's worth of (contiguous, aligned) pages,
* and fill in the type and backpointer fields of their page_infos.
* Never fails to allocate. */
@@ -1500,7 +1508,7 @@ mfn_t shadow_alloc(struct domain *d,
u32 shadow_type,
unsigned long backpointer)
{
- struct shadow_page_info *sp = NULL;
+ struct page_info *sp = NULL;
unsigned int order = shadow_order(shadow_type);
cpumask_t mask;
void *p;
@@ -1515,7 +1523,7 @@ mfn_t shadow_alloc(struct domain *d,
/* Find smallest order which can satisfy the request. */
for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
- if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
+        if ( (sp = page_list_remove_head(&d->arch.paging.shadow.freelists[i])) )
goto found;
/* If we get here, we failed to allocate. This should never happen.
@@ -1526,16 +1534,12 @@ mfn_t shadow_alloc(struct domain *d,
BUG();
found:
- sp = list_entry(d->arch.paging.shadow.freelists[i].next,
- struct shadow_page_info, list);
- list_del(&sp->list);
-
/* We may have to halve the chunk a number of times. */
while ( i != order )
{
i--;
- sp->order = i;
- list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]);
+ sp->v.free.order = i;
+ page_list_add_tail(sp, &d->arch.paging.shadow.freelists[i]);
sp += 1 << i;
}
d->arch.paging.shadow.free_pages -= 1 << order;
@@ -1553,26 +1557,26 @@ mfn_t shadow_alloc(struct domain *d,
flush_tlb_mask(mask);
}
/* Now safe to clear the page for reuse */
- p = sh_map_domain_page(shadow_page_to_mfn(sp+i));
+ p = sh_map_domain_page(page_to_mfn(sp+i));
ASSERT(p != NULL);
clear_page(p);
sh_unmap_domain_page(p);
- INIT_LIST_HEAD(&sp[i].list);
- sp[i].type = shadow_type;
- sp[i].pinned = 0;
- sp[i].count = 0;
- sp[i].backpointer = backpointer;
- sp[i].next_shadow = NULL;
+ INIT_PAGE_LIST_ENTRY(&sp[i].list);
+ sp[i].u.sh.type = shadow_type;
+ sp[i].u.sh.pinned = 0;
+ sp[i].u.sh.count = 0;
+ sp[i].v.sh.back = backpointer;
+ set_next_shadow(&sp[i], NULL);
perfc_incr(shadow_alloc_count);
}
- return shadow_page_to_mfn(sp);
+ return page_to_mfn(sp);
}
/* Return some shadow pages to the pool. */
void shadow_free(struct domain *d, mfn_t smfn)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
u32 shadow_type;
unsigned long order;
unsigned long mask;
@@ -1581,7 +1585,7 @@ void shadow_free(struct domain *d, mfn_t
ASSERT(shadow_locked_by_me(d));
perfc_incr(shadow_free);
- shadow_type = sp->type;
+ shadow_type = sp->u.sh.type;
ASSERT(shadow_type != SH_type_none);
ASSERT(shadow_type != SH_type_p2m_table);
order = shadow_order(shadow_type);
@@ -1605,7 +1609,7 @@ void shadow_free(struct domain *d, mfn_t
}
#endif
/* Strip out the type: this is now a free shadow page */
- sp[i].type = 0;
+ sp[i].u.sh.type = 0;
/* Remember the TLB timestamp so we will know whether to flush
* TLBs when we reuse the page. Because the destructors leave the
* contents of the pages in place, we can delay TLB flushes until
@@ -1618,22 +1622,24 @@ void shadow_free(struct domain *d, mfn_t
for ( ; order < shadow_max_order(d); ++order )
{
mask = 1 << order;
- if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
+ if ( (mfn_x(page_to_mfn(sp)) & mask) ) {
/* Merge with predecessor block? */
- if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) )
+ if ( ((sp-mask)->u.sh.type != PGT_none) ||
+ ((sp-mask)->v.free.order != order) )
break;
- list_del(&(sp-mask)->list);
sp -= mask;
+ page_list_del(sp, &d->arch.paging.shadow.freelists[order]);
} else {
/* Merge with successor block? */
- if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) )
+ if ( ((sp+mask)->u.sh.type != PGT_none) ||
+ ((sp+mask)->v.free.order != order) )
break;
- list_del(&(sp+mask)->list);
- }
- }
-
- sp->order = order;
- list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
+ page_list_del(sp + mask, &d->arch.paging.shadow.freelists[order]);
+ }
+ }
+
+ sp->v.free.order = order;
+ page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
}
/* Divert some memory from the pool to be used by the p2m mapping.
@@ -1672,7 +1678,7 @@ sh_alloc_p2m_pages(struct domain *d)
*/
page_set_owner(&pg[i], d);
pg[i].count_info = 1;
- list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist);
+ page_list_add_tail(&pg[i], &d->arch.paging.shadow.p2m_freelist);
}
return 1;
}
@@ -1681,25 +1687,22 @@ static struct page_info *
static struct page_info *
shadow_alloc_p2m_page(struct domain *d)
{
- struct list_head *entry;
struct page_info *pg;
mfn_t mfn;
void *p;
shadow_lock(d);
- if ( list_empty(&d->arch.paging.shadow.p2m_freelist) &&
+ if ( page_list_empty(&d->arch.paging.shadow.p2m_freelist) &&
!sh_alloc_p2m_pages(d) )
{
shadow_unlock(d);
return NULL;
}
- entry = d->arch.paging.shadow.p2m_freelist.next;
- list_del(entry);
+ pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist);
shadow_unlock(d);
- pg = list_entry(entry, struct page_info, list);
mfn = page_to_mfn(pg);
p = sh_map_domain_page(mfn);
clear_page(p);
@@ -1780,7 +1783,7 @@ static unsigned int sh_set_allocation(st
unsigned int pages,
int *preempted)
{
- struct shadow_page_info *sp;
+ struct page_info *sp;
unsigned int lower_bound;
unsigned int j, order = shadow_max_order(d);
@@ -1802,7 +1805,7 @@ static unsigned int sh_set_allocation(st
if ( d->arch.paging.shadow.total_pages < pages )
{
/* Need to allocate more memory from domheap */
- sp = (struct shadow_page_info *)
+ sp = (struct page_info *)
alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d)));
if ( sp == NULL )
{
@@ -1813,23 +1816,26 @@ static unsigned int sh_set_allocation(st
d->arch.paging.shadow.total_pages += 1 << order;
for ( j = 0; j < 1U << order; j++ )
{
- sp[j].type = 0;
- sp[j].pinned = 0;
- sp[j].count = 0;
- sp[j].mbz = 0;
+ sp[j].u.sh.type = 0;
+ sp[j].u.sh.pinned = 0;
+ sp[j].u.sh.count = 0;
sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
}
- sp->order = order;
- list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
+ sp->v.free.order = order;
+ page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
}
else if ( d->arch.paging.shadow.total_pages > pages )
{
/* Need to return memory to domheap */
_shadow_prealloc(d, order, 1);
- ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
- sp = list_entry(d->arch.paging.shadow.freelists[order].next,
- struct shadow_page_info, list);
- list_del(&sp->list);
+            sp = page_list_remove_head(&d->arch.paging.shadow.freelists[order]);
+ ASSERT(sp);
+ /*
+ * The pages were allocated anonymously, but the owner field
+             * normally gets overwritten, so it needs to be cleared here.
+ */
+ for ( j = 0; j < 1U << order; j++ )
+ page_set_owner(&((struct page_info *)sp)[j], NULL);
d->arch.paging.shadow.free_pages -= 1 << order;
d->arch.paging.shadow.total_pages -= 1 << order;
free_domheap_pages((struct page_info *)sp, order);
@@ -1880,7 +1886,7 @@ static void sh_hash_audit_bucket(struct
static void sh_hash_audit_bucket(struct domain *d, int bucket)
/* Audit one bucket of the hash table */
{
- struct shadow_page_info *sp, *x;
+ struct page_info *sp, *x;
if ( !(SHADOW_AUDIT_ENABLE) )
return;
@@ -1889,38 +1895,39 @@ static void sh_hash_audit_bucket(struct
while ( sp )
{
/* Not a shadow? */
- BUG_ON( sp->mbz != 0 );
+ BUG_ON( sp->count_info != 0 );
/* Bogus type? */
- BUG_ON( sp->type == 0 );
- BUG_ON( sp->type > SH_type_max_shadow );
+ BUG_ON( sp->u.sh.type == 0 );
+ BUG_ON( sp->u.sh.type > SH_type_max_shadow );
/* Wrong bucket? */
- BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket );
+ BUG_ON( sh_hash(sp->v.sh.back, sp->u.sh.type) != bucket );
/* Duplicate entry? */
- for ( x = sp->next_shadow; x; x = x->next_shadow )
- BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type );
+ for ( x = next_shadow(sp); x; x = next_shadow(x) )
+ BUG_ON( x->v.sh.back == sp->v.sh.back &&
+ x->u.sh.type == sp->u.sh.type );
/* Follow the backpointer to the guest pagetable */
- if ( sp->type != SH_type_fl1_32_shadow
- && sp->type != SH_type_fl1_pae_shadow
- && sp->type != SH_type_fl1_64_shadow )
- {
- struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer));
+ if ( sp->u.sh.type != SH_type_fl1_32_shadow
+ && sp->u.sh.type != SH_type_fl1_pae_shadow
+ && sp->u.sh.type != SH_type_fl1_64_shadow )
+ {
+ struct page_info *gpg = mfn_to_page(_mfn(sp->v.sh.back));
/* Bad shadow flags on guest page? */
- BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
+ BUG_ON( !(gpg->shadow_flags & (1<<sp->u.sh.type)) );
/* Bad type count on guest page? */
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
- if ( sp->type == SH_type_l1_32_shadow
- || sp->type == SH_type_l1_pae_shadow
- || sp->type == SH_type_l1_64_shadow )
+ if ( sp->u.sh.type == SH_type_l1_32_shadow
+ || sp->u.sh.type == SH_type_l1_pae_shadow
+ || sp->u.sh.type == SH_type_l1_64_shadow )
{
             if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
&& (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
{
if ( !page_is_out_of_sync(gpg) )
{
- SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
+                    SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
" and not OOS but has typecount %#lx\n",
- sp->backpointer,
- mfn_x(shadow_page_to_mfn(sp)),
+ sp->v.sh.back,
+ mfn_x(page_to_mfn(sp)),
gpg->u.inuse.type_info);
BUG();
}
@@ -1931,15 +1938,15 @@ static void sh_hash_audit_bucket(struct
if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
&& (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
{
- SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
+ SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
" but has typecount %#lx\n",
- sp->backpointer, mfn_x(shadow_page_to_mfn(sp)),
+ sp->v.sh.back, mfn_x(page_to_mfn(sp)),
gpg->u.inuse.type_info);
BUG();
}
}
/* That entry was OK; on we go */
- sp = sp->next_shadow;
+ sp = next_shadow(sp);
}
}
@@ -1972,15 +1979,15 @@ static void sh_hash_audit(struct domain
* Returns 0 for success, 1 for error. */
static int shadow_hash_alloc(struct domain *d)
{
- struct shadow_page_info **table;
+ struct page_info **table;
ASSERT(shadow_locked_by_me(d));
ASSERT(!d->arch.paging.shadow.hash_table);
- table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS);
+ table = xmalloc_array(struct page_info *, SHADOW_HASH_BUCKETS);
if ( !table ) return 1;
memset(table, 0,
- SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *));
+ SHADOW_HASH_BUCKETS * sizeof (struct page_info *));
d->arch.paging.shadow.hash_table = table;
return 0;
}
@@ -2002,7 +2009,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
* or INVALID_MFN if it doesn't exist */
{
struct domain *d = v->domain;
- struct shadow_page_info *sp, *prev;
+ struct page_info *sp, *prev;
key_t key;
ASSERT(shadow_locked_by_me(d));
@@ -2019,21 +2026,21 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
prev = NULL;
while(sp)
{
- if ( sp->backpointer == n && sp->type == t )
+ if ( sp->v.sh.back == n && sp->u.sh.type == t )
{
/* Pull-to-front if 'sp' isn't already the head item */
if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
{
if ( unlikely(d->arch.paging.shadow.hash_walking != 0) )
/* Can't reorder: someone is walking the hash chains */
- return shadow_page_to_mfn(sp);
+ return page_to_mfn(sp);
else
{
ASSERT(prev);
/* Delete sp from the list */
prev->next_shadow = sp->next_shadow;
/* Re-insert it at the head of the list */
- sp->next_shadow = d->arch.paging.shadow.hash_table[key];
+ set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
d->arch.paging.shadow.hash_table[key] = sp;
}
}
@@ -2041,10 +2048,10 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
{
perfc_incr(shadow_hash_lookup_head);
}
- return shadow_page_to_mfn(sp);
+ return page_to_mfn(sp);
}
prev = sp;
- sp = sp->next_shadow;
+ sp = next_shadow(sp);
}
perfc_incr(shadow_hash_lookup_miss);
@@ -2056,7 +2063,7 @@ void shadow_hash_insert(struct vcpu *v,
/* Put a mapping (n,t)->smfn into the hash table */
{
struct domain *d = v->domain;
- struct shadow_page_info *sp;
+ struct page_info *sp;
key_t key;
ASSERT(shadow_locked_by_me(d));
@@ -2070,8 +2077,8 @@ void shadow_hash_insert(struct vcpu *v,
sh_hash_audit_bucket(d, key);
/* Insert this shadow at the top of the bucket */
- sp = mfn_to_shadow_page(smfn);
- sp->next_shadow = d->arch.paging.shadow.hash_table[key];
+ sp = mfn_to_page(smfn);
+ set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
d->arch.paging.shadow.hash_table[key] = sp;
sh_hash_audit_bucket(d, key);
@@ -2082,7 +2089,7 @@ void shadow_hash_delete(struct vcpu *v,
/* Excise the mapping (n,t)->smfn from the hash table */
{
struct domain *d = v->domain;
- struct shadow_page_info *sp, *x;
+ struct page_info *sp, *x;
key_t key;
ASSERT(shadow_locked_by_me(d));
@@ -2095,10 +2102,10 @@ void shadow_hash_delete(struct vcpu *v,
key = sh_hash(n, t);
sh_hash_audit_bucket(d, key);
- sp = mfn_to_shadow_page(smfn);
+ sp = mfn_to_page(smfn);
if ( d->arch.paging.shadow.hash_table[key] == sp )
/* Easy case: we're deleting the head item. */
- d->arch.paging.shadow.hash_table[key] = sp->next_shadow;
+ d->arch.paging.shadow.hash_table[key] = next_shadow(sp);
else
{
/* Need to search for the one we want */
@@ -2107,15 +2114,15 @@ void shadow_hash_delete(struct vcpu *v,
{
ASSERT(x); /* We can't have hit the end, since our target is
* still in the chain somehwere... */
- if ( x->next_shadow == sp )
+ if ( next_shadow(x) == sp )
{
x->next_shadow = sp->next_shadow;
break;
}
- x = x->next_shadow;
- }
- }
- sp->next_shadow = NULL;
+ x = next_shadow(x);
+ }
+ }
+ set_next_shadow(sp, NULL);
sh_hash_audit_bucket(d, key);
}
@@ -2137,7 +2144,7 @@ static void hash_foreach(struct vcpu *v,
{
int i, done = 0;
struct domain *d = v->domain;
- struct shadow_page_info *x;
+ struct page_info *x;
/* Say we're here, to stop hash-lookups reordering the chains */
ASSERT(shadow_locked_by_me(d));
@@ -2149,14 +2156,14 @@ static void hash_foreach(struct vcpu *v,
/* WARNING: This is not safe against changes to the hash table.
* The callback *must* return non-zero if it has inserted or
* deleted anything from the hash (lookups are OK, though). */
- for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow )
- {
- if ( callback_mask & (1 << x->type) )
+ for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
+ {
+ if ( callback_mask & (1 << x->u.sh.type) )
{
- ASSERT(x->type <= 15);
- ASSERT(callbacks[x->type] != NULL);
- done = callbacks[x->type](v, shadow_page_to_mfn(x),
- callback_mfn);
+ ASSERT(x->u.sh.type <= 15);
+ ASSERT(callbacks[x->u.sh.type] != NULL);
+ done = callbacks[x->u.sh.type](v, page_to_mfn(x),
+ callback_mfn);
if ( done ) break;
}
}
@@ -2173,8 +2180,8 @@ static void hash_foreach(struct vcpu *v,
void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
- unsigned int t = sp->type;
+ struct page_info *sp = mfn_to_page(smfn);
+ unsigned int t = sp->u.sh.type;
SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
@@ -2186,7 +2193,7 @@ void sh_destroy_shadow(struct vcpu *v, m
t == SH_type_fl1_64_shadow ||
t == SH_type_monitor_table ||
(is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) ||
- (page_get_owner(mfn_to_page(_mfn(sp->backpointer)))
+ (page_get_owner(mfn_to_page(_mfn(sp->v.sh.back)))
== v->domain));
/* The down-shifts here are so that the switch statement is on nice
@@ -2438,7 +2445,7 @@ int sh_remove_write_access(struct vcpu *
{
unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn);
- int shtype = mfn_to_shadow_page(last_smfn)->type;
+ int shtype = mfn_to_page(last_smfn)->u.sh.type;
if ( callbacks[shtype] )
callbacks[shtype](v, last_smfn, gmfn);
@@ -2481,25 +2488,25 @@ int sh_remove_write_access_from_sl1p(str
int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
mfn_t smfn, unsigned long off)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
ASSERT(mfn_valid(smfn));
ASSERT(mfn_valid(gmfn));
- if ( sp->type == SH_type_l1_32_shadow
- || sp->type == SH_type_fl1_32_shadow )
+ if ( sp->u.sh.type == SH_type_l1_32_shadow
+ || sp->u.sh.type == SH_type_fl1_32_shadow )
{
return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2)
(v, gmfn, smfn, off);
}
#if CONFIG_PAGING_LEVELS >= 3
- else if ( sp->type == SH_type_l1_pae_shadow
- || sp->type == SH_type_fl1_pae_shadow )
+ else if ( sp->u.sh.type == SH_type_l1_pae_shadow
+ || sp->u.sh.type == SH_type_fl1_pae_shadow )
return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3)
(v, gmfn, smfn, off);
#if CONFIG_PAGING_LEVELS >= 4
- else if ( sp->type == SH_type_l1_64_shadow
- || sp->type == SH_type_fl1_64_shadow )
+ else if ( sp->u.sh.type == SH_type_l1_64_shadow
+ || sp->u.sh.type == SH_type_fl1_64_shadow )
return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4)
(v, gmfn, smfn, off);
#endif
@@ -2601,17 +2608,17 @@ static int sh_remove_shadow_via_pointer(
/* Follow this shadow's up-pointer, if it has one, and remove the reference
* found there. Returns 1 if that was the only reference to this shadow */
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
mfn_t pmfn;
void *vaddr;
int rc;
- ASSERT(sp->type > 0);
- ASSERT(sp->type < SH_type_max_shadow);
- ASSERT(sp->type != SH_type_l2_32_shadow);
- ASSERT(sp->type != SH_type_l2_pae_shadow);
- ASSERT(sp->type != SH_type_l2h_pae_shadow);
- ASSERT(sp->type != SH_type_l4_64_shadow);
+ ASSERT(sp->u.sh.type > 0);
+ ASSERT(sp->u.sh.type < SH_type_max_shadow);
+ ASSERT(sp->u.sh.type != SH_type_l2_32_shadow);
+ ASSERT(sp->u.sh.type != SH_type_l2_pae_shadow);
+ ASSERT(sp->u.sh.type != SH_type_l2h_pae_shadow);
+ ASSERT(sp->u.sh.type != SH_type_l4_64_shadow);
if (sp->up == 0) return 0;
pmfn = _mfn(sp->up >> PAGE_SHIFT);
@@ -2622,10 +2629,10 @@ static int sh_remove_shadow_via_pointer(
ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
/* Is this the only reference to this shadow? */
- rc = (sp->count == 1) ? 1 : 0;
+ rc = (sp->u.sh.count == 1) ? 1 : 0;
/* Blank the offending entry */
- switch (sp->type)
+ switch (sp->u.sh.type)
{
case SH_type_l1_32_shadow:
case SH_type_l2_32_shadow:
@@ -3156,7 +3163,6 @@ void shadow_teardown(struct domain *d)
{
struct vcpu *v;
mfn_t mfn;
- struct list_head *entry, *n;
struct page_info *pg;
ASSERT(d->is_dying);
@@ -3208,12 +3214,8 @@ void shadow_teardown(struct domain *d)
}
#endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */
- list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
- {
- list_del(entry);
- pg = list_entry(entry, struct page_info, list);
+ while ( (pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist)) )
shadow_free_p2m_page(d, pg);
- }
if ( d->arch.paging.shadow.total_pages != 0 )
{
@@ -3657,7 +3659,6 @@ int shadow_track_dirty_vram(struct domai
for ( i = 0; i < nr; i++ ) {
mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
struct page_info *page;
- u32 count_info;
int dirty = 0;
paddr_t sl1ma = d->dirty_vram->sl1ma[i];
@@ -3668,8 +3669,7 @@ int shadow_track_dirty_vram(struct domai
else
{
page = mfn_to_page(mfn);
- count_info = page->u.inuse.type_info & PGT_count_mask;
- switch (count_info)
+ switch (page->u.inuse.type_info & PGT_count_mask)
{
case 0:
/* No guest reference, nothing to track. */
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c Fri Feb 13 11:22:28 2009 +0900
@@ -973,13 +973,13 @@ static int shadow_set_l2e(struct vcpu *v
}
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn);
- mfn_t gl1mfn = _mfn(sp->backpointer);
+ struct page_info *sp = mfn_to_page(sl1mfn);
+ mfn_t gl1mfn = _mfn(sp->v.sh.back);
/* If the shadow is a fl1 then the backpointer contains
the GFN instead of the GMFN, and it's definitely not
OOS. */
- if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
+ if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
&& mfn_is_out_of_sync(gl1mfn) )
sh_resync(v, gl1mfn);
}
@@ -1036,9 +1036,8 @@ static inline void shadow_vram_get_l1e(s
if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) )
{
unsigned long i = gfn - d->dirty_vram->begin_pfn;
struct page_info *page = mfn_to_page(mfn);
- u32 count_info = page->u.inuse.type_info & PGT_count_mask;
- if ( count_info == 1 )
+ if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
/* Initial guest reference, record it */
d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
| ((unsigned long)sl1e & ~PAGE_MASK);
@@ -1064,12 +1063,11 @@ static inline void shadow_vram_put_l1e(s
if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) )
{
unsigned long i = gfn - d->dirty_vram->begin_pfn;
struct page_info *page = mfn_to_page(mfn);
- u32 count_info = page->u.inuse.type_info & PGT_count_mask;
int dirty = 0;
paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
| ((unsigned long)sl1e & ~PAGE_MASK);
- if ( count_info == 1 ) {
+ if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) {
/* Last reference */
if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
/* We didn't know it was that one, let's say it is dirty */
@@ -1194,8 +1192,8 @@ do {
do { \
int _i; \
shadow_l1e_t *_sp = sh_map_domain_page((_sl1mfn)); \
- ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow \
- || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \
+ ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow \
+ || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\
for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \
{ \
(_sl1e) = _sp + _i; \
@@ -1232,7 +1230,7 @@ do {
do { \
int _i, _j, __done = 0; \
int _xen = !shadow_mode_external(_dom); \
- ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow); \
+ ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow);\
for ( _j = 0; _j < 4 && !__done; _j++ ) \
{ \
shadow_l2e_t *_sp = sh_map_domain_page(_sl2mfn); \
@@ -1260,11 +1258,11 @@ do {
int _i; \
int _xen = !shadow_mode_external(_dom); \
shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \
- ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow \
- || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\
+ ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow \
+ || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow);\
for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
if ( (!(_xen)) \
- || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\
+ || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_pae_shadow\
|| ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES)) \
< (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
{ \
@@ -1285,13 +1283,13 @@ do {
int _i; \
int _xen = !shadow_mode_external(_dom); \
shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \
- ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow || \
- mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow); \
+ ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\
+ mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\
for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
{ \
if ( (!(_xen)) \
|| !is_pv_32on64_domain(_dom) \
- || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow \
+ || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow\
|| (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) ) \
{ \
(_sl2e) = _sp + _i; \
@@ -1313,7 +1311,7 @@ do {
do { \
int _i; \
shadow_l3e_t *_sp = sh_map_domain_page((_sl3mfn)); \
- ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow); \
+ ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\
for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \
{ \
(_sl3e) = _sp + _i; \
@@ -1331,7 +1329,7 @@ do {
shadow_l4e_t *_sp = sh_map_domain_page((_sl4mfn)); \
int _xen = !shadow_mode_external(_dom); \
int _i; \
- ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow); \
+ ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\
for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \
{ \
if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) ) \
@@ -1506,7 +1504,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
&& shadow_type != SH_type_l2h_pae_shadow
&& shadow_type != SH_type_l4_64_shadow )
/* Lower-level shadow, not yet linked form a higher level */
- mfn_to_shadow_page(smfn)->up = 0;
+ mfn_to_page(smfn)->up = 0;
#if GUEST_PAGING_LEVELS == 4
#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
@@ -1519,14 +1517,12 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
* of them, decide that this isn't an old linux guest, and stop
* pinning l3es. This is not very quick but it doesn't happen
* very often. */
- struct list_head *l, *t;
- struct shadow_page_info *sp;
+ struct page_info *sp, *t;
struct vcpu *v2;
int l4count = 0, vcpus = 0;
- list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows)
- {
- sp = list_entry(l, struct shadow_page_info, list);
- if ( sp->type == SH_type_l4_64_shadow )
+ page_list_for_each(sp, &v->domain->arch.paging.shadow.pinned_shadows)
+ {
+ if ( sp->u.sh.type == SH_type_l4_64_shadow )
l4count++;
}
for_each_vcpu ( v->domain, v2 )
@@ -1534,11 +1530,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
if ( l4count > 2 * vcpus )
{
/* Unpin all the pinned l3 tables, and don't pin any more. */
-            list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows)
+            page_list_for_each_safe(sp, t, &v->domain->arch.paging.shadow.pinned_shadows)
{
- sp = list_entry(l, struct shadow_page_info, list);
- if ( sp->type == SH_type_l3_64_shadow )
- sh_unpin(v, shadow_page_to_mfn(sp));
+ if ( sp->u.sh.type == SH_type_l3_64_shadow )
+ sh_unpin(v, page_to_mfn(sp));
}
v->domain->arch.paging.shadow.opt_flags &=
~SHOPT_LINUX_L3_TOPLEVEL;
}
@@ -1921,7 +1916,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
{
shadow_l4e_t *sl4e;
- u32 t = mfn_to_shadow_page(smfn)->type;
+ u32 t = mfn_to_page(smfn)->u.sh.type;
mfn_t gmfn, sl4mfn;
SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1929,7 +1924,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
ASSERT(t == SH_type_l4_shadow);
/* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
/* Decrement refcounts of all the old entries */
@@ -1950,7 +1945,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
{
shadow_l3e_t *sl3e;
- u32 t = mfn_to_shadow_page(smfn)->type;
+ u32 t = mfn_to_page(smfn)->u.sh.type;
mfn_t gmfn, sl3mfn;
SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1958,7 +1953,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
ASSERT(t == SH_type_l3_shadow);
/* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
@@ -1980,7 +1975,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
{
shadow_l2e_t *sl2e;
- u32 t = mfn_to_shadow_page(smfn)->type;
+ u32 t = mfn_to_page(smfn)->u.sh.type;
mfn_t gmfn, sl2mfn;
SHADOW_DEBUG(DESTROY_SHADOW,
@@ -1993,7 +1988,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
#endif
/* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
@@ -2014,7 +2009,7 @@ void sh_destroy_l1_shadow(struct vcpu *v
{
struct domain *d = v->domain;
shadow_l1e_t *sl1e;
- u32 t = mfn_to_shadow_page(smfn)->type;
+ u32 t = mfn_to_page(smfn)->u.sh.type;
SHADOW_DEBUG(DESTROY_SHADOW,
"%s(%05lx)\n", __func__, mfn_x(smfn));
@@ -2023,12 +2018,12 @@ void sh_destroy_l1_shadow(struct vcpu *v
/* Record that the guest page isn't shadowed any more (in this type) */
if ( t == SH_type_fl1_shadow )
{
- gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer);
+ gfn_t gfn = _gfn(mfn_to_page(smfn)->v.sh.back);
delete_fl1_shadow_status(v, gfn, smfn);
}
else
{
- mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+ mfn_t gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
}
@@ -2054,7 +2049,7 @@ void sh_destroy_monitor_table(struct vcp
void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
{
struct domain *d = v->domain;
- ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table);
+ ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table);
#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
{
@@ -2298,7 +2293,7 @@ static int validate_gl2e(struct vcpu *v,
#if SHADOW_PAGING_LEVELS == 3
reserved_xen_slot =
- ((mfn_to_shadow_page(sl2mfn)->type == SH_type_l2h_pae_shadow) &&
+ ((mfn_to_page(sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow) &&
(shadow_index
>= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
#else /* SHADOW_PAGING_LEVELS == 2 */
@@ -2352,7 +2347,7 @@ static int validate_gl1e(struct vcpu *v,
result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
- gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+ gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
if ( mfn_valid(gl1mfn)
&& mfn_is_out_of_sync(gl1mfn) )
{
@@ -2429,30 +2424,30 @@ void sh_resync_l1(struct vcpu *v, mfn_t
* called in the *mode* of the vcpu that unsynced it. Clear? Good. */
int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn)
{
- struct shadow_page_info *sp;
+ struct page_info *sp;
mfn_t smfn;
smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
/* Up to l2 */
- sp = mfn_to_shadow_page(smfn);
- if ( sp->count != 1 || !sp->up )
+ sp = mfn_to_page(smfn);
+ if ( sp->u.sh.count != 1 || !sp->up )
return 0;
smfn = _mfn(sp->up >> PAGE_SHIFT);
ASSERT(mfn_valid(smfn));
#if (SHADOW_PAGING_LEVELS == 4)
/* up to l3 */
- sp = mfn_to_shadow_page(smfn);
- if ( sp->count != 1 || !sp->up )
+ sp = mfn_to_page(smfn);
+ if ( sp->u.sh.count != 1 || !sp->up )
return 0;
smfn = _mfn(sp->up >> PAGE_SHIFT);
ASSERT(mfn_valid(smfn));
/* up to l4 */
- sp = mfn_to_shadow_page(smfn);
- if ( sp->count != 1
+ sp = mfn_to_page(smfn);
+ if ( sp->u.sh.count != 1
|| sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up )
return 0;
smfn = _mfn(sp->up >> PAGE_SHIFT);
@@ -2970,8 +2965,8 @@ static int sh_page_fault(struct vcpu *v,
+ shadow_l2_linear_offset(va)),
sizeof(sl2e)) != 0)
|| !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
- || !mfn_valid(gl1mfn = _mfn(mfn_to_shadow_page(
- shadow_l2e_get_mfn(sl2e))->backpointer))
+ || !mfn_valid(gl1mfn = _mfn(mfn_to_page(
+ shadow_l2e_get_mfn(sl2e))->v.sh.back))
|| unlikely(mfn_is_out_of_sync(gl1mfn)) )
{
/* Hit the slow path as if there had been no
@@ -3523,7 +3518,7 @@ sh_invlpg(struct vcpu *v, unsigned long
// easier than invalidating all of the individual 4K pages).
//
sl1mfn = shadow_l2e_get_mfn(sl2e);
- if ( mfn_to_shadow_page(sl1mfn)->type
+ if ( mfn_to_page(sl1mfn)->u.sh.type
== SH_type_fl1_shadow )
{
flush_tlb_local();
@@ -3533,7 +3528,7 @@ sh_invlpg(struct vcpu *v, unsigned long
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Check to see if the SL1 is out of sync. */
{
- mfn_t gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+ mfn_t gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
struct page_info *pg = mfn_to_page(gl1mfn);
if ( mfn_valid(gl1mfn)
&& page_is_out_of_sync(pg) )
@@ -3563,7 +3558,7 @@ sh_invlpg(struct vcpu *v, unsigned long
}
sl1mfn = shadow_l2e_get_mfn(sl2e);
- gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+ gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
pg = mfn_to_page(gl1mfn);
if ( likely(sh_mfn_is_a_page_table(gl1mfn)
@@ -3968,7 +3963,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
/* Need to repin the old toplevel shadow if it's been unpinned
* by shadow_prealloc(): in PV mode we're still running on this
* shadow and it's not safe to free it yet. */
- if ( !mfn_to_shadow_page(old_smfn)->pinned && !sh_pin(v, old_smfn) )
+ if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(v, old_smfn) )
{
SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn));
domain_crash(v->domain);
@@ -4262,16 +4257,16 @@ int sh_rm_write_access_from_sl1p(struct
{
int r;
shadow_l1e_t *sl1p, sl1e;
- struct shadow_page_info *sp;
+ struct page_info *sp;
ASSERT(mfn_valid(gmfn));
ASSERT(mfn_valid(smfn));
- sp = mfn_to_shadow_page(smfn);
-
- if ( sp->mbz != 0
- || (sp->type != SH_type_l1_shadow
- && sp->type != SH_type_fl1_shadow) )
+ sp = mfn_to_page(smfn);
+
+ if ( sp->count_info != 0
+ || (sp->u.sh.type != SH_type_l1_shadow
+ && sp->u.sh.type != SH_type_fl1_shadow) )
goto fail;
sl1p = sh_map_domain_page(smfn);
@@ -4410,7 +4405,7 @@ void sh_clear_shadow_entry(struct vcpu *
void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
/* Blank out a single shadow entry */
{
- switch ( mfn_to_shadow_page(smfn)->type )
+ switch ( mfn_to_page(smfn)->u.sh.type )
{
case SH_type_l1_shadow:
(void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
@@ -4443,7 +4438,7 @@ int sh_remove_l1_shadow(struct vcpu *v,
&& (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
{
(void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
- if ( mfn_to_shadow_page(sl1mfn)->type == 0 )
+ if ( mfn_to_page(sl1mfn)->u.sh.type == 0 )
/* This breaks us cleanly out of the FOREACH macro */
done = 1;
}
@@ -4466,7 +4461,7 @@ int sh_remove_l2_shadow(struct vcpu *v,
&& (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
{
(void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
- if ( mfn_to_shadow_page(sl2mfn)->type == 0 )
+ if ( mfn_to_page(sl2mfn)->u.sh.type == 0 )
/* This breaks us cleanly out of the FOREACH macro */
done = 1;
}
@@ -4488,7 +4483,7 @@ int sh_remove_l3_shadow(struct vcpu *v,
&& (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
{
(void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
- if ( mfn_to_shadow_page(sl3mfn)->type == 0 )
+ if ( mfn_to_page(sl3mfn)->u.sh.type == 0 )
/* This breaks us cleanly out of the FOREACH macro */
done = 1;
}
@@ -4890,7 +4885,7 @@ int sh_audit_l1_table(struct vcpu *v, mf
int done = 0;
/* Follow the backpointer */
- gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+ gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Out-of-sync l1 shadows can contain anything: just check the OOS hash */
@@ -4980,7 +4975,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
int done = 0;
/* Follow the backpointer */
- gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer);
+ gl2mfn = _mfn(mfn_to_page(sl2mfn)->v.sh.back);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Only L1's may be out of sync. */
@@ -5029,7 +5024,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
int done = 0;
/* Follow the backpointer */
- gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer);
+ gl3mfn = _mfn(mfn_to_page(sl3mfn)->v.sh.back);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Only L1's may be out of sync. */
@@ -5076,7 +5071,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
int done = 0;
/* Follow the backpointer */
- gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer);
+ gl4mfn = _mfn(mfn_to_page(sl4mfn)->v.sh.back);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Only L1's may be out of sync. */
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/mm/shadow/private.h Fri Feb 13 11:22:28 2009 +0900
@@ -220,60 +220,6 @@ extern void shadow_audit_tables(struct v
#undef GUEST_LEVELS
#endif /* CONFIG_PAGING_LEVELS == 4 */
-/******************************************************************************
- * Page metadata for shadow pages.
- */
-
-struct shadow_page_info
-{
- union {
- /* Ensures that shadow_page_info is same size as page_info. */
- struct page_info page_info;
-
- struct {
- union {
- /* When in use, guest page we're a shadow of */
- unsigned long backpointer;
- /* When free, order of the freelist we're on */
- unsigned int order;
- };
- union {
- /* When in use, next shadow in this hash chain */
- struct shadow_page_info *next_shadow;
- /* When free, TLB flush time when freed */
- u32 tlbflush_timestamp;
- };
- struct {
- unsigned long mbz; /* Must be zero: count_info is here. */
- unsigned long type:5; /* What kind of shadow is this? */
- unsigned long pinned:1; /* Is the shadow pinned? */
- unsigned long count:26; /* Reference count */
- } __attribute__((packed));
- union {
- /* For unused shadow pages, a list of pages of this order; for
- * pinnable shadows, if pinned, a list of other pinned shadows
- * (see sh_type_is_pinnable() below for the definition of
- * "pinnable" shadow types). */
- struct list_head list;
- /* For non-pinnable shadows, a higher entry that points
- * at us. */
- paddr_t up;
- };
- };
- };
-};
-
-/* The structure above *must* be no larger than a struct page_info
- * from mm.h, since we'll be using the same space in the frametable.
- * Also, the mbz field must line up with the count_info field of normal
- * pages, so they cannot be successfully get_page()d. */
-static inline void shadow_check_page_struct_offsets(void) {
- BUILD_BUG_ON(sizeof (struct shadow_page_info) !=
- sizeof (struct page_info));
- BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
- offsetof(struct page_info, count_info));
-};
-
/* Shadow type codes */
#define SH_type_none (0U) /* on the shadow free list */
#define SH_type_min_shadow (1U)
@@ -528,22 +474,13 @@ mfn_t oos_snapshot_lookup(struct vcpu *v
* MFN/page-info handling
*/
-// Override mfn_to_page from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
+/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
-#define mfn_to_shadow_page(_m) ((struct shadow_page_info *)mfn_to_page(_m))
-
-// Override page_to_mfn from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
-#define shadow_page_to_mfn(_spg) (page_to_mfn((struct page_info *)_spg))
-
-// Override mfn_valid from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
-#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/* Override pagetable_t <-> struct page_info conversions to work with mfn_t */
#undef pagetable_get_page
@@ -675,26 +612,26 @@ static inline int sh_get_ref(struct vcpu
static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
{
u32 x, nx;
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
ASSERT(mfn_valid(smfn));
- x = sp->count;
+ x = sp->u.sh.count;
nx = x + 1;
if ( unlikely(nx >= 1U<<26) )
{
- SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
- sp->backpointer, mfn_x(smfn));
+ SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRpgmfn " smfn=%lx\n",
+ sp->v.sh.back, mfn_x(smfn));
return 0;
}
/* Guarded by the shadow lock, so no need for atomic update */
- sp->count = nx;
+ sp->u.sh.count = nx;
/* We remember the first shadow entry that points to each shadow. */
if ( entry_pa != 0
- && !sh_type_is_pinnable(v, sp->type)
+ && !sh_type_is_pinnable(v, sp->u.sh.type)
&& sp->up == 0 )
sp->up = entry_pa;
@@ -707,29 +644,29 @@ static inline void sh_put_ref(struct vcp
static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
{
u32 x, nx;
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
ASSERT(mfn_valid(smfn));
- ASSERT(sp->mbz == 0);
+ ASSERT(sp->count_info == 0);
/* If this is the entry in the up-pointer, remove it */
if ( entry_pa != 0
- && !sh_type_is_pinnable(v, sp->type)
+ && !sh_type_is_pinnable(v, sp->u.sh.type)
&& sp->up == entry_pa )
sp->up = 0;
- x = sp->count;
+ x = sp->u.sh.count;
nx = x - 1;
if ( unlikely(x == 0) )
{
SHADOW_ERROR("shadow ref underflow, smfn=%lx oc=%08x t=%#x\n",
- mfn_x(smfn), sp->count, sp->type);
+ mfn_x(smfn), sp->u.sh.count, sp->u.sh.type);
BUG();
}
/* Guarded by the shadow lock, so no need for atomic update */
- sp->count = nx;
+ sp->u.sh.count = nx;
if ( unlikely(nx == 0) )
sh_destroy_shadow(v, smfn);
@@ -741,26 +678,26 @@ static inline void sh_put_ref(struct vcp
* Returns 0 for failure, 1 for success. */
static inline int sh_pin(struct vcpu *v, mfn_t smfn)
{
- struct shadow_page_info *sp;
+ struct page_info *sp;
ASSERT(mfn_valid(smfn));
- sp = mfn_to_shadow_page(smfn);
- ASSERT(sh_type_is_pinnable(v, sp->type));
- if ( sp->pinned )
+ sp = mfn_to_page(smfn);
+ ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+ if ( sp->u.sh.pinned )
{
/* Already pinned: take it out of the pinned-list so it can go
* at the front */
- list_del(&sp->list);
+ page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
}
else
{
/* Not pinned: pin it! */
if ( !sh_get_ref(v, smfn, 0) )
return 0;
- sp->pinned = 1;
+ sp->u.sh.pinned = 1;
}
/* Put it at the head of the list of pinned shadows */
- list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows);
+ page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows);
return 1;
}
@@ -768,15 +705,15 @@ static inline int sh_pin(struct vcpu *v,
* of pinned shadows, and release the extra ref. */
static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
{
- struct shadow_page_info *sp;
+ struct page_info *sp;
ASSERT(mfn_valid(smfn));
- sp = mfn_to_shadow_page(smfn);
- ASSERT(sh_type_is_pinnable(v, sp->type));
- if ( sp->pinned )
+ sp = mfn_to_page(smfn);
+ ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+ if ( sp->u.sh.pinned )
{
- sp->pinned = 0;
- list_del(&sp->list);
+ sp->u.sh.pinned = 0;
+ page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
sp->up = 0; /* in case this stops being a pinnable type in future */
sh_put_ref(v, smfn, 0);
}
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/numa.c Fri Feb 13 11:22:28 2009 +0900
@@ -312,7 +312,7 @@ static void dump_numa(unsigned char key)
for_each_online_node(i)
page_num_node[i] = 0;
- list_for_each_entry(page, &d->page_list, list)
+ page_list_for_each(page, &d->page_list)
{
i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT);
page_num_node[i]++;
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/physdev.c Fri Feb 13 11:22:28 2009 +0900
@@ -62,7 +62,7 @@ static int physdev_map_pirq(struct physd
ret = -EINVAL;
goto free_domain;
}
- vector = IO_APIC_VECTOR(map->index);
+ vector = domain_irq_to_vector(current->domain, map->index);
if ( !vector )
{
dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n",
@@ -75,7 +75,7 @@ static int physdev_map_pirq(struct physd
case MAP_PIRQ_TYPE_MSI:
vector = map->index;
if ( vector == -1 )
- vector = assign_irq_vector(AUTO_ASSIGN);
+ vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
if ( vector < 0 || vector >= NR_VECTORS )
{
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/setup.c Fri Feb 13 11:22:28 2009 +0900
@@ -417,7 +417,7 @@ void __init __start_xen(unsigned long mb
unsigned int initrdidx = 1;
multiboot_info_t *mbi = __va(mbi_p);
module_t *mod = (module_t *)__va(mbi->mods_addr);
- unsigned long nr_pages, modules_length, modules_headroom = -1;
+ unsigned long nr_pages, modules_length, modules_headroom;
unsigned long allocator_bitmap_end;
int i, e820_warn = 0, bytes = 0;
struct ns16550_defaults ns16550 = {
@@ -618,6 +618,12 @@ void __init __start_xen(unsigned long mb
*/
modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
+ /* ensure mod[0] is mapped before parsing */
+ bootstrap_map(mod[0].mod_start, mod[0].mod_end);
+ modules_headroom = bzimage_headroom(
+ (char *)(unsigned long)mod[0].mod_start,
+ (unsigned long)(mod[0].mod_end - mod[0].mod_start));
+
for ( i = boot_e820.nr_map-1; i >= 0; i-- )
{
uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
@@ -636,7 +642,8 @@ void __init __start_xen(unsigned long mb
s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
#if defined(CONFIG_X86_64)
-#define reloc_size ((__pa(&_end) + mask) & ~mask)
+/* Relocate Xen image, allocation bitmap, and one page of padding. */
+#define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask)
/* Is the region suitable for relocating Xen? */
if ( !xen_phys_start && ((e-s) >= reloc_size) )
{
@@ -721,11 +728,6 @@ void __init __start_xen(unsigned long mb
}
#endif
- if ( modules_headroom == -1 )
- modules_headroom = bzimage_headroom(
- (char *)(unsigned long)mod[0].mod_start,
- (unsigned long)(mod[0].mod_end - mod[0].mod_start));
-
/* Is the region suitable for relocating the multiboot modules? */
if ( !initial_images_start && (s < e) &&
((e-s) >= (modules_length+modules_headroom)) )
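Aside: the reloc_size macro above rounds the space needed for the relocated Xen image, the allocation bitmap (max_page/8 bytes) and one page of padding up to a superpage boundary using the usual (x + mask) & ~mask idiom, where mask comes from L2_PAGETABLE_SHIFT. A minimal stand-alone illustration of that rounding, assuming the x86-64 value L2_PAGETABLE_SHIFT == 21 (2MB superpages):

    #include <stdio.h>

    int main(void)
    {
        /* mask = 2MB - 1, matching (1UL << L2_PAGETABLE_SHIFT) - 1 on x86-64 */
        unsigned long mask = (1UL << 21) - 1;
        unsigned long sizes[] = { 1UL, 1UL << 21, (1UL << 21) + 1 };
        unsigned int i;

        for ( i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++ )
            printf("%lu -> %lu\n", sizes[i], (sizes[i] + mask) & ~mask);
        return 0;
    }

This prints 2097152, 2097152 and 4194304: any size not already on a 2MB boundary is rounded up to the next one.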
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/smpboot.c Fri Feb 13 11:22:28 2009 +0900
@@ -1256,8 +1256,6 @@ int __cpu_disable(void)
mdelay(1);
local_irq_disable();
- cpufreq_del_cpu(cpu);
-
time_suspend();
cpu_mcheck_disable();
@@ -1320,6 +1318,8 @@ int cpu_down(unsigned int cpu)
}
printk("Prepare to bring CPU%d down...\n", cpu);
+
+ cpufreq_del_cpu(cpu);
err = stop_machine_run(take_cpu_down, NULL, cpu);
if (err < 0)
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_32/xen.lds.S
--- a/xen/arch/x86/x86_32/xen.lds.S Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_32/xen.lds.S Fri Feb 13 11:22:28 2009 +0900
@@ -91,6 +91,7 @@ SECTIONS
*(.exit.text)
*(.exit.data)
*(.exitcall.exit)
+ *(.eh_frame)
}
/* Stabs debugging sections. */
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_64/entry.S Fri Feb 13 11:22:28 2009 +0900
@@ -739,7 +739,6 @@ ENTRY(hypercall_args_table)
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
.byte 2 /* do_kexec */
- .byte 1 /* do_xsm_op */
.rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff -r af992824b5cf -r c7cba853583d xen/arch/x86/x86_64/xen.lds.S
--- a/xen/arch/x86/x86_64/xen.lds.S Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/arch/x86/x86_64/xen.lds.S Fri Feb 13 11:22:28 2009 +0900
@@ -89,6 +89,7 @@ SECTIONS
*(.exit.text)
*(.exit.data)
*(.exitcall.exit)
+ *(.eh_frame)
}
/* Stabs debugging sections. */
diff -r af992824b5cf -r c7cba853583d xen/common/domain.c
--- a/xen/common/domain.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/domain.c Fri Feb 13 11:22:28 2009 +0900
@@ -41,7 +41,6 @@ boolean_param("dom0_vcpus_pin", opt_dom0
/* set xen as default cpufreq */
enum cpufreq_controller cpufreq_controller = FREQCTL_xen;
-struct cpufreq_governor *cpufreq_opt_governor;
static void __init setup_cpufreq_option(char *str)
{
@@ -70,19 +69,6 @@ static void __init setup_cpufreq_option(
cpufreq_cmdline_parse(arg);
}
custom_param("cpufreq", setup_cpufreq_option);
-
-static void __init setup_cpufreq_gov_option(char *str)
-{
- if ( !strcmp(str, "userspace") )
- cpufreq_opt_governor = &cpufreq_gov_userspace;
- else if ( !strcmp(str, "performance") )
- cpufreq_opt_governor = &cpufreq_gov_performance;
- else if ( !strcmp(str, "powersave") )
- cpufreq_opt_governor = &cpufreq_gov_powersave;
- else if ( !strcmp(str, "ondemand") )
- cpufreq_opt_governor = &cpufreq_gov_dbs;
-}
-custom_param("cpufreq_governor", setup_cpufreq_gov_option);
/* Protect updates/reads (resp.) of domain_list and domain_hash. */
DEFINE_SPINLOCK(domlist_update_lock);
@@ -233,8 +219,8 @@ struct domain *domain_create(
spin_lock_init(&d->page_alloc_lock);
spin_lock_init(&d->shutdown_lock);
spin_lock_init(&d->hypercall_deadlock_mutex);
- INIT_LIST_HEAD(&d->page_list);
- INIT_LIST_HEAD(&d->xenpage_list);
+ INIT_PAGE_LIST_HEAD(&d->page_list);
+ INIT_PAGE_LIST_HEAD(&d->xenpage_list);
if ( domcr_flags & DOMCRF_hvm )
d->is_hvm = 1;
diff -r af992824b5cf -r c7cba853583d xen/common/grant_table.c
--- a/xen/common/grant_table.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/grant_table.c Fri Feb 13 11:22:28 2009 +0900
@@ -1192,7 +1192,7 @@ gnttab_transfer(
/* Okay, add the page to 'e'. */
if ( unlikely(e->tot_pages++ == 0) )
get_knownalive_domain(e);
- list_add_tail(&page->list, &e->page_list);
+ page_list_add_tail(page, &e->page_list);
page_set_owner(page, e);
spin_unlock(&e->page_alloc_lock);
diff -r af992824b5cf -r c7cba853583d xen/common/hvm/save.c
--- a/xen/common/hvm/save.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/hvm/save.c Fri Feb 13 11:22:28 2009 +0900
@@ -26,6 +26,7 @@
#include <xen/version.h>
#include <public/version.h>
#include <xen/sched.h>
+#include <xen/guest_access.h>
#include <asm/hvm/support.h>
@@ -75,6 +76,53 @@ size_t hvm_save_size(struct domain *d)
return sz;
}
+/* Extract a single instance of a save record, by marshalling all
+ * records of that type and copying out the one we need. */
+int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance,
+ XEN_GUEST_HANDLE_64(uint8) handle)
+{
+ int rv = 0;
+ size_t sz = 0;
+ struct vcpu *v;
+ hvm_domain_context_t ctxt = { 0, };
+
+ if ( d->is_dying
+ || typecode > HVM_SAVE_CODE_MAX
+ || hvm_sr_handlers[typecode].size < sizeof(struct hvm_save_descriptor)
+ || hvm_sr_handlers[typecode].save == NULL )
+ return -EINVAL;
+
+ if ( hvm_sr_handlers[typecode].kind == HVMSR_PER_VCPU )
+ for_each_vcpu(d, v)
+ sz += hvm_sr_handlers[typecode].size;
+ else
+ sz = hvm_sr_handlers[typecode].size;
+
+ if ( (instance + 1) * hvm_sr_handlers[typecode].size > sz )
+ return -EINVAL;
+
+ ctxt.size = sz;
+ ctxt.data = xmalloc_bytes(sz);
+ if ( !ctxt.data )
+ return -ENOMEM;
+
+ if ( hvm_sr_handlers[typecode].save(d, &ctxt) != 0 )
+ {
+ gdprintk(XENLOG_ERR,
+ "HVM save: failed to save type %"PRIu16"\n", typecode);
+ rv = -EFAULT;
+ }
+ else if ( copy_to_guest(handle,
+ ctxt.data
+ + (instance * hvm_sr_handlers[typecode].size)
+ + sizeof (struct hvm_save_descriptor),
+ hvm_sr_handlers[typecode].size
+ - sizeof (struct hvm_save_descriptor)) )
+ rv = -EFAULT;
+
+ xfree(ctxt.data);
+ return rv;
+}
int hvm_save(struct domain *d, hvm_domain_context_t *h)
{
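Aside: hvm_save_one() above relies on every record of a given typecode having the same size, so instance N of the payload sits at offset N * size + sizeof(struct hvm_save_descriptor) in the marshalled buffer. A minimal stand-alone sketch of that extraction arithmetic, using hypothetical names rather than the Xen types:

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical fixed-size record header standing in for hvm_save_descriptor. */
    struct demo_descriptor {
        uint16_t typecode;
        uint16_t instance;
        uint32_t length;
    };

    /* Copy the payload of record 'inst' out of 'buf', which holds 'count'
     * records of 'rec_size' bytes each (header included). Returns 0 on
     * success, -1 if the instance is out of range or 'out' is too small. */
    int extract_instance(const uint8_t *buf, size_t rec_size, size_t count,
                         size_t inst, uint8_t *out, size_t out_len)
    {
        size_t payload = rec_size - sizeof(struct demo_descriptor);

        if ( inst >= count || out_len < payload )
            return -1;
        memcpy(out, buf + inst * rec_size + sizeof(struct demo_descriptor),
               payload);
        return 0;
    }

The range check mirrors the "(instance + 1) * hvm_sr_handlers[typecode].size > sz" test in hvm_save_one().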
diff -r af992824b5cf -r c7cba853583d xen/common/memory.c
--- a/xen/common/memory.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/memory.c Fri Feb 13 11:22:28 2009 +0900
@@ -218,8 +218,8 @@ static long memory_exchange(XEN_GUEST_HA
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
struct xen_memory_exchange exch;
- LIST_HEAD(in_chunk_list);
- LIST_HEAD(out_chunk_list);
+ PAGE_LIST_HEAD(in_chunk_list);
+ PAGE_LIST_HEAD(out_chunk_list);
unsigned long in_chunk_order, out_chunk_order;
xen_pfn_t gpfn, gmfn, mfn;
unsigned long i, j, k;
@@ -325,7 +325,7 @@ static long memory_exchange(XEN_GUEST_HA
goto fail;
}
- list_add(&page->list, &in_chunk_list);
+ page_list_add(page, &in_chunk_list);
}
}
@@ -339,7 +339,7 @@ static long memory_exchange(XEN_GUEST_HA
goto fail;
}
- list_add(&page->list, &out_chunk_list);
+ page_list_add(page, &out_chunk_list);
}
/*
@@ -347,10 +347,8 @@ static long memory_exchange(XEN_GUEST_HA
*/
/* Destroy final reference to each input page. */
- while ( !list_empty(&in_chunk_list) )
- {
- page = list_entry(in_chunk_list.next, struct page_info, list);
- list_del(&page->list);
+ while ( (page = page_list_remove_head(&in_chunk_list)) )
+ {
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
@@ -360,10 +358,8 @@ static long memory_exchange(XEN_GUEST_HA
/* Assign each output page to the domain. */
j = 0;
- while ( !list_empty(&out_chunk_list) )
- {
- page = list_entry(out_chunk_list.next, struct page_info, list);
- list_del(&page->list);
+ while ( (page = page_list_remove_head(&out_chunk_list)) )
+ {
if ( assign_pages(d, page, exch.out.extent_order,
MEMF_no_refcount) )
BUG();
@@ -399,21 +395,13 @@ static long memory_exchange(XEN_GUEST_HA
*/
fail:
/* Reassign any input pages we managed to steal. */
- while ( !list_empty(&in_chunk_list) )
- {
- page = list_entry(in_chunk_list.next, struct page_info, list);
- list_del(&page->list);
+ while ( (page = page_list_remove_head(&in_chunk_list)) )
if ( assign_pages(d, page, 0, MEMF_no_refcount) )
BUG();
- }
/* Free any output pages we managed to allocate. */
- while ( !list_empty(&out_chunk_list) )
- {
- page = list_entry(out_chunk_list.next, struct page_info, list);
- list_del(&page->list);
+ while ( (page = page_list_remove_head(&out_chunk_list)) )
free_domheap_pages(page, exch.out.extent_order);
- }
exch.nr_exchanged = i << in_chunk_order;
diff -r af992824b5cf -r c7cba853583d xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/common/page_alloc.c Fri Feb 13 11:22:28 2009 +0900
@@ -71,7 +71,7 @@ integer_param("dma_bits", dma_bitsize);
#endif
static DEFINE_SPINLOCK(page_scrub_lock);
-LIST_HEAD(page_scrub_list);
+PAGE_LIST_HEAD(page_scrub_list);
static unsigned long scrub_pages;
/*********************
@@ -264,7 +264,7 @@ unsigned long __init alloc_boot_pages(
#define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \
(fls(page_to_mfn(pg)) - 1))
-typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
+typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
#define heap(node, zone, order) ((*_heap[node])[zone][order])
@@ -272,13 +272,16 @@ static unsigned long *avail[MAX_NUMNODES
static DEFINE_SPINLOCK(heap_lock);
-static void init_node_heap(int node)
+static unsigned long init_node_heap(int node, unsigned long mfn,
+ unsigned long nr)
{
/* First node to be discovered has its heap metadata statically alloced. */
static heap_by_zone_and_order_t _heap_static;
static unsigned long avail_static[NR_ZONES];
static int first_node_initialised;
-
+ unsigned long needed = (sizeof(**_heap) +
+ sizeof(**avail) * NR_ZONES +
+ PAGE_SIZE - 1) >> PAGE_SHIFT;
int i, j;
if ( !first_node_initialised )
@@ -286,19 +289,40 @@ static void init_node_heap(int node)
_heap[node] = &_heap_static;
avail[node] = avail_static;
first_node_initialised = 1;
+ needed = 0;
+ }
+#ifdef DIRECTMAP_VIRT_END
+ else if ( nr >= needed &&
+ mfn + needed <= virt_to_mfn(DIRECTMAP_VIRT_END) )
+ {
+ _heap[node] = mfn_to_virt(mfn);
+ avail[node] = mfn_to_virt(mfn + needed) - sizeof(**avail) * NR_ZONES;
+ }
+#endif
+ else if ( get_order_from_bytes(sizeof(**_heap)) ==
+ get_order_from_pages(needed) )
+ {
+ _heap[node] = alloc_xenheap_pages(get_order_from_pages(needed), 0);
+ BUG_ON(!_heap[node]);
+ avail[node] = (void *)_heap[node] + (needed << PAGE_SHIFT) -
+ sizeof(**avail) * NR_ZONES;
+ needed = 0;
}
else
{
_heap[node] = xmalloc(heap_by_zone_and_order_t);
avail[node] = xmalloc_array(unsigned long, NR_ZONES);
BUG_ON(!_heap[node] || !avail[node]);
+ needed = 0;
}
memset(avail[node], 0, NR_ZONES * sizeof(long));
for ( i = 0; i < NR_ZONES; i++ )
for ( j = 0; j <= MAX_ORDER; j++ )
- INIT_LIST_HEAD(&(*_heap[node])[i][j]);
+ INIT_PAGE_LIST_HEAD(&(*_heap[node])[i][j]);
+
+ return needed;
}
/* Allocate 2^@order contiguous pages. */
@@ -340,7 +364,7 @@ static struct page_info *alloc_heap_page
/* Find smallest order which can satisfy the request. */
for ( j = order; j <= MAX_ORDER; j++ )
- if ( !list_empty(&heap(node, zone, j)) )
+ if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
goto found;
} while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
@@ -354,14 +378,11 @@ static struct page_info *alloc_heap_page
return NULL;
found:
- pg = list_entry(heap(node, zone, j).next, struct page_info, list);
- list_del(&pg->list);
-
/* We may have to halve the chunk a number of times. */
while ( j != order )
{
PFN_ORDER(pg) = --j;
- list_add_tail(&pg->list, &heap(node, zone, j));
+ page_list_add_tail(pg, &heap(node, zone, j));
pg += 1 << j;
}
@@ -378,10 +399,13 @@ static struct page_info *alloc_heap_page
/* Reference count must continuously be zero for free pages. */
BUG_ON(pg[i].count_info != 0);
- /* Add in any extra CPUs that need flushing because of this page. */
- cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
- tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
- cpus_or(mask, mask, extra_cpus_mask);
+ if ( pg[i].u.free.need_tlbflush )
+ {
+ /* Add in extra CPUs that need flushing because of this page. */
+ cpus_andnot(extra_cpus_mask, cpu_online_map, mask);
+ tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
+ cpus_or(mask, mask, extra_cpus_mask);
+ }
/* Initialise fields which have other uses for free pages. */
pg[i].u.inuse.type_info = 0;
@@ -404,7 +428,6 @@ static void free_heap_pages(
unsigned long mask;
unsigned int i, node = phys_to_nid(page_to_maddr(pg));
unsigned int zone = page_to_zone(pg);
- struct domain *d;
ASSERT(order <= MAX_ORDER);
ASSERT(node >= 0);
@@ -425,15 +448,10 @@ static void free_heap_pages(
*/
pg[i].count_info = 0;
- if ( (d = page_get_owner(&pg[i])) != NULL )
- {
+ /* If a page has no owner it will need no safety TLB flush. */
+ pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
+ if ( pg[i].u.free.need_tlbflush )
pg[i].tlbflush_timestamp = tlbflush_current_time();
- pg[i].u.free.cpumask = d->domain_dirty_cpumask;
- }
- else
- {
- cpus_clear(pg[i].u.free.cpumask);
- }
}
spin_lock(&heap_lock);
@@ -452,8 +470,8 @@ static void free_heap_pages(
if ( allocated_in_map(page_to_mfn(pg)-mask) ||
(PFN_ORDER(pg-mask) != order) )
break;
- list_del(&(pg-mask)->list);
pg -= mask;
+ page_list_del(pg, &heap(node, zone, order));
}
else
{
@@ -461,7 +479,7 @@ static void free_heap_pages(
if ( allocated_in_map(page_to_mfn(pg)+mask) ||
(PFN_ORDER(pg+mask) != order) )
break;
- list_del(&(pg+mask)->list);
+ page_list_del(pg + mask, &heap(node, zone, order));
}
order++;
@@ -471,7 +489,7 @@ static void free_heap_pages(
}
PFN_ORDER(pg) = order;
- list_add_tail(&pg->list, &heap(node, zone, order));
+ page_list_add_tail(pg, &heap(node, zone, order));
spin_unlock(&heap_lock);
}
@@ -482,7 +500,6 @@ static void free_heap_pages(
* latter is not on a MAX_ORDER boundary, then we reserve the page by
* not freeing it to the buddy allocator.
*/
-#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
static void init_heap_pages(
struct page_info *pg, unsigned long nr_pages)
{
@@ -491,25 +508,33 @@ static void init_heap_pages(
nid_prev = phys_to_nid(page_to_maddr(pg-1));
- for ( i = 0; i < nr_pages; i++ )
+ for ( i = 0; i < nr_pages; nid_prev = nid_curr, i++ )
{
nid_curr = phys_to_nid(page_to_maddr(pg+i));
if ( unlikely(!avail[nid_curr]) )
- init_node_heap(nid_curr);
+ {
+ unsigned long n;
+
+ n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i);
+ if ( n )
+ {
+ BUG_ON(i + n > nr_pages);
+ i += n - 1;
+ continue;
+ }
+ }
/*
- * free pages of the same node, or if they differ, but are on a
- * MAX_ORDER alignement boundary (which already get reserved)
+ * Free pages of the same node, or if they differ, but are on a
+ * MAX_ORDER alignment boundary (which already get reserved).
*/
- if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
- MAX_ORDER_ALIGNED) )
- free_heap_pages(pg+i, 0);
- else
- printk("Reserving non-aligned node boundary @ mfn %lu\n",
- page_to_mfn(pg+i));
-
- nid_prev = nid_curr;
+ if ( (nid_curr == nid_prev) ||
+ !(page_to_mfn(pg+i) & ((1UL << MAX_ORDER) - 1)) )
+ free_heap_pages(pg+i, 0);
+ else
+ printk("Reserving non-aligned node boundary @ mfn %#lx\n",
+ page_to_mfn(pg+i));
}
}
@@ -537,7 +562,7 @@ static unsigned long avail_heap_pages(
#define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn))
void __init end_boot_allocator(void)
{
- unsigned long i;
+ unsigned long i, nr = 0;
int curr_free, next_free;
/* Pages that are free now go to the domain sub-allocator. */
@@ -550,8 +575,15 @@ void __init end_boot_allocator(void)
if ( next_free )
map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
if ( curr_free )
- init_heap_pages(mfn_to_page(i), 1);
- }
+ ++nr;
+ else if ( nr )
+ {
+ init_heap_pages(mfn_to_page(i - nr), nr);
+ nr = 0;
+ }
+ }
+ if ( nr )
+ init_heap_pages(mfn_to_page(i - nr), nr);
if ( !dma_bitsize && (num_online_nodes() > 1) )
{
@@ -786,7 +818,7 @@ int assign_pages(
page_set_owner(&pg[i], d);
wmb(); /* Domain pointer must be visible before updating refcnt. */
pg[i].count_info = PGC_allocated | 1;
- list_add_tail(&pg[i].list, &d->page_list);
+ page_list_add_tail(&pg[i], &d->page_list);
}
spin_unlock(&d->page_alloc_lock);
@@ -844,7 +876,7 @@ void free_domheap_pages(struct page_info
spin_lock_recursive(&d->page_alloc_lock);
for ( i = 0; i < (1 << order); i++ )
- list_del(&pg[i].list);
+ page_list_del2(&pg[i], &d->xenpage_list, &d->arch.relmem_list);
d->xenheap_pages -= 1 << order;
drop_dom_ref = (d->xenheap_pages == 0);
@@ -859,7 +891,7 @@ void free_domheap_pages(struct page_info
for ( i = 0; i < (1 << order); i++ )
{
BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
- list_del(&pg[i].list);
+ page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list);
}
d->tot_pages -= 1 << order;
@@ -882,7 +914,7 @@ void free_domheap_pages(struct page_info
{
page_set_owner(&pg[i], NULL);
spin_lock(&page_scrub_lock);
- list_add(&pg[i].list, &page_scrub_list);
+ page_list_add(&pg[i], &page_scrub_list);
scrub_pages++;
spin_unlock(&page_scrub_lock);
}
@@ -965,7 +997,7 @@ static DEFINE_PER_CPU(struct timer, page
static void page_scrub_softirq(void)
{
- struct list_head *ent;
+ PAGE_LIST_HEAD(list);
struct page_info *pg;
void *p;
int i;
@@ -983,32 +1015,26 @@ static void page_scrub_softirq(void)
do {
spin_lock(&page_scrub_lock);
- if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
+ /* Peel up to 16 pages from the list. */
+ for ( i = 0; i < 16; i++ )
+ {
+ if ( !(pg = page_list_remove_head(&page_scrub_list)) )
+ break;
+ page_list_add_tail(pg, &list);
+ }
+
+ if ( unlikely(i == 0) )
{
spin_unlock(&page_scrub_lock);
goto out;
}
-
- /* Peel up to 16 pages from the list. */
- for ( i = 0; i < 16; i++ )
- {
- if ( ent->next == &page_scrub_list )
- break;
- ent = ent->next;
- }
-
- /* Remove peeled pages from the list. */
- ent->next->prev = &page_scrub_list;
- page_scrub_list.next = ent->next;
- scrub_pages -= (i+1);
+
+ scrub_pages -= i;
spin_unlock(&page_scrub_lock);
- /* Working backwards, scrub each page in turn. */
- while ( ent != &page_scrub_list )
- {
- pg = list_entry(ent, struct page_info, list);
- ent = ent->prev;
+ /* Scrub each page in turn. */
+ while ( (pg = page_list_remove_head(&list)) ) {
p = map_domain_page(page_to_mfn(pg));
scrub_page(p);
unmap_domain_page(p);
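Aside: earlier in this file's diff, the rewritten init_heap_pages() drops the MAX_ORDER_ALIGNED address mask in favour of testing whether the mfn itself sits on a 2^MAX_ORDER frame boundary. A stand-alone illustration of that predicate, with an assumed MAX_ORDER value rather than Xen's configured one:

    #include <stdio.h>

    #define MAX_ORDER 20 /* assumed here purely for illustration */

    /* Non-zero iff 'mfn' lies on a 2^MAX_ORDER frame boundary. */
    static int max_order_aligned(unsigned long mfn)
    {
        return !(mfn & ((1UL << MAX_ORDER) - 1));
    }

    int main(void)
    {
        printf("%d %d %d\n",
               max_order_aligned(0),
               max_order_aligned(1UL << MAX_ORDER),
               max_order_aligned((1UL << MAX_ORDER) + 1)); /* 1 1 0 */
        return 0;
    }

init_heap_pages() frees a page whose node differs from its predecessor's only when it lies on such a boundary; otherwise the page is reserved.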
diff -r af992824b5cf -r c7cba853583d xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/char/serial.c Fri Feb 13 11:22:28 2009 +0900
@@ -471,7 +471,7 @@ void serial_suspend(void)
int i, irq;
for ( i = 0; i < ARRAY_SIZE(com); i++ )
if ( (irq = serial_irq(i)) >= 0 )
- free_irq(irq);
+ release_irq(irq);
}
void serial_resume(void)
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq.c
--- a/xen/drivers/cpufreq/cpufreq.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq.c Fri Feb 13 11:22:28 2009 +0900
@@ -46,6 +46,9 @@
#include <acpi/acpi.h>
#include <acpi/cpufreq/cpufreq.h>
+static unsigned int usr_max_freq, usr_min_freq;
+static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy);
+
struct cpufreq_dom {
unsigned int dom;
cpumask_t map;
@@ -53,6 +56,7 @@ struct cpufreq_dom {
};
static LIST_HEAD(cpufreq_dom_list_head);
+struct cpufreq_governor *cpufreq_opt_governor;
LIST_HEAD(cpufreq_governor_list);
struct cpufreq_governor *__find_governor(const char *governor)
@@ -213,6 +217,9 @@ int cpufreq_add_cpu(unsigned int cpu)
perf->domain_info.num_processors) {
memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
policy->governor = NULL;
+
+ cpufreq_cmdline_common_para(&new_policy);
+
ret = __cpufreq_set_policy(policy, &new_policy);
if (ret) {
if (new_policy.governor == CPUFREQ_DEFAULT_GOVERNOR)
@@ -467,3 +474,69 @@ out:
return ret;
}
+static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy)
+{
+ if (usr_max_freq)
+ new_policy->max = usr_max_freq;
+ if (usr_min_freq)
+ new_policy->min = usr_min_freq;
+}
+
+static int __init cpufreq_handle_common_option(const char *name, const char *val)
+{
+ if (!strcmp(name, "maxfreq") && val) {
+ usr_max_freq = simple_strtoul(val, NULL, 0);
+ return 1;
+ }
+
+ if (!strcmp(name, "minfreq") && val) {
+ usr_min_freq = simple_strtoul(val, NULL, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+void __init cpufreq_cmdline_parse(char *str)
+{
+ static struct cpufreq_governor *__initdata cpufreq_governors[] =
+ {
+ &cpufreq_gov_userspace,
+ &cpufreq_gov_dbs,
+ &cpufreq_gov_performance,
+ &cpufreq_gov_powersave
+ };
+ unsigned int gov_index = 0;
+
+ do {
+ char *val, *end = strchr(str, ',');
+ unsigned int i;
+
+ if (end)
+ *end++ = '\0';
+ val = strchr(str, '=');
+ if (val)
+ *val++ = '\0';
+
+ if (!cpufreq_opt_governor) {
+ if (!val) {
+ for (i = 0; i < ARRAY_SIZE(cpufreq_governors); ++i) {
+ if (!strcmp(str, cpufreq_governors[i]->name)) {
+ cpufreq_opt_governor = cpufreq_governors[i];
+ gov_index = i;
+ str = NULL;
+ break;
+ }
+ }
+ } else {
+ cpufreq_opt_governor = CPUFREQ_DEFAULT_GOVERNOR;
+ }
+ }
+
+ if (str && !cpufreq_handle_common_option(str, val) &&
+ cpufreq_governors[gov_index]->handle_option)
+ cpufreq_governors[gov_index]->handle_option(str, val);
+
+ str = end;
+ } while (str);
+}
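Aside: the cpufreq_cmdline_parse() added above walks a comma-separated option string, splitting each token at '=' into a name and an optional value before dispatching to the common handler or the selected governor's handle_option hook. A minimal stand-alone sketch of that destructive tokenising loop, with a hypothetical printing handler in place of the governor hooks:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for cpufreq_handle_common_option()/handle_option(). */
    static void handle_option(const char *name, const char *val)
    {
        printf("option '%s' = '%s'\n", name, val ? val : "(none)");
    }

    static void parse_options(char *str)
    {
        do {
            char *val, *end = strchr(str, ',');

            if ( end )
                *end++ = '\0';
            val = strchr(str, '=');
            if ( val )
                *val++ = '\0';
            handle_option(str, val);
            str = end;
        } while ( str );
    }

    int main(void)
    {
        char buf[] = "ondemand,up_threshold=80,maxfreq=2000000";
        parse_options(buf);
        return 0;
    }

The real parser additionally treats the first bare (value-less) token as the governor name, as the loop in cpufreq_cmdline_parse() shows.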
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq_misc_governors.c
--- a/xen/drivers/cpufreq/cpufreq_misc_governors.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq_misc_governors.c Fri Feb 13 11:22:28 2009 +0900
@@ -18,6 +18,7 @@
#include <xen/sched.h>
#include <acpi/cpufreq/cpufreq.h>
+static unsigned int usr_speed;
/*
* cpufreq userspace governor
@@ -26,6 +27,7 @@ static int cpufreq_governor_userspace(st
unsigned int event)
{
int ret = 0;
+ unsigned int freq;
if (!policy)
return -EINVAL;
@@ -35,12 +37,17 @@ static int cpufreq_governor_userspace(st
case CPUFREQ_GOV_STOP:
break;
case CPUFREQ_GOV_LIMITS:
- if (policy->max < policy->cur)
+ freq = usr_speed ? : policy->cur;
+ if (policy->max < freq)
ret = __cpufreq_driver_target(policy, policy->max,
CPUFREQ_RELATION_H);
- else if (policy->min > policy->cur)
+ else if (policy->min > freq)
ret = __cpufreq_driver_target(policy, policy->min,
CPUFREQ_RELATION_L);
+ else if (usr_speed)
+ ret = __cpufreq_driver_target(policy, freq,
+ CPUFREQ_RELATION_L);
+
break;
default:
ret = -EINVAL;
@@ -50,9 +57,17 @@ static int cpufreq_governor_userspace(st
return ret;
}
+static void __init
+cpufreq_userspace_handle_option(const char *name, const char *val)
+{
+ if (!strcmp(name, "speed") && val)
+ usr_speed = simple_strtoul(val, NULL, 0);
+}
+
struct cpufreq_governor cpufreq_gov_userspace = {
.name = "userspace",
.governor = cpufreq_governor_userspace,
+ .handle_option = cpufreq_userspace_handle_option
};
static int __init cpufreq_gov_userspace_init(void)
@@ -61,7 +76,7 @@ static int __init cpufreq_gov_userspace_
}
__initcall(cpufreq_gov_userspace_init);
-static void cpufreq_gov_userspace_exit(void)
+static void __exit cpufreq_gov_userspace_exit(void)
{
cpufreq_unregister_governor(&cpufreq_gov_userspace);
}
@@ -106,7 +121,7 @@ static int __init cpufreq_gov_performanc
}
__initcall(cpufreq_gov_performance_init);
-static void cpufreq_gov_performance_exit(void)
+static void __exit cpufreq_gov_performance_exit(void)
{
cpufreq_unregister_governor(&cpufreq_gov_performance);
}
@@ -151,7 +166,7 @@ static int __init cpufreq_gov_powersave_
}
__initcall(cpufreq_gov_powersave_init);
-static void cpufreq_gov_powersave_exit(void)
+static void __exit cpufreq_gov_powersave_exit(void)
{
cpufreq_unregister_governor(&cpufreq_gov_powersave);
}
diff -r af992824b5cf -r c7cba853583d xen/drivers/cpufreq/cpufreq_ondemand.c
--- a/xen/drivers/cpufreq/cpufreq_ondemand.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c Fri Feb 13 11:22:28 2009 +0900
@@ -281,9 +281,50 @@ int cpufreq_governor_dbs(struct cpufreq_
return 0;
}
+static void __init cpufreq_dbs_handle_option(const char *name, const char *val)
+{
+ if ( !strcmp(name, "rate") && val )
+ {
+ usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
+ }
+ else if ( !strcmp(name, "up_threshold") && val )
+ {
+ unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+ if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified threshold too low, using %d\n",
+ MIN_FREQUENCY_UP_THRESHOLD);
+ tmp = MIN_FREQUENCY_UP_THRESHOLD;
+ }
+ else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified threshold too high, using %d\n",
+ MAX_FREQUENCY_UP_THRESHOLD);
+ tmp = MAX_FREQUENCY_UP_THRESHOLD;
+ }
+ dbs_tuners_ins.up_threshold = tmp;
+ }
+ else if ( !strcmp(name, "bias") && val )
+ {
+ unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+ if ( tmp > 1000 )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified bias too high, using 1000\n");
+ tmp = 1000;
+ }
+ dbs_tuners_ins.powersave_bias = tmp;
+ }
+}
+
struct cpufreq_governor cpufreq_gov_dbs = {
.name = "ondemand",
.governor = cpufreq_governor_dbs,
+ .handle_option = cpufreq_dbs_handle_option
};
static int __init cpufreq_gov_dbs_init(void)
@@ -292,60 +333,8 @@ static int __init cpufreq_gov_dbs_init(v
}
__initcall(cpufreq_gov_dbs_init);
-static void cpufreq_gov_dbs_exit(void)
+static void __exit cpufreq_gov_dbs_exit(void)
{
cpufreq_unregister_governor(&cpufreq_gov_dbs);
}
__exitcall(cpufreq_gov_dbs_exit);
-
-void __init cpufreq_cmdline_parse(char *str)
-{
- do {
- char *val, *end = strchr(str, ',');
-
- if ( end )
- *end++ = '\0';
- val = strchr(str, '=');
- if ( val )
- *val++ = '\0';
-
- if ( !strcmp(str, "rate") && val )
- {
- usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
- }
- else if ( !strcmp(str, "threshold") && val )
- {
- unsigned long tmp = simple_strtoul(val, NULL, 0);
-
- if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
- {
- printk(XENLOG_WARNING "cpufreq/ondemand: "
- "specified threshold too low, using %d\n",
- MIN_FREQUENCY_UP_THRESHOLD);
- tmp = MIN_FREQUENCY_UP_THRESHOLD;
- }
- else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
- {
- printk(XENLOG_WARNING "cpufreq/ondemand: "
- "specified threshold too high, using %d\n",
- MAX_FREQUENCY_UP_THRESHOLD);
- tmp = MAX_FREQUENCY_UP_THRESHOLD;
- }
- dbs_tuners_ins.up_threshold = tmp;
- }
- else if ( !strcmp(str, "bias") && val )
- {
- unsigned long tmp = simple_strtoul(val, NULL, 0);
-
- if ( tmp > 1000 )
- {
- printk(XENLOG_WARNING "cpufreq/ondemand: "
- "specified bias too high, using 1000\n");
- tmp = 1000;
- }
- dbs_tuners_ins.powersave_bias = tmp;
- }
-
- str = end;
- } while ( str );
-}
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 13 11:22:28 2009 +0900
@@ -479,26 +479,27 @@ static int set_iommu_interrupt_handler(s
{
int vector, ret;
- vector = assign_irq_vector(AUTO_ASSIGN);
- vector_to_iommu[vector] = iommu;
-
- /* make irq == vector */
- irq_vector[vector] = vector;
- vector_irq[vector] = vector;
-
- if ( !vector )
- {
- amd_iov_error("no vectors\n");
+ vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+ if ( vector <= 0 )
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
return 0;
}
irq_desc[vector].handler = &iommu_msi_type;
- ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu);
+ ret = request_irq_vector(vector, amd_iommu_page_fault, 0,
+ "amd_iommu", iommu);
if ( ret )
{
+ irq_desc[vector].handler = &no_irq_type;
+ free_irq_vector(vector);
amd_iov_error("can't request irq\n");
return 0;
}
+
+ /* Make sure that vector is never re-used. */
+ vector_irq[vector] = NEVER_ASSIGN_IRQ;
+ vector_to_iommu[vector] = iommu;
iommu->vector = vector;
return vector;
}
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 13 11:22:28 2009 +0900
@@ -461,8 +461,8 @@ int amd_iommu_map_page(struct domain *d,
iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
if ( iommu_l2e == 0 )
{
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
return -EFAULT;
}
set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
@@ -493,8 +493,8 @@ int amd_iommu_unmap_page(struct domain *
if ( iommu_l2e == 0 )
{
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
return -EFAULT;
}
@@ -533,9 +533,9 @@ int amd_iommu_reserve_domain_unity_map(
if ( iommu_l2e == 0 )
{
- amd_iov_error(
- "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ amd_iov_error("Invalid IO pagetable entry phys_addr = %lx\n",
+ phys_addr);
return -EFAULT;
}
@@ -552,7 +552,6 @@ int amd_iommu_sync_p2m(struct domain *d)
{
unsigned long mfn, gfn, flags;
u64 iommu_l2e;
- struct list_head *entry;
struct page_info *page;
struct hvm_iommu *hd;
int iw = IOMMU_IO_WRITE_ENABLED;
@@ -568,10 +567,10 @@ int amd_iommu_sync_p2m(struct domain *d)
if ( hd->p2m_synchronized )
goto out;
- for ( entry = d->page_list.next; entry != &d->page_list;
- entry = entry->next )
- {
- page = list_entry(entry, struct page_info, list);
+ spin_lock(&d->page_alloc_lock);
+
+ page_list_for_each ( page, &d->page_list )
+ {
mfn = page_to_mfn(page);
gfn = get_gpfn_from_mfn(mfn);
@@ -582,13 +581,16 @@ int amd_iommu_sync_p2m(struct domain *d)
if ( iommu_l2e == 0 )
{
+ spin_unlock(&d->page_alloc_lock);
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
return -EFAULT;
}
set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
}
+
+ spin_unlock(&d->page_alloc_lock);
hd->p2m_synchronized = 1;
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 13 11:22:28 2009 +0900
@@ -23,7 +23,6 @@
#include <xen/pci_regs.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
-#include <asm/mm.h>
extern unsigned short ivrs_bdf_entries;
extern struct ivrs_mappings *ivrs_mappings;
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/io.c Fri Feb 13 11:22:28 2009 +0900
@@ -87,8 +87,8 @@ int pt_irq_create_bind_vtd(
if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
{
+ spin_unlock(&d->event_lock);
xfree(hvm_irq_dpci);
- spin_unlock(&d->event_lock);
return -EINVAL;
}
}
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/iommu.c Fri Feb 13 11:22:28 2009 +0900
@@ -33,6 +33,8 @@ int amd_iov_detect(void);
* no-pv Disable IOMMU for PV domains (default)
* force|required Don't boot unless IOMMU is enabled
* passthrough Bypass VT-d translation for Dom0
+ * snoop Utilize the snoop control for IOMMU (default)
+ * no-snoop Don't utilize the snoop control for IOMMU
*/
custom_param("iommu", parse_iommu_param);
int iommu_enabled = 0;
@@ -45,6 +47,7 @@ static void __init parse_iommu_param(cha
{
char *ss;
iommu_enabled = 1;
+ iommu_snoop = 1;
do {
ss = strchr(s, ',');
@@ -62,6 +65,10 @@ static void __init parse_iommu_param(cha
force_iommu = 1;
else if ( !strcmp(s, "passthrough") )
iommu_passthrough = 1;
+ else if ( !strcmp(s, "snoop") )
+ iommu_snoop = 1;
+ else if ( !strcmp(s, "no-snoop") )
+ iommu_snoop = 0;
s = ss + 1;
} while ( ss );
@@ -141,7 +148,7 @@ static int iommu_populate_page_table(str
spin_lock(&d->page_alloc_lock);
- list_for_each_entry ( page, &d->page_list, list )
+ page_list_for_each ( page, &d->page_list )
{
if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
{
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.c Fri Feb 13 11:22:28 2009 +0900
@@ -21,6 +21,7 @@
#include <xen/init.h>
#include <xen/bitmap.h>
+#include <xen/errno.h>
#include <xen/kernel.h>
#include <xen/acpi.h>
#include <xen/mm.h>
@@ -518,8 +519,6 @@ int acpi_dmar_init(void)
int acpi_dmar_init(void)
{
int rc;
- struct acpi_drhd_unit *drhd;
- struct iommu *iommu;
rc = -ENODEV;
if ( force_iommu )
@@ -536,20 +535,7 @@ int acpi_dmar_init(void)
if ( list_empty(&acpi_drhd_units) )
goto fail;
- /* Giving that all devices within guest use same io page table,
- * enable snoop control only if all VT-d engines support it.
- */
- iommu_snoop = 1;
- for_each_drhd_unit ( drhd )
- {
- iommu = drhd->iommu;
- if ( !ecap_snp_ctl(iommu->ecap) ) {
- iommu_snoop = 0;
- break;
- }
- }
-
- printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop);
+ printk("Intel VT-d has been enabled\n");
return 0;
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/ia64/vtd.c
--- a/xen/drivers/passthrough/vtd/ia64/vtd.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/ia64/vtd.c Fri Feb 13 11:22:28 2009 +0900
@@ -29,7 +29,9 @@
#include "../vtd.h"
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = {
+ [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+};
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
u8 irq_vector[NR_IRQS] __read_mostly;
@@ -45,18 +47,19 @@ void unmap_vtd_domain_page(void *va)
}
/* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct domain *d)
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
{
struct page_info *pg;
u64 *vaddr;
- pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0);
+ pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+ d ? MEMF_node(domain_to_node(d)) : 0);
vaddr = map_domain_page(page_to_mfn(pg));
if ( !vaddr )
return 0;
- memset(vaddr, 0, PAGE_SIZE);
+ memset(vaddr, 0, PAGE_SIZE * npages);
- iommu_flush_cache_page(vaddr);
+ iommu_flush_cache_page(vaddr, npages);
unmap_domain_page(vaddr);
return page_to_maddr(pg);
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/intremap.c Fri Feb 13 11:22:28 2009 +0900
@@ -502,7 +502,7 @@ int intremap_setup(struct iommu *iommu)
ir_ctrl = iommu_ir_ctrl(iommu);
if ( ir_ctrl->iremap_maddr == 0 )
{
- ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL);
+ ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL, 1);
if ( ir_ctrl->iremap_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c Fri Feb 13 11:22:28 2009 +0900
@@ -129,9 +129,9 @@ void iommu_flush_cache_entry(void *addr)
__iommu_flush_cache(addr, 8);
}
-void iommu_flush_cache_page(void *addr)
-{
- __iommu_flush_cache(addr, PAGE_SIZE_4K);
+void iommu_flush_cache_page(void *addr, unsigned long npages)
+{
+ __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
}
int nr_iommus;
@@ -146,7 +146,7 @@ static u64 bus_to_context_maddr(struct i
root = &root_entries[bus];
if ( !root_present(*root) )
{
- maddr = alloc_pgtable_maddr(NULL);
+ maddr = alloc_pgtable_maddr(NULL, 1);
if ( maddr == 0 )
{
unmap_vtd_domain_page(root_entries);
@@ -174,7 +174,7 @@ static u64 addr_to_dma_page_maddr(struct
addr &= (((u64)1) << addr_width) - 1;
ASSERT(spin_is_locked(&hd->mapping_lock));
if ( hd->pgd_maddr == 0 )
- if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) )
+ if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain, 1)) == 0) )
goto out;
parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
@@ -187,7 +187,7 @@ static u64 addr_to_dma_page_maddr(struct
{
if ( !alloc )
break;
- maddr = alloc_pgtable_maddr(domain);
+ maddr = alloc_pgtable_maddr(domain, 1);
if ( !maddr )
break;
dma_set_pte_addr(*pte, maddr);
@@ -577,7 +577,7 @@ static int iommu_set_root_entry(struct i
spin_lock(&iommu->lock);
if ( iommu->root_maddr == 0 )
- iommu->root_maddr = alloc_pgtable_maddr(NULL);
+ iommu->root_maddr = alloc_pgtable_maddr(NULL, 1);
if ( iommu->root_maddr == 0 )
{
spin_unlock(&iommu->lock);
@@ -874,23 +874,27 @@ int iommu_set_interrupt(struct iommu *io
{
int vector, ret;
- vector = assign_irq_vector(AUTO_ASSIGN);
- vector_to_iommu[vector] = iommu;
-
- /* VT-d fault is a MSI, make irq == vector */
- irq_vector[vector] = vector;
- vector_irq[vector] = vector;
-
- if ( !vector )
+ vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+ if ( vector <= 0 )
{
gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
return -EINVAL;
}
irq_desc[vector].handler = &dma_msi_type;
- ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
+ ret = request_irq_vector(vector, iommu_page_fault, 0, "dmar", iommu);
if ( ret )
+ {
+ irq_desc[vector].handler = &no_irq_type;
+ free_irq_vector(vector);
gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
+ return ret;
+ }
+
+ /* Make sure that vector is never re-used. */
+ vector_irq[vector] = NEVER_ASSIGN_IRQ;
+ vector_to_iommu[vector] = iommu;
+
return vector;
}
@@ -966,7 +970,7 @@ static void iommu_free(struct acpi_drhd_
iounmap(iommu->reg);
free_intel_iommu(iommu->intel);
- free_irq(iommu->vector);
+ release_irq_vector(iommu->vector);
xfree(iommu);
drhd->iommu = NULL;
@@ -1677,6 +1681,11 @@ static int init_vtd_hw(void)
}
vector = iommu_set_interrupt(iommu);
+ if ( vector < 0 )
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
+ return vector;
+ }
dma_msi_data_init(iommu, vector);
dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
iommu->vector = vector;
@@ -1756,6 +1765,23 @@ int intel_vtd_setup(void)
if ( init_vtd_hw() )
goto error;
+ /* Given that all devices within guest use same io page table,
+ * enable snoop control only if all VT-d engines support it.
+ */
+
+ if ( iommu_snoop )
+ {
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( !ecap_snp_ctl(iommu->ecap) ) {
+ iommu_snoop = 0;
+ break;
+ }
+ }
+ }
+
+ printk("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis");
register_keyhandler('V', dump_iommu_info, "dump iommu info");
return 0;
@@ -1764,6 +1790,7 @@ int intel_vtd_setup(void)
for_each_drhd_unit ( drhd )
iommu_free(drhd);
vtd_enabled = 0;
+ iommu_snoop = 0;
return -ENOMEM;
}
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.h Fri Feb 13 11:22:28 2009 +0900
@@ -397,7 +397,9 @@ struct poll_info {
u32 udata;
};
-#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry))
+#define MAX_QINVAL_PAGES 8
+#define NUM_QINVAL_PAGES 1
+#define QINVAL_ENTRY_NR (PAGE_SIZE_4K*NUM_QINVAL_PAGES/sizeof(struct qinval_entry))
#define qinval_present(v) ((v).lo & 1)
#define qinval_fault_disable(v) (((v).lo >> 1) & 1)
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/qinval.c Fri Feb 13 11:22:28 2009 +0900
@@ -427,7 +427,7 @@ int qinval_setup(struct iommu *iommu)
if ( qi_ctrl->qinval_maddr == 0 )
{
- qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL);
+ qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL, NUM_QINVAL_PAGES);
if ( qi_ctrl->qinval_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
@@ -445,6 +445,8 @@ int qinval_setup(struct iommu *iommu)
* registers are automatically reset to 0 with write
* to IQA register.
*/
+ if ( NUM_QINVAL_PAGES <= MAX_QINVAL_PAGES )
+ qi_ctrl->qinval_maddr |= NUM_QINVAL_PAGES - 1;
dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
/* enable queued invalidation hardware */
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 11:22:28 2009 +0900
@@ -101,12 +101,12 @@ void cacheline_flush(char *);
void cacheline_flush(char *);
void flush_all_cache(void);
void *map_to_nocache_virt(int nr_iommus, u64 maddr);
-u64 alloc_pgtable_maddr(struct domain *d);
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages);
void free_pgtable_maddr(u64 maddr);
void *map_vtd_domain_page(u64 maddr);
void unmap_vtd_domain_page(void *va);
void iommu_flush_cache_entry(void *addr);
-void iommu_flush_cache_page(void *addr);
+void iommu_flush_cache_page(void *addr, unsigned long npages);
#endif // _VTD_H_
diff -r af992824b5cf -r c7cba853583d xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c Fri Feb 13 11:22:28 2009 +0900
@@ -38,20 +38,21 @@ void unmap_vtd_domain_page(void *va)
}
/* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct domain *d)
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
{
struct page_info *pg;
u64 *vaddr;
unsigned long mfn;
- pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0);
+ pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+ d ? MEMF_node(domain_to_node(d)) : 0);
if ( !pg )
return 0;
mfn = page_to_mfn(pg);
vaddr = map_domain_page(mfn);
- memset(vaddr, 0, PAGE_SIZE);
+ memset(vaddr, 0, PAGE_SIZE * npages);
- iommu_flush_cache_page(vaddr);
+ iommu_flush_cache_page(vaddr, npages);
unmap_domain_page(vaddr);
return (u64)mfn << PAGE_SHIFT_4K;
diff -r af992824b5cf -r c7cba853583d xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/acpi/cpufreq/cpufreq.h Fri Feb 13 11:22:28 2009 +0900
@@ -87,6 +87,7 @@ struct cpufreq_governor {
char name[CPUFREQ_NAME_LEN];
int (*governor)(struct cpufreq_policy *policy,
unsigned int event);
+ void (*handle_option)(const char *name, const char *value);
struct list_head governor_list;
};
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hardirq.h
--- a/xen/include/asm-ia64/hardirq.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hardirq.h Fri Feb 13 11:22:28 2009 +0900
@@ -4,6 +4,7 @@
#define __ARCH_IRQ_STAT 1
#define HARDIRQ_BITS 14
#include <linux/hardirq.h>
+#include <xen/sched.h>
#define local_softirq_pending() (local_cpu_data->softirq_pending)
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hvm/iommu.h
--- a/xen/include/asm-ia64/hvm/iommu.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hvm/iommu.h Fri Feb 13 11:22:28 2009 +0900
@@ -28,7 +28,6 @@ static inline void pci_cleanup_msi(struc
/* TODO */
}
-#define AUTO_ASSIGN -1
extern int assign_irq_vector (int irq);
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/hvm/irq.h
--- a/xen/include/asm-ia64/hvm/irq.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/hvm/irq.h Fri Feb 13 11:22:28 2009 +0900
@@ -90,13 +90,17 @@ struct hvm_irq {
#define hvm_pci_intx_link(dev, intx) \
(((dev) + (intx)) & 3)
-/* Extract the IA-64 vector that corresponds to IRQ. */
-static inline int
-irq_to_vector (int irq)
+#define IA64_INVALID_VECTOR ((unsigned int)((int)-1))
+static inline unsigned int irq_to_vector(int irq)
{
- return irq;
+ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+ unsigned int vector;
+
+ if ( acpi_gsi_to_irq(irq, &vector) < 0)
+ return 0;
+
+ return vector;
}
-
extern u8 irq_vector[NR_IRQS];
extern int vector_irq[NR_VECTORS];
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux-xen/asm/smp.h
--- a/xen/include/asm-ia64/linux-xen/asm/smp.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux-xen/asm/smp.h Fri Feb 13 11:22:28 2009 +0900
@@ -47,7 +47,6 @@ ia64_get_lid (void)
#define SMP_IPI_REDIRECTION (1 << 1)
#ifdef XEN
-#include <xen/sched.h>
#define raw_smp_processor_id() (current->processor)
#else
#define raw_smp_processor_id() (current_thread_info()->cpu)
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux-xen/linux/interrupt.h
--- a/xen/include/asm-ia64/linux-xen/linux/interrupt.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux-xen/linux/interrupt.h Fri Feb 13 11:22:28 2009 +0900
@@ -52,10 +52,10 @@ struct irqaction {
};
extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
-extern int request_irq(unsigned int,
+extern int request_irq_vector(unsigned int,
irqreturn_t (*handler)(int, void *, struct pt_regs *),
unsigned long, const char *, void *);
-extern void free_irq(unsigned int, void *);
+extern void release_irq_vector(unsigned int, void *);
#endif
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/linux/asm/hw_irq.h
--- a/xen/include/asm-ia64/linux/asm/hw_irq.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/linux/asm/hw_irq.h Fri Feb 13 11:22:28 2009 +0900
@@ -34,7 +34,7 @@ typedef u8 ia64_vector;
#define IA64_MAX_VECTORED_IRQ 255
#define IA64_NUM_VECTORS 256
-#define AUTO_ASSIGN -1
+#define AUTO_ASSIGN_IRQ (-1)
#define IA64_SPURIOUS_INT_VECTOR 0x0f
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/mm.h Fri Feb 13 11:22:28 2009 +0900
@@ -13,7 +13,6 @@
#include <xen/list.h>
#include <xen/spinlock.h>
#include <xen/perfc.h>
-#include <xen/sched.h>
#include <asm/processor.h>
#include <asm/atomic.h>
@@ -63,21 +62,14 @@ struct page_info
struct {
/* Order-size of the free chunk this page is the head of. */
u32 order;
- /* Mask of possibly-tainted TLBs. */
- cpumask_t cpumask;
+ /* Do TLBs need flushing for safety before next page use? */
+ bool_t need_tlbflush;
} free;
} u;
/* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
u32 tlbflush_timestamp;
-
-#if 0
-// following added for Linux compiling
- page_flags_t flags;
- atomic_t _count;
- struct list_head lru; // is this the same as above "list"?
-#endif
};
#define set_page_count(p,v) atomic_set(&(p)->_count, v - 1)
diff -r af992824b5cf -r c7cba853583d xen/include/asm-ia64/tlbflush.h
--- a/xen/include/asm-ia64/tlbflush.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-ia64/tlbflush.h Fri Feb 13 11:22:28 2009 +0900
@@ -1,7 +1,8 @@
#ifndef __FLUSHTLB_H__
#define __FLUSHTLB_H__
-#include <xen/sched.h>
+struct vcpu;
+struct domain;
/* TLB flushes can be either local (current vcpu only) or domain wide (on
all vcpus).
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/domain.h Fri Feb 13 11:22:28 2009 +0900
@@ -79,11 +79,11 @@ struct shadow_domain {
int locker; /* processor which holds the lock */
const char *locker_function; /* Func that took it */
unsigned int opt_flags; /* runtime tunable optimizations on/off */
- struct list_head pinned_shadows;
+ struct page_list_head pinned_shadows;
/* Memory allocation */
- struct list_head freelists[SHADOW_MAX_ORDER + 1];
- struct list_head p2m_freelist;
+ struct page_list_head freelists[SHADOW_MAX_ORDER + 1];
+ struct page_list_head p2m_freelist;
unsigned int total_pages; /* number of pages allocated */
unsigned int free_pages; /* number of pages on freelists */
unsigned int p2m_pages; /* number of pages allocates to p2m */
@@ -92,7 +92,7 @@ struct shadow_domain {
pagetable_t unpaged_pagetable;
/* Shadow hashtable */
- struct shadow_page_info **hash_table;
+ struct page_info **hash_table;
int hash_walking; /* Some function is walking the hash table */
/* Fast MMIO path heuristic */
@@ -143,7 +143,7 @@ struct hap_domain {
int locker;
const char *locker_function;
- struct list_head freelist;
+ struct page_list_head freelist;
unsigned int total_pages; /* number of pages allocated */
unsigned int free_pages; /* number of pages on freelists */
unsigned int p2m_pages; /* number of pages allocates to p2m */
@@ -265,7 +265,7 @@ struct arch_domain
RELMEM_l2,
RELMEM_done,
} relmem;
- struct list_head relmem_list;
+ struct page_list_head relmem_list;
cpuid_input_t cpuids[MAX_CPUID_INPUT];
} __cacheline_aligned;
@@ -352,6 +352,7 @@ struct arch_vcpu
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
+ spinlock_t shadow_ldt_lock;
struct paging_vcpu paging;
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 11:22:28 2009 +0900
@@ -48,7 +48,11 @@ typedef union {
#define EPTE_SUPER_PAGE_MASK 0x80
#define EPTE_MFN_MASK 0x1fffffffffff000
#define EPTE_AVAIL1_MASK 0xF00
-#define EPTE_EMT_MASK 0x78
+#define EPTE_EMT_MASK 0x38
+#define EPTE_IGMT_MASK 0x40
+#define EPTE_AVAIL1_SHIFT 8
+#define EPTE_EMT_SHIFT 3
+#define EPTE_IGMT_SHIFT 6
void vmx_asm_vmexit_handler(struct cpu_user_regs);
void vmx_asm_do_vmentry(void);
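
EPTE_EMT_MASK shrinks to bits 5:3 and the ignore-PAT bit gets its own mask (bit 6), with shift constants for both. For illustration (the helper names below are made up), the fields of a raw EPT entry can be extracted as:

    /* Sketch only: pull the memory type and ignore-PAT bit out of a raw
     * 64-bit EPT entry value using the new masks and shifts. */
    static inline unsigned int epte_emt(u64 epte)
    {
        return (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT;    /* bits 5:3 */
    }
    static inline unsigned int epte_igmt(u64 epte)
    {
        return (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT;  /* bit 6 */
    }
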
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/iocap.h
--- a/xen/include/asm-x86/iocap.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/iocap.h Fri Feb 13 11:22:28 2009 +0900
@@ -14,7 +14,8 @@
#define ioports_access_permitted(d, s, e) \
rangeset_contains_range((d)->arch.ioport_caps, s, e)
-#define cache_flush_permitted(d) \
- (!rangeset_is_empty((d)->iomem_caps))
+#define cache_flush_permitted(d) \
+ (!rangeset_is_empty((d)->iomem_caps) || \
+ !rangeset_is_empty((d)->arch.ioport_caps))
#endif /* __X86_IOCAP_H__ */
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/irq.h Fri Feb 13 11:22:28 2009 +0900
@@ -19,9 +19,6 @@
extern int vector_irq[NR_VECTORS];
extern u8 irq_vector[NR_IRQS];
-#define AUTO_ASSIGN -1
-#define NEVER_ASSIGN -2
-#define FREE_TO_ASSIGN -3
#define platform_legacy_irq(irq) ((irq) < 16)
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/mm.h Fri Feb 13 11:22:28 2009 +0900
@@ -12,15 +12,40 @@
* Per-page-frame information.
*
* Every architecture must ensure the following:
- * 1. 'struct page_info' contains a 'struct list_head list'.
+ * 1. 'struct page_info' contains a 'struct page_list_entry list'.
* 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
*/
-#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
+#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
+
+/*
+ * This definition is solely for the use in struct page_info (and
+ * struct page_list_head), intended to allow easy adjustment once x86-64
+ * wants to support more than 16TB.
+ * 'unsigned long' should be used for MFNs everywhere else.
+ */
+#define __mfn_t unsigned int
+#define PRpgmfn "08x"
+
+#undef page_list_entry
+struct page_list_entry
+{
+ __mfn_t next, prev;
+};
struct page_info
{
- /* Each frame can be threaded onto a doubly-linked list. */
- struct list_head list;
+ union {
+ /* Each frame can be threaded onto a doubly-linked list.
+ *
+ * For unused shadow pages, a list of pages of this order; for
+ * pinnable shadows, if pinned, a list of other pinned shadows
+ * (see sh_type_is_pinnable() below for the definition of
+ * "pinnable" shadow types).
+ */
+ struct page_list_entry list;
+ /* For non-pinnable shadows, a higher entry that points at us. */
+ paddr_t up;
+ };
/* Reference count and various PGC_xxx flags and fields. */
unsigned long count_info;
@@ -30,21 +55,46 @@ struct page_info
/* Page is in use: ((count_info & PGC_count_mask) != 0). */
struct {
- /* Owner of this page (NULL if page is anonymous). */
- u32 _domain; /* pickled format */
/* Type reference count and various PGT_xxx flags and fields. */
unsigned long type_info;
} inuse;
+ /* Page is in use as a shadow: count_info == 0. */
+ struct {
+ unsigned long type:5; /* What kind of shadow is this? */
+ unsigned long pinned:1; /* Is the shadow pinned? */
+ unsigned long count:26; /* Reference count */
+ } sh;
+
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
+ /* Do TLBs need flushing for safety before next page use? */
+ bool_t need_tlbflush;
+ } free;
+
+ } u;
+
+ union {
+
+ /* Page is in use, but not as a shadow. */
+ struct {
+ /* Owner of this page (NULL if page is anonymous). */
+ u32 _domain; /* pickled format */
+ } inuse;
+
+ /* Page is in use as a shadow. */
+ struct {
+ /* GMFN of guest page we're a shadow of. */
+ __mfn_t back;
+ } sh;
+
+ /* Page is on a free list (including shadow code free lists). */
+ struct {
/* Order-size of the free chunk this page is the head of. */
- u32 order;
- /* Mask of possibly-tainted TLBs. */
- cpumask_t cpumask;
+ unsigned int order;
} free;
- } u;
+ } v;
union {
/*
@@ -95,8 +145,13 @@ struct page_info
* tracked for TLB-flush avoidance when a guest runs in shadow mode.
*/
u32 shadow_flags;
+
+ /* When in use as a shadow, next shadow in this hash chain. */
+ __mfn_t next_shadow;
};
};
+
+#undef __mfn_t
#define PG_shift(idx) (BITS_PER_LONG - (idx))
#define PG_mask(x, idx) (x ## UL << PG_shift(idx))
@@ -155,7 +210,8 @@ struct page_info
})
#else
#define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
-#define is_xen_heap_mfn(mfn) is_xen_heap_page(&frame_table[mfn])
+#define is_xen_heap_mfn(mfn) \
+ (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
#endif
#if defined(__i386__)
@@ -174,10 +230,10 @@ struct page_info
#define SHADOW_OOS_FIXUPS 2
#define page_get_owner(_p) \
- ((struct domain *)((_p)->u.inuse._domain ? \
- mfn_to_virt((_p)->u.inuse._domain) : NULL))
+ ((struct domain *)((_p)->v.inuse._domain ? \
+ mfn_to_virt((_p)->v.inuse._domain) : NULL))
#define page_set_owner(_p,_d) \
- ((_p)->u.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
+ ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
#define maddr_get_owner(ma) (page_get_owner(maddr_to_page((ma))))
#define vaddr_get_owner(va) (page_get_owner(virt_to_page((va))))
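
With the owner field moved into the second union ('v') and shadow metadata sharing space with it, code outside the allocator should keep going through the accessor macros rather than touching the unions directly. A minimal sketch (the function is hypothetical):

    /* Sketch only: tag a page with its owning domain and read it back;
     * the macros hide the pickled v.inuse._domain representation. */
    static void example_set_and_check_owner(struct page_info *pg,
                                            struct domain *d)
    {
        page_set_owner(pg, d);
        BUG_ON(page_get_owner(pg) != d);
    }
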
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/p2m.h Fri Feb 13 11:22:28 2009 +0900
@@ -110,7 +110,7 @@ struct p2m_domain {
const char *locker_function; /* Func that took it */
/* Pages used to construct the p2m */
- struct list_head pages;
+ struct page_list_head pages;
/* Functions to call to get or free pages for the p2m */
struct page_info * (*alloc_page )(struct domain *d);
@@ -148,7 +148,7 @@ struct p2m_domain {
* protect moving stuff from the PoD cache to the domain page list.
*/
struct {
- struct list_head super, /* List of superpages */
+ struct page_list_head super, /* List of superpages */
single; /* Non-super lists */
int count, /* # of pages in cache lists */
entry_count; /* # of pages in p2m marked pod */
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/page.h Fri Feb 13 11:22:28 2009 +0900
@@ -220,31 +220,47 @@ void copy_page_sse2(void *, const void *
copy_page_sse2(_t, _f) : \
(void)memcpy(_t, _f, PAGE_SIZE))
-#define mfn_valid(mfn) ((mfn) < max_page)
+#define __mfn_valid(mfn) ((mfn) < max_page)
/* Convert between Xen-heap virtual addresses and machine addresses. */
#define __pa(x) (virt_to_maddr(x))
#define __va(x) (maddr_to_virt(x))
/* Convert between Xen-heap virtual addresses and machine frame numbers. */
-#define virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT)
-#define mfn_to_virt(mfn) (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
+#define __virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT)
+#define __mfn_to_virt(mfn) (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
/* Convert between machine frame numbers and page-info structures. */
-#define mfn_to_page(mfn) (frame_table + (mfn))
-#define page_to_mfn(pg) ((unsigned long)((pg) - frame_table))
+#define __mfn_to_page(mfn) (frame_table + (mfn))
+#define __page_to_mfn(pg) ((unsigned long)((pg) - frame_table))
/* Convert between machine addresses and page-info structures. */
-#define maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT))
-#define page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
+#define __maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT))
+#define __page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
/* Convert between Xen-heap virtual addresses and page-info structures. */
-#define virt_to_page(va) (frame_table + (__pa(va) >> PAGE_SHIFT))
-#define page_to_virt(pg) (maddr_to_virt(page_to_maddr(pg)))
+#define __virt_to_page(va) (frame_table + (__pa(va) >> PAGE_SHIFT))
+#define __page_to_virt(pg) (maddr_to_virt(page_to_maddr(pg)))
/* Convert between frame number and address formats. */
-#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
-#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
+#define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+
+/*
+ * We define non-underscored wrappers for above conversion functions. These are
+ * overridden in various source files while underscored versions remain intact.
+ */
+#define mfn_valid(mfn) __mfn_valid(mfn)
+#define virt_to_mfn(va) __virt_to_mfn(va)
+#define mfn_to_virt(mfn) __mfn_to_virt(mfn)
+#define mfn_to_page(mfn) __mfn_to_page(mfn)
+#define page_to_mfn(pg) __page_to_mfn(pg)
+#define maddr_to_page(ma) __maddr_to_page(ma)
+#define page_to_maddr(pg) __page_to_maddr(pg)
+#define virt_to_page(va) __virt_to_page(va)
+#define page_to_virt(pg) __page_to_virt(pg)
+#define pfn_to_paddr(pfn) __pfn_to_paddr(pfn)
+#define paddr_to_pfn(pa) __paddr_to_pfn(pa)
#endif /* !defined(__ASSEMBLY__) */
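
Splitting each conversion into a double-underscore primitive plus a plain-name wrapper lets individual source files override the plain names, for instance with typesafe mfn_t variants, while the primitives stay intact. A sketch of such an override (the exact overrides used elsewhere in this changeset may differ):

    /* Sketch only: a translation unit that works in terms of mfn_t could
     * redefine the plain names on top of the double-underscore primitives. */
    #undef mfn_to_page
    #define mfn_to_page(mfn)   __mfn_to_page(mfn_x(mfn))
    #undef page_to_mfn
    #define page_to_mfn(pg)    _mfn(__page_to_mfn(pg))
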
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/perfc.h
--- a/xen/include/asm-x86/perfc.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/perfc.h Fri Feb 13 11:22:28 2009 +0900
@@ -1,6 +1,5 @@
#ifndef __ASM_PERFC_H__
#define __ASM_PERFC_H__
-#include <asm/mm.h>
static inline void arch_perfc_printall(void)
{
diff -r af992824b5cf -r c7cba853583d xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/asm-x86/processor.h Fri Feb 13 11:22:28 2009 +0900
@@ -188,6 +188,7 @@ extern struct cpuinfo_x86 cpu_data[];
#define current_cpu_data boot_cpu_data
#endif
+extern u64 host_pat;
extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-ia64/hvm/save.h
--- a/xen/include/public/arch-ia64/hvm/save.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-ia64/hvm/save.h Fri Feb 13 11:22:28 2009 +0900
@@ -23,8 +23,8 @@
#ifndef __XEN_PUBLIC_HVM_SAVE_IA64_H__
#define __XEN_PUBLIC_HVM_SAVE_IA64_H__
-#include <public/hvm/save.h>
-#include <public/arch-ia64.h>
+#include "../../hvm/save.h"
+#include "../../arch-ia64.h"
/*
* Save/restore header: general info about the save file.
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-x86/hvm/save.h
--- a/xen/include/public/arch-x86/hvm/save.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-x86/hvm/save.h Fri Feb 13 11:22:28 2009 +0900
@@ -287,7 +287,7 @@ struct hvm_hw_pci_irqs {
* Indexed by: device*4 + INTx#.
*/
union {
- DECLARE_BITMAP(i, 32*4);
+ unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */
uint64_t pad[2];
};
};
@@ -300,7 +300,7 @@ struct hvm_hw_isa_irqs {
* Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
*/
union {
- DECLARE_BITMAP(i, 16);
+ unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */
uint64_t pad[1];
};
};
diff -r af992824b5cf -r c7cba853583d xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/arch-x86/xen-mca.h Fri Feb 13 11:22:28 2009 +0900
@@ -56,7 +56,7 @@
/* Hypercall */
#define __HYPERVISOR_mca __HYPERVISOR_arch_0
-#define XEN_MCA_INTERFACE_VERSION 0x03000001
+#define XEN_MCA_INTERFACE_VERSION 0x03000002
/* IN: Dom0 calls hypercall from MC event handler. */
#define XEN_MC_CORRECTABLE 0x0
@@ -118,7 +118,7 @@ struct mcinfo_global {
uint16_t mc_domid;
uint32_t mc_socketid; /* physical socket of the physical core */
uint16_t mc_coreid; /* physical impacted core */
- uint8_t mc_apicid;
+ uint32_t mc_apicid;
uint16_t mc_core_threadid; /* core thread of physical core */
uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
uint64_t mc_gstatus; /* global status */
@@ -175,6 +175,41 @@ struct mc_info {
};
typedef struct mc_info mc_info_t;
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS 7 /* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX 3 /* Linux-defined */
+#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */
+
+typedef struct mcinfo_logical_cpu {
+ uint32_t mc_cpunr;
+ uint32_t mc_chipid;
+ uint16_t mc_coreid;
+ uint16_t mc_threadid;
+ uint32_t mc_apicid;
+ uint32_t mc_clusterid;
+ uint32_t mc_ncores;
+ uint32_t mc_ncores_active;
+ uint32_t mc_nthreads;
+ int32_t mc_cpuid_level;
+ uint32_t mc_family;
+ uint32_t mc_vendor;
+ uint32_t mc_model;
+ uint32_t mc_step;
+ char mc_vendorid[16];
+ char mc_brandid[64];
+ uint32_t mc_cpu_caps[MC_NCAPS];
+ uint32_t mc_cache_size;
+ uint32_t mc_cache_alignment;
+ int32_t mc_nmsrvals;
+ struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+} xen_mc_logical_cpu_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
/*
@@ -272,6 +307,14 @@ typedef struct xen_mc_notifydomain xen_m
typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+ /* IN/OUT */
+ uint32_t ncpus;
+ uint32_t pad0;
+ /* OUT */
+ XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
+};
struct xen_mc {
uint32_t cmd;
@@ -279,6 +322,7 @@ struct xen_mc {
union {
struct xen_mc_fetch mc_fetch;
struct xen_mc_notifydomain mc_notifydomain;
+ struct xen_mc_physcpuinfo mc_physcpuinfo;
uint8_t pad[MCINFO_HYPERCALLSIZE];
} u;
};
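
XEN_MC_physcpuinfo appears intended as a two-pass interface: a first call with a null buffer reports the number of logical CPUs in ncpus, a second call with a suitably sized buffer fills in the records. A rough sketch of a dom0-side caller (issue_mca() and alloc_records() are hypothetical stand-ins for the caller's own hypercall and allocation plumbing):

    /* Sketch only: query the count, then fetch the per-CPU records. */
    struct xen_mc mc = {
        .cmd = XEN_MC_physcpuinfo,
        .interface_version = XEN_MCA_INTERFACE_VERSION,
    };

    set_xen_guest_handle(mc.u.mc_physcpuinfo.info, NULL);
    issue_mca(&mc);                               /* pass 1: ncpus only */

    xen_mc_logical_cpu_t *cpus = alloc_records(mc.u.mc_physcpuinfo.ncpus);
    set_xen_guest_handle(mc.u.mc_physcpuinfo.info, cpus);
    issue_mca(&mc);                               /* pass 2: fill records */
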
diff -r af992824b5cf -r c7cba853583d xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/domctl.h Fri Feb 13 11:22:28 2009 +0900
@@ -630,6 +630,17 @@ typedef struct xen_domctl_debug_op xen_d
typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
+/*
+ * Request a particular record from the HVM context
+ */
+#define XEN_DOMCTL_gethvmcontext_partial 55
+typedef struct xen_domctl_hvmcontext_partial {
+ uint32_t type; /* IN: Type of record required */
+ uint32_t instance; /* IN: Instance of that type */
+ XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */
+} xen_domctl_hvmcontext_partial_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
struct xen_domctl {
uint32_t cmd;
@@ -658,6 +669,7 @@ struct xen_domctl {
struct xen_domctl_settimeoffset settimeoffset;
struct xen_domctl_real_mode_area real_mode_area;
struct xen_domctl_hvmcontext hvmcontext;
+ struct xen_domctl_hvmcontext_partial hvmcontext_partial;
struct xen_domctl_address_size address_size;
struct xen_domctl_sendtrigger sendtrigger;
struct xen_domctl_get_device_group get_device_group;
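
The partial variant fetches exactly one save record, selected by type and instance, instead of the whole HVM context. For illustration (do_domctl(), domid and record_buf are hypothetical; HVM_SAVE_CODE(CPU) is the usual way to name the CPU record type), a caller asking for VCPU 0's state might fill the domctl like this:

    /* Sketch only: request the CPU save record for VCPU 0 of 'domid'. */
    struct xen_domctl domctl = {
        .cmd    = XEN_DOMCTL_gethvmcontext_partial,
        .domain = domid,
    };
    domctl.u.hvmcontext_partial.type     = HVM_SAVE_CODE(CPU);
    domctl.u.hvmcontext_partial.instance = 0;
    set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, record_buf);
    do_domctl(&domctl);
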
diff -r af992824b5cf -r c7cba853583d xen/include/public/io/pciif.h
--- a/xen/include/public/io/pciif.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/public/io/pciif.h Fri Feb 13 11:22:28 2009 +0900
@@ -29,7 +29,7 @@
/* xen_pci_sharedinfo flags */
#define _XEN_PCIF_active (0)
-#define XEN_PCIF_active (1<<_XEN_PCI_active)
+#define XEN_PCIF_active (1<<_XEN_PCIF_active)
#define _XEN_PCIB_AERHANDLER (1)
#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER)
#define _XEN_PCIB_active (2)
diff -r af992824b5cf -r c7cba853583d xen/include/xen/hvm/save.h
--- a/xen/include/xen/hvm/save.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/hvm/save.h Fri Feb 13 11:22:28 2009 +0900
@@ -152,6 +152,8 @@ __initcall(__hvm_register_##_x##_save_an
/* Entry points for saving and restoring HVM domain state */
size_t hvm_save_size(struct domain *d);
int hvm_save(struct domain *d, hvm_domain_context_t *h);
+int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance,
+ XEN_GUEST_HANDLE_64(uint8) handle);
int hvm_load(struct domain *d, hvm_domain_context_t *h);
/* Arch-specific definitions. */
diff -r af992824b5cf -r c7cba853583d xen/include/xen/iocap.h
--- a/xen/include/xen/iocap.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/iocap.h Fri Feb 13 11:22:28 2009 +0900
@@ -29,6 +29,7 @@
rangeset_contains_singleton((d)->irq_caps, i)
#define multipage_allocation_permitted(d) \
- (!rangeset_is_empty((d)->iomem_caps))
+ (!rangeset_is_empty((d)->iomem_caps) || \
+ !rangeset_is_empty((d)->arch.ioport_caps))
#endif /* __XEN_IOCAP_H__ */
diff -r af992824b5cf -r c7cba853583d xen/include/xen/irq.h
--- a/xen/include/xen/irq.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/irq.h Fri Feb 13 11:22:28 2009 +0900
@@ -24,6 +24,11 @@ struct irqaction
#define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */
#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
#define IRQ_PER_CPU 256 /* IRQ is per CPU */
+
+/* Special IRQ numbers. */
+#define AUTO_ASSIGN_IRQ (-1)
+#define NEVER_ASSIGN_IRQ (-2)
+#define FREE_TO_ASSIGN_IRQ (-3)
/*
* Interrupt controller descriptor. This is all we need
@@ -64,11 +69,20 @@ typedef struct {
extern irq_desc_t irq_desc[NR_VECTORS];
-extern int setup_irq(unsigned int, struct irqaction *);
-extern void free_irq(unsigned int);
-extern int request_irq(unsigned int irq,
+extern int setup_irq_vector(unsigned int, struct irqaction *);
+extern void release_irq_vector(unsigned int);
+extern int request_irq_vector(unsigned int vector,
void (*handler)(int, void *, struct cpu_user_regs *),
unsigned long irqflags, const char * devname, void *dev_id);
+
+#define setup_irq(irq, action) \
+ setup_irq_vector(irq_to_vector(irq), action)
+
+#define release_irq(irq) \
+ release_irq_vector(irq_to_vector(irq))
+
+#define request_irq(irq, handler, irqflags, devname, devid) \
+ request_irq_vector(irq_to_vector(irq), handler, irqflags, devname, devid)
extern hw_irq_controller no_irq_type;
extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs);
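
setup_irq()/release_irq()/request_irq() survive as macros that translate an IRQ number to a vector before calling the renamed vector-based primitives, so existing callers keep compiling unchanged. A sketch of such a caller (the handler and IRQ number are made up):

    /* Sketch only: the old spelling still works; it now expands to
     * request_irq_vector(irq_to_vector(4), ...). */
    if ( request_irq(4, example_uart_handler, 0, "example-uart", NULL) )
        printk("example-uart: could not bind IRQ 4\n");
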
diff -r af992824b5cf -r c7cba853583d xen/include/xen/mm.h
--- a/xen/include/xen/mm.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/mm.h Fri Feb 13 11:22:28 2009 +0900
@@ -85,22 +85,192 @@ int assign_pages(
#define MAX_ORDER 20 /* 2^20 contiguous pages */
#endif
+#define page_list_entry list_head
+
+#include <asm/mm.h>
+
+#ifndef page_list_entry
+struct page_list_head
+{
+ struct page_info *next, *tail;
+};
+/* These must only have instances in struct page_info. */
+# define page_list_entry
+
+# define PAGE_LIST_HEAD_INIT(name) { NULL, NULL }
+# define PAGE_LIST_HEAD(name) \
+ struct page_list_head name = PAGE_LIST_HEAD_INIT(name)
+# define INIT_PAGE_LIST_HEAD(head) ((head)->tail = (head)->next = NULL)
+# define INIT_PAGE_LIST_ENTRY(ent) ((ent)->prev = (ent)->next = ~0)
+
+static inline int
+page_list_empty(const struct page_list_head *head)
+{
+ return !head->next;
+}
+static inline struct page_info *
+page_list_first(const struct page_list_head *head)
+{
+ return head->next;
+}
+static inline struct page_info *
+page_list_next(const struct page_info *page,
+ const struct page_list_head *head)
+{
+ return page != head->tail ? mfn_to_page(page->list.next) : NULL;
+}
+static inline struct page_info *
+page_list_prev(const struct page_info *page,
+ const struct page_list_head *head)
+{
+ return page != head->next ? mfn_to_page(page->list.prev) : NULL;
+}
+static inline void
+page_list_add(struct page_info *page, struct page_list_head *head)
+{
+ if ( head->next )
+ {
+ page->list.next = page_to_mfn(head->next);
+ head->next->list.prev = page_to_mfn(page);
+ }
+ else
+ {
+ head->tail = page;
+ page->list.next = ~0;
+ }
+ page->list.prev = ~0;
+ head->next = page;
+}
+static inline void
+page_list_add_tail(struct page_info *page, struct page_list_head *head)
+{
+ page->list.next = ~0;
+ if ( head->next )
+ {
+ page->list.prev = page_to_mfn(head->tail);
+ head->tail->list.next = page_to_mfn(page);
+ }
+ else
+ {
+ page->list.prev = ~0;
+ head->next = page;
+ }
+ head->tail = page;
+}
+static inline bool_t
+__page_list_del_head(struct page_info *page, struct page_list_head *head,
+ struct page_info *next, struct page_info *prev)
+{
+ if ( head->next == page )
+ {
+ if ( head->tail != page )
+ {
+ next->list.prev = ~0;
+ head->next = next;
+ }
+ else
+ head->tail = head->next = NULL;
+ return 1;
+ }
+
+ if ( head->tail == page )
+ {
+ prev->list.next = ~0;
+ head->tail = prev;
+ return 1;
+ }
+
+ return 0;
+}
+static inline void
+page_list_del(struct page_info *page, struct page_list_head *head)
+{
+ struct page_info *next = mfn_to_page(page->list.next);
+ struct page_info *prev = mfn_to_page(page->list.prev);
+
+ if ( !__page_list_del_head(page, head, next, prev) )
+ {
+ next->list.prev = page->list.prev;
+ prev->list.next = page->list.next;
+ }
+}
+static inline void
+page_list_del2(struct page_info *page, struct page_list_head *head1,
+ struct page_list_head *head2)
+{
+ struct page_info *next = mfn_to_page(page->list.next);
+ struct page_info *prev = mfn_to_page(page->list.prev);
+
+ if ( !__page_list_del_head(page, head1, next, prev) &&
+ !__page_list_del_head(page, head2, next, prev) )
+ {
+ next->list.prev = page->list.prev;
+ prev->list.next = page->list.next;
+ }
+}
+static inline struct page_info *
+page_list_remove_head(struct page_list_head *head)
+{
+ struct page_info *page = head->next;
+
+ if ( page )
+ page_list_del(page, head);
+
+ return page;
+}
+
+#define page_list_for_each(pos, head) \
+ for ( pos = (head)->next; pos; pos = page_list_next(pos, head) )
+#define page_list_for_each_safe(pos, tmp, head) \
+ for ( pos = (head)->next; \
+ pos ? (tmp = page_list_next(pos, head), 1) : 0; \
+ pos = tmp )
+#define page_list_for_each_safe_reverse(pos, tmp, head) \
+ for ( pos = (head)->tail; \
+ pos ? (tmp = page_list_prev(pos, head), 1) : 0; \
+ pos = tmp )
+#else
+# define page_list_head list_head
+# define PAGE_LIST_HEAD_INIT LIST_HEAD_INIT
+# define PAGE_LIST_HEAD LIST_HEAD
+# define INIT_PAGE_LIST_HEAD INIT_LIST_HEAD
+# define INIT_PAGE_LIST_ENTRY INIT_LIST_HEAD
+# define page_list_empty list_empty
+# define page_list_first(hd) list_entry((hd)->next, \
+ struct page_info, list)
+# define page_list_next(pg, hd) list_entry((pg)->list.next, \
+ struct page_info, list)
+# define page_list_add(pg, hd) list_add(&(pg)->list, hd)
+# define page_list_add_tail(pg, hd) list_add_tail(&(pg)->list, hd)
+# define page_list_del(pg, hd) list_del(&(pg)->list)
+# define page_list_del2(pg, hd1, hd2) list_del(&(pg)->list)
+# define page_list_remove_head(hd) (!page_list_empty(hd) ? \
+ ({ \
+ struct page_info *__pg = page_list_first(hd); \
+ list_del(&__pg->list); \
+ __pg; \
+ }) : NULL)
+# define page_list_for_each(pos, head) list_for_each_entry(pos, head, list)
+# define page_list_for_each_safe(pos, tmp, head) \
+ list_for_each_entry_safe(pos, tmp, head, list)
+# define page_list_for_each_safe_reverse(pos, tmp, head) \
+ list_for_each_entry_safe_reverse(pos, tmp, head, list)
+#endif
+
/* Automatic page scrubbing for dead domains. */
-extern struct list_head page_scrub_list;
-#define page_scrub_schedule_work() \
- do { \
- if ( !list_empty(&page_scrub_list) ) \
- raise_softirq(PAGE_SCRUB_SOFTIRQ); \
+extern struct page_list_head page_scrub_list;
+#define page_scrub_schedule_work() \
+ do { \
+ if ( !page_list_empty(&page_scrub_list) ) \
+ raise_softirq(PAGE_SCRUB_SOFTIRQ); \
} while ( 0 )
#define page_scrub_kick() \
do { \
- if ( !list_empty(&page_scrub_list) ) \
+ if ( !page_list_empty(&page_scrub_list) ) \
cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ); \
} while ( 0 )
unsigned long avail_scrub_pages(void);
-#include <asm/mm.h>
-
int guest_remove_page(struct domain *d, unsigned long gmfn);
/* Returns TRUE if the whole page at @mfn is ordinary RAM. */
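
Whichever implementation is selected (the compact MFN-linked one or the list_head-backed fallback), callers only see the page_list_* primitives. A minimal sketch of walking a domain's page list (a real caller would normally hold d->page_alloc_lock):

    /* Sketch only: count a domain's pages via the new iterator. */
    static unsigned long example_count_domain_pages(struct domain *d)
    {
        struct page_info *pg;
        unsigned long n = 0;

        page_list_for_each ( pg, &d->page_list )
            n++;
        return n;
    }
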
diff -r af992824b5cf -r c7cba853583d xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/include/xen/sched.h Fri Feb 13 11:22:28 2009 +0900
@@ -19,6 +19,7 @@
#include <xen/xenoprof.h>
#include <xen/rcupdate.h>
#include <xen/irq.h>
+#include <xen/mm.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
@@ -171,8 +172,8 @@ struct domain
spinlock_t domain_lock;
spinlock_t page_alloc_lock; /* protects all the following fields */
- struct list_head page_list; /* linked list, of size tot_pages */
- struct list_head xenpage_list; /* linked list, of size xenheap_pages */
+ struct page_list_head page_list; /* linked list, of size tot_pages */
+ struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
unsigned int tot_pages; /* number of pages currently possesed */
unsigned int max_pages; /* maximum value for tot_pages */
unsigned int xenheap_pages; /* # pages allocated from Xen heap */
diff -r af992824b5cf -r c7cba853583d xen/xsm/flask/hooks.c
--- a/xen/xsm/flask/hooks.c Fri Feb 13 10:56:01 2009 +0900
+++ b/xen/xsm/flask/hooks.c Fri Feb 13 11:22:28 2009 +0900
@@ -820,6 +820,7 @@ static int flask_hvmcontext(struct domai
perm = HVM__SETHVMC;
break;
case XEN_DOMCTL_gethvmcontext:
+ case XEN_DOMCTL_gethvmcontext_partial:
perm = HVM__GETHVMC;
break;
default: