# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 3f6a2745b3a3b40bcdd51f9111b0993bad2c7ec6
# Parent 5568efb41da42a55318fa05d3ce0aa73e774e6d1
# Parent fd2667419c53ce2555c799acf3e84dd25912bcb5
branch merge with xen-unstable.hg
---
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/kmap_types.h | 32
patches/linux-2.6.16.33/ipv6-no-autoconf.patch | 18
tools/xm-test/lib/XmTestLib/XenManagedDomain.py | 177 -
.hgignore | 6
Config.mk | 2
buildconfigs/linux-defconfig_xen0_x86_32 | 1
buildconfigs/linux-defconfig_xenU_x86_32 | 1
buildconfigs/linux-defconfig_xen_x86_32 | 1
buildconfigs/mk.linux-2.6-xen | 31
config/x86_64.mk | 1
docs/xen-api/wire-protocol.tex | 30
docs/xen-api/xenapi-datamodel.tex | 137
extras/mini-os/Makefile | 24
extras/mini-os/gnttab.c | 36
extras/mini-os/include/hypervisor.h | 1
extras/mini-os/include/netfront.h | 2
extras/mini-os/include/x86/x86_32/hypercall-x86_32.h | 8
extras/mini-os/include/x86/x86_64/hypercall-x86_64.h | 8
extras/mini-os/include/xenbus.h | 3
extras/mini-os/kernel.c | 11
extras/mini-os/netfront.c | 455 +++
extras/mini-os/xenbus/xenbus.c | 86
linux-2.6-xen-sparse/arch/i386/Kconfig | 2
linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c | 19
linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c | 46
linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c | 74
linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c | 4
linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c | 1
linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 30
linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c | 5
linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c | 49
linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c | 16
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 6
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 32
linux-2.6-xen-sparse/drivers/xen/blktap/Makefile | 4
linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 32
linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 4
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h | 6
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h | 2
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h | 4
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h | 85
linux-2.6-xen-sparse/kernel/kexec.c | 8
linux-2.6-xen-sparse/net/core/dev.c | 3
patches/linux-2.6.16.33/series | 1
patches/linux-2.6.16.33/vsnprintf.patch | 1
tools/check/check_udev | 4
tools/examples/blktap | 2
tools/examples/block | 2
tools/examples/block-enbd | 2
tools/examples/block-nbd | 2
tools/examples/external-device-migrate | 2
tools/examples/network-bridge | 2
tools/examples/network-nat | 2
tools/examples/network-route | 2
tools/examples/vif-bridge | 2
tools/examples/vif-common.sh | 8
tools/examples/vif-nat | 2
tools/examples/vif-route | 2
tools/examples/vtpm | 2
tools/examples/vtpm-common.sh | 4
tools/examples/vtpm-delete | 2
tools/examples/xen-backend.agent | 2
tools/examples/xen-hotplug-cleanup | 2
tools/examples/xen-network-common.sh | 5
tools/examples/xmexample1 | 34
tools/examples/xmexample2 | 34
tools/examples/xmexample3 | 34
tools/firmware/rombios/rombios.c | 482 ++-
tools/ioemu/hw/pc.c | 2
tools/ioemu/target-i386-dm/helper2.c | 21
tools/ioemu/vl.c | 2
tools/libxc/xc_hvm_build.c | 3
tools/libxc/xc_linux_build.c | 4
tools/libxc/xc_linux_restore.c | 207 +
tools/libxc/xc_linux_save.c | 34
tools/libxc/xc_load_elf.c | 14
tools/libxc/xc_ptrace.c | 18
tools/libxc/xg_save_restore.h | 9
tools/libxen/include/xen_cpu_feature.h | 4
tools/libxen/src/xen_common.c | 48
tools/libxen/src/xen_cpu_feature.c | 4
tools/pygrub/src/pygrub | 280 +-
tools/python/scripts/xapi.py | 95
tools/python/xen/xend/XendAPI.py | 10
tools/python/xen/xend/XendAPIConstants.py | 3
tools/python/xen/xend/XendBootloader.py | 6
tools/python/xen/xend/XendCheckpoint.py | 8
tools/python/xen/xend/XendConfig.py | 57
tools/python/xen/xend/XendConstants.py | 1
tools/python/xen/xend/XendDomain.py | 8
tools/python/xen/xend/XendDomainInfo.py | 57
tools/python/xen/xend/XendNode.py | 2
tools/python/xen/xend/osdep.py | 5
tools/python/xen/xend/server/SrvDaemon.py | 2
tools/python/xen/xend/server/blkif.py | 1
tools/python/xen/xend/server/netif.py | 8
tools/python/xen/xend/server/vfbif.py | 2
tools/python/xen/xm/create.py | 25
tools/python/xen/xm/main.py | 44
tools/python/xen/xm/migrate.py | 1
tools/python/xen/xm/opts.py | 8
tools/python/xen/xm/shutdown.py | 1
tools/tests/Makefile | 13
tools/tests/blowfish.c | 439 +++
tools/tests/blowfish.mk | 23
tools/tests/test_x86_emulator.c | 193 +
tools/xenstat/xentop/xentop.c | 2
tools/xm-test/README | 43
tools/xm-test/configure.ac | 1
tools/xm-test/grouptest/xapi | 1
tools/xm-test/lib/XmTestLib/DomainTracking.py | 61
tools/xm-test/lib/XmTestLib/XenAPIDomain.py | 176 +
tools/xm-test/lib/XmTestLib/XenDomain.py | 28
tools/xm-test/lib/XmTestLib/Xm.py | 2
tools/xm-test/lib/XmTestLib/xapi.py | 79
tools/xm-test/ramdisk/Makefile.am | 13
tools/xm-test/ramdisk/skel/etc/init.d/rcS | 11
tools/xm-test/runtest.sh | 8
tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py | 8
tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py | 33
tools/xm-test/tests/vtpm/09_vtpm-xapi.py | 99
tools/xm-test/tests/xapi/01_xapi-vm_basic.py | 61
tools/xm-test/tests/xapi/Makefile.am | 19
unmodified_drivers/linux-2.6/platform-pci/evtchn.c | 6
unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 34
unmodified_drivers/linux-2.6/platform-pci/platform-pci.h | 4
xen/Makefile | 12
xen/Rules.mk | 1
xen/arch/ia64/linux-xen/unaligned.c | 2
xen/arch/ia64/xen/domain.c | 20
xen/arch/ia64/xen/xenmisc.c | 24
xen/arch/powerpc/domain.c | 6
xen/arch/powerpc/domctl.c | 6
xen/arch/powerpc/setup.c | 13
xen/arch/powerpc/xen.lds.S | 4
xen/arch/x86/boot/x86_32.S | 2
xen/arch/x86/boot/x86_64.S | 29
xen/arch/x86/compat.c | 14
xen/arch/x86/crash.c | 3
xen/arch/x86/domain.c | 691 ++++-
xen/arch/x86/domain_build.c | 225 +
xen/arch/x86/domctl.c | 113
xen/arch/x86/e820.c | 35
xen/arch/x86/hvm/hpet.c | 2
xen/arch/x86/hvm/hvm.c | 2
xen/arch/x86/hvm/instrlen.c | 32
xen/arch/x86/hvm/intercept.c | 2
xen/arch/x86/hvm/irq.c | 186 -
xen/arch/x86/hvm/platform.c | 22
xen/arch/x86/hvm/svm/svm.c | 12
xen/arch/x86/hvm/vioapic.c | 11
xen/arch/x86/hvm/vmx/vmcs.c | 9
xen/arch/x86/hvm/vmx/vmx.c | 148 -
xen/arch/x86/hvm/vmx/x86_32/exits.S | 33
xen/arch/x86/hvm/vmx/x86_64/exits.S | 29
xen/arch/x86/irq.c | 12
xen/arch/x86/mm.c | 277 +-
xen/arch/x86/mm/shadow/common.c | 41
xen/arch/x86/mm/shadow/multi.c | 78
xen/arch/x86/mm/shadow/private.h | 5
xen/arch/x86/oprofile/nmi_int.c | 2
xen/arch/x86/physdev.c | 13
xen/arch/x86/platform_hypercall.c | 19
xen/arch/x86/setup.c | 41
xen/arch/x86/sysctl.c | 8
xen/arch/x86/time.c | 12
xen/arch/x86/traps.c | 358 +-
xen/arch/x86/x86_32/mm.c | 12
xen/arch/x86/x86_32/traps.c | 6
xen/arch/x86/x86_32/xen.lds.S | 17
xen/arch/x86/x86_64/Makefile | 19
xen/arch/x86/x86_64/asm-offsets.c | 39
xen/arch/x86/x86_64/compat.c | 30
xen/arch/x86/x86_64/compat/entry.S | 365 ++
xen/arch/x86/x86_64/compat/mm.c | 337 ++
xen/arch/x86/x86_64/compat/traps.c | 338 ++
xen/arch/x86/x86_64/domain.c | 68
xen/arch/x86/x86_64/domctl.c | 111
xen/arch/x86/x86_64/entry.S | 29
xen/arch/x86/x86_64/mm.c | 93
xen/arch/x86/x86_64/physdev.c | 48
xen/arch/x86/x86_64/platform_hypercall.c | 29
xen/arch/x86/x86_64/sysctl.c | 33
xen/arch/x86/x86_64/traps.c | 19
xen/arch/x86/x86_64/xen.lds.S | 17
xen/arch/x86/x86_emulate.c | 1376 ++++++++--
xen/common/Makefile | 12
xen/common/acm_ops.c | 21
xen/common/compat/Makefile | 13
xen/common/compat/acm_ops.c | 47
xen/common/compat/domain.c | 91
xen/common/compat/domctl.c | 137
xen/common/compat/grant_table.c | 218 +
xen/common/compat/kernel.c | 59
xen/common/compat/kexec.c | 33
xen/common/compat/memory.c | 364 ++
xen/common/compat/multicall.c | 31
xen/common/compat/schedule.c | 51
xen/common/compat/sysctl.c | 95
xen/common/compat/xenoprof.c | 40
xen/common/compat/xlat.c | 73
xen/common/domain.c | 53
xen/common/domctl.c | 98
xen/common/elf.c | 4
xen/common/elf32.c | 19
xen/common/event_channel.c | 21
xen/common/grant_table.c | 4
xen/common/kernel.c | 24
xen/common/kexec.c | 194 -
xen/common/keyhandler.c | 26
xen/common/lib.c | 644 ++--
xen/common/memory.c | 22
xen/common/multicall.c | 5
xen/common/schedule.c | 38
xen/common/symbols.c | 9
xen/common/sysctl.c | 18
xen/common/trace.c | 78
xen/common/xencomm.c | 4
xen/common/xenoprof.c | 70
xen/drivers/video/vga.c | 11
xen/include/Makefile | 73
xen/include/asm-ia64/init.h | 25
xen/include/asm-ia64/shared.h | 4
xen/include/asm-powerpc/init.h | 19
xen/include/asm-powerpc/shared.h | 4
xen/include/asm-x86/compat.h | 8
xen/include/asm-x86/config.h | 63
xen/include/asm-x86/desc.h | 99
xen/include/asm-x86/domain.h | 5
xen/include/asm-x86/event.h | 14
xen/include/asm-x86/guest_access.h | 18
xen/include/asm-x86/hvm/hvm.h | 2
xen/include/asm-x86/hvm/irq.h | 16
xen/include/asm-x86/hypercall.h | 20
xen/include/asm-x86/init.h | 25
xen/include/asm-x86/ldt.h | 3
xen/include/asm-x86/mm.h | 26
xen/include/asm-x86/multicall.h | 25
xen/include/asm-x86/page.h | 5
xen/include/asm-x86/processor.h | 6
xen/include/asm-x86/regs.h | 3
xen/include/asm-x86/shadow.h | 10
xen/include/asm-x86/shared.h | 78
xen/include/asm-x86/x86_32/kexec.h | 1
xen/include/asm-x86/x86_32/page-2level.h | 2
xen/include/asm-x86/x86_32/page-3level.h | 2
xen/include/asm-x86/x86_32/regs.h | 2
xen/include/asm-x86/x86_32/uaccess.h | 2
xen/include/asm-x86/x86_64/kexec.h | 1
xen/include/asm-x86/x86_64/page.h | 7
xen/include/asm-x86/x86_64/regs.h | 7
xen/include/asm-x86/x86_64/uaccess.h | 15
xen/include/asm-x86/x86_emulate.h | 43
xen/include/public/arch-x86/xen-x86_64.h | 5
xen/include/public/arch-x86/xen.h | 10
xen/include/public/domctl.h | 5
xen/include/public/elfnote.h | 9
xen/include/public/hvm/ioreq.h | 5
xen/include/public/hvm/params.h | 22
xen/include/public/xen.h | 4
xen/include/public/xenoprof.h | 2
xen/include/xen/compat.h | 180 +
xen/include/xen/domain.h | 17
xen/include/xen/elf.h | 9
xen/include/xen/elfcore.h | 57
xen/include/xen/hypercall.h | 19
xen/include/xen/init.h | 19
xen/include/xen/kernel.h | 17
xen/include/xen/multicall.h | 10
xen/include/xen/perfc.h | 3
xen/include/xen/sched.h | 43
xen/include/xen/shared.h | 54
xen/include/xen/symbols.h | 5
xen/include/xen/xenoprof.h | 26
xen/include/xlat.lst | 52
xen/tools/compat-build-header.py | 21
xen/tools/compat-build-source.py | 27
xen/tools/get-fields.sh | 432 +++
xen/tools/symbols.c | 479 ---
279 files changed, 11480 insertions(+), 3343 deletions(-)
diff -r 5568efb41da4 -r 3f6a2745b3a3 .hgignore
--- a/.hgignore Mon Jan 15 13:27:20 2007 -0500
+++ b/.hgignore Wed Jan 17 09:56:40 2007 -0500
@@ -20,6 +20,7 @@
^\.config$
^TAGS$
^tags$
+^build.*$
^dist/.*$
^docs/.*\.aux$
^docs/.*\.dvi$
@@ -57,7 +58,7 @@
^docs/xen-api/xenapi-datamodel-graph.eps$
^extras/mini-os/h/hypervisor-ifs$
^extras/mini-os/h/xen-public$
-^extras/mini-os/mini-os\..*$
+^extras/mini-os/mini-os.*$
^install/.*$
^linux-[^/]*-native/.*$
^linux-[^/]*-xen/.*$
@@ -141,6 +142,8 @@
^tools/python/build/.*$
^tools/security/secpol_tool$
^tools/security/xen/.*$
+^tools/tests/blowfish\.bin$
+^tools/tests/blowfish\.h$
^tools/tests/test_x86_emulator$
^tools/vnet/Make.local$
^tools/vnet/build/.*$
@@ -207,6 +210,7 @@
^xen/ddb/.*$
^xen/include/asm$
^xen/include/asm-.*/asm-offsets\.h$
+^xen/include/compat/.*$
^xen/include/hypervisor-ifs/arch$
^xen/include/public/public$
^xen/include/xen/.*\.new$
diff -r 5568efb41da4 -r 3f6a2745b3a3 Config.mk
--- a/Config.mk Mon Jan 15 13:27:20 2007 -0500
+++ b/Config.mk Wed Jan 17 09:56:40 2007 -0500
@@ -10,6 +10,8 @@ XEN_OS ?= $(shell uname -s)
XEN_OS ?= $(shell uname -s)
CONFIG_$(XEN_OS) := y
+
+SHELL ?= /bin/sh
# Tools to run on system hosting the build
HOSTCC = gcc
diff -r 5568efb41da4 -r 3f6a2745b3a3 buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32 Mon Jan 15 13:27:20 2007 -0500
+++ b/buildconfigs/linux-defconfig_xen0_x86_32 Wed Jan 17 09:56:40 2007 -0500
@@ -172,6 +172,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_HIGHPTE is not set
CONFIG_MTRR=y
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
diff -r 5568efb41da4 -r 3f6a2745b3a3 buildconfigs/linux-defconfig_xenU_x86_32
--- a/buildconfigs/linux-defconfig_xenU_x86_32 Mon Jan 15 13:27:20 2007 -0500
+++ b/buildconfigs/linux-defconfig_xenU_x86_32 Wed Jan 17 09:56:40 2007 -0500
@@ -172,6 +172,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_HIGHPTE is not set
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
diff -r 5568efb41da4 -r 3f6a2745b3a3 buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32 Mon Jan 15 13:27:20 2007 -0500
+++ b/buildconfigs/linux-defconfig_xen_x86_32 Wed Jan 17 09:56:40 2007 -0500
@@ -180,6 +180,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_HIGHPTE is not set
CONFIG_MTRR=y
CONFIG_REGPARM=y
CONFIG_SECCOMP=y
diff -r 5568efb41da4 -r 3f6a2745b3a3 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen Mon Jan 15 13:27:20 2007 -0500
+++ b/buildconfigs/mk.linux-2.6-xen Wed Jan 17 09:56:40 2007 -0500
@@ -3,7 +3,8 @@ LINUX_VER = 2.6.16.33
EXTRAVERSION ?= xen
-LINUX_DIR = linux-$(LINUX_VER)-$(EXTRAVERSION)
+LINUX_SRCDIR = linux-$(LINUX_VER)-xen
+LINUX_DIR = build-linux-$(LINUX_VER)-$(EXTRAVERSION)_$(XEN_TARGET_ARCH)
IMAGE_TARGET ?= vmlinuz
INSTALL_BOOT_PATH ?= $(DESTDIR)
@@ -23,24 +24,31 @@ build: $(LINUX_DIR)/include/linux/autoco
mkdir -p $(INSTALL_BOOT_PATH)
$(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH)
INSTALL_PATH=$(INSTALL_BOOT_PATH) install
-$(LINUX_DIR)/include/linux/autoconf.h: ref-linux-$(LINUX_VER)/.valid-ref
- rm -rf $(LINUX_DIR)
- cp -al $(<D) $(LINUX_DIR)
+$(LINUX_SRCDIR)/.valid-src: ref-linux-$(LINUX_VER)/.valid-ref
+ rm -rf $(LINUX_SRCDIR)
+ cp -al $(<D) $(LINUX_SRCDIR)
# Apply arch-xen patches
( cd linux-$(LINUX_SERIES)-xen-sparse && \
- LINUX_ARCH=$(LINUX_ARCH) bash ./mkbuildtree ../$(LINUX_DIR) )
+ LINUX_ARCH=$(LINUX_ARCH) bash ./mkbuildtree ../$(LINUX_SRCDIR) )
+ # Patch kernel Makefile to set EXTRAVERSION
+ ( cd $(LINUX_SRCDIR) ; \
+ sed -e 's,^EXTRAVERSION.*,&$$(XENGUEST),' \
+ -e 's,^KERNELRELEASE,XENGUEST := $$(shell [ -r $$(objtree)/.xenguest ] \&\& cat $$(objtree)/.xenguest)\n&,' Makefile >Mk.tmp ; \
+ rm -f Makefile ; mv Mk.tmp Makefile )
+ touch $@
+
+$(LINUX_DIR)/include/linux/autoconf.h: $(LINUX_SRCDIR)/.valid-src
+ rm -rf $(LINUX_DIR)
+ mkdir -p $(LINUX_DIR)
# Re-use config from install dir if one exits else use default config
- CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \
+ CONFIG_VERSION=$$(sed -ne 's/$$(XENGUEST)//; s/^EXTRAVERSION = //p' $(LINUX_SRCDIR)/Makefile); \
[ -r $(DESTDIR)/boot/config-$(LINUX_VER3)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \
cp $(DESTDIR)/boot/config-$(LINUX_VER3)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \
|| sh buildconfigs/create_config.sh $(LINUX_DIR)/.config $(EXTRAVERSION) $(XEN_TARGET_ARCH) $(XEN_SYSTYPE)
# See if we need to munge config to enable PAE
$(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae
- # Patch kernel Makefile to set EXTRAVERSION
- ( cd $(LINUX_DIR) ; \
- sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \
- rm -f Makefile ; mv Mk.tmp Makefile )
- $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) oldconfig
+ echo "-$(EXTRAVERSION)" >$(LINUX_DIR)/.xenguest
+ $(MAKE) -C $(LINUX_SRCDIR) ARCH=$(LINUX_ARCH) oldconfig O=$$(/bin/pwd)/$(LINUX_DIR)
.PHONY: prep
prep: $(LINUX_DIR)/include/linux/autoconf.h
@@ -62,4 +70,5 @@ delete:
.PHONY: mrpropper
mrpropper:
+ rm -rf $(LINUX_SRCDIR)
rm -f linux-$(LINUX_VER).tar.bz2
diff -r 5568efb41da4 -r 3f6a2745b3a3 config/x86_64.mk
--- a/config/x86_64.mk Mon Jan 15 13:27:20 2007 -0500
+++ b/config/x86_64.mk Wed Jan 17 09:56:40 2007 -0500
@@ -2,6 +2,7 @@ CONFIG_X86_64 := y
CONFIG_X86_64 := y
CONFIG_X86_$(XEN_OS) := y
+CONFIG_COMPAT := y
CONFIG_HVM := y
CONFIG_MIGRATE := y
CONFIG_XCUTILS := y
diff -r 5568efb41da4 -r 3f6a2745b3a3 docs/xen-api/wire-protocol.tex
--- a/docs/xen-api/wire-protocol.tex Mon Jan 15 13:27:20 2007 -0500
+++ b/docs/xen-api/wire-protocol.tex Wed Jan 17 09:56:40 2007 -0500
@@ -149,16 +149,16 @@ The XML-RPC interface is session-based;
The XML-RPC interface is session-based; before you can make arbitrary RPC calls
you must login and initiate a session. For example:
\begin{verbatim}
- session_id Session.login_with_password(string uname, string pwd)
+ session_id session.login_with_password(string uname, string pwd)
\end{verbatim}
Where {\tt uname} and {\tt password} refer to your username and password
respectively, as defined by the Xen administrator.
-The {\tt session\_id} returned by {\tt Session.Login} is passed to subequent
-RPC calls as an authentication token.
-
-A session can be terminated with the {\tt Session.Logout} function:
-\begin{verbatim}
- void Session.Logout(session_id session)
+The {\tt session\_id} returned by {\tt session.login\_with\_password} is passed
+to subsequent RPC calls as an authentication token.
+
+A session can be terminated with the {\tt session.logout} function:
+\begin{verbatim}
+ void session.logout(session_id session)
\end{verbatim}
\subsection{Synchronous and Asynchronous invocation}
@@ -251,14 +251,20 @@ call takes the session token as the only
'2045dbc0-0734-4eea-9cb2-b8218c6b5bf2',
'3202ae18-a046-4c32-9fda-e32e9631866e']
\end{verbatim}
-Note the VM references are internally UUIDs. Once a reference to a VM has been
acquired a lifecycle operation may be invoked:
+The VM references here are UUIDs, though they may not be that simple in the
+future, and you should treat them as opaque strings. Once a reference to a VM
+has been acquired a lifecycle operation may be invoked:
\begin{verbatim}
>>> xen.VM.start(session, all_vms[3], False)
-{'Status': 'Failure', 'ErrorDescription': 'Operation not implemented'}
-\end{verbatim}
-
-In this case the {\tt start} message has not been implemented and an error
response has been returned. Currently these high-level errors are returned as
structured data (rather than as XMLRPC faults), allowing for internationalised
errors in future. Finally, here are some examples of using accessors for object
fields:
+{'Status': 'Failure', 'ErrorDescription': ['VM_BAD_POWER_STATE', 'Halted',
'Running']}
+\end{verbatim}
+
+In this case the {\tt start} message has been rejected, because the VM is
+already running, and so an error response has been returned. These high-level
+errors are returned as structured data (rather than as XML-RPC faults),
+allowing them to be internationalised. Finally, here are some examples of
+using accessors for object fields:
\begin{verbatim}
>>> xen.VM.get_name_label(session, all_vms[3])['Value']
diff -r 5568efb41da4 -r 3f6a2745b3a3 docs/xen-api/xenapi-datamodel.tex
--- a/docs/xen-api/xenapi-datamodel.tex Mon Jan 15 13:27:20 2007 -0500
+++ b/docs/xen-api/xenapi-datamodel.tex Wed Jan 17 09:56:40 2007 -0500
@@ -184,8 +184,8 @@ The following enumeration types are used
\hspace{0.5cm}{\tt NX} & Execute Disable \\
\hspace{0.5cm}{\tt MMXEXT} & AMD MMX extensions \\
\hspace{0.5cm}{\tt LM} & Long Mode (x86-64) \\
-\hspace{0.5cm}{\tt 3DNOWEXT} & AMD 3DNow! extensions \\
-\hspace{0.5cm}{\tt 3DNOW} & 3DNow! \\
+\hspace{0.5cm}{\tt THREEDNOWEXT} & AMD 3DNow! extensions \\
+\hspace{0.5cm}{\tt THREEDNOW} & 3DNow! \\
\hspace{0.5cm}{\tt RECOVERY} & CPU in recovery mode \\
\hspace{0.5cm}{\tt LONGRUN} & Longrun power control \\
\hspace{0.5cm}{\tt LRTI} & LongRun table interface \\
@@ -286,6 +286,7 @@ Quals & Field & Type & Description \\
$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object
reference \\
$\mathit{RO}_\mathit{ins}$ & {\tt this\_host} & host ref & Currently
connected host \\
$\mathit{RO}_\mathit{ins}$ & {\tt this\_user} & user ref & Currently
connected user \\
+$\mathit{RO}_\mathit{run}$ & {\tt last\_active} & int & Timestamp for last
time session was active \\
\hline
\end{longtable}
\subsection{Additional RPCs associated with class: session}
@@ -440,45 +441,13 @@ value of the field
\vspace{0.3cm}
\vspace{0.3cm}
\vspace{0.3cm}
-\subsubsection{RPC name:~create}
-
-{\bf Overview:}
-Create a new session instance, and return its handle.
-
- \noindent {\bf Signature:}
-\begin{verbatim} (session ref) create (session_id s, session record
args)\end{verbatim}
-
-
-\noindent{\bf Arguments:}
-
-
-\vspace{0.3cm}
-\begin{tabular}{|c|c|p{7cm}|}
- \hline
-{\bf type} & {\bf name} & {\bf description} \\ \hline
-{\tt session record } & args & All constructor arguments \\ \hline
-
-\end{tabular}
-
-\vspace{0.3cm}
-
- \noindent {\bf Return Type:}
-{\tt
-session ref
-}
-
-
-reference to the newly created object
-\vspace{0.3cm}
-\vspace{0.3cm}
-\vspace{0.3cm}
-\subsubsection{RPC name:~destroy}
-
-{\bf Overview:}
-Destroy the specified session instance.
-
- \noindent {\bf Signature:}
-\begin{verbatim} void destroy (session_id s, session ref self)\end{verbatim}
+\subsubsection{RPC name:~get\_last\_active}
+
+{\bf Overview:}
+Get the last\_active field of the given session.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_last_active (session_id s, session ref
self)\end{verbatim}
\noindent{\bf Arguments:}
@@ -496,11 +465,11 @@ Destroy the specified session instance.
\noindent {\bf Return Type:}
{\tt
-void
-}
-
-
-
+int
+}
+
+
+value of the field
\vspace{0.3cm}
\vspace{0.3cm}
\vspace{0.3cm}
@@ -596,7 +565,7 @@ Quals & Field & Type & Description \\
\subsubsection{RPC name:~get\_all}
{\bf Overview:}
-Return a list of all the tasks known to the system
+Return a list of all the tasks known to the system.
\noindent {\bf Signature:}
\begin{verbatim} ((task ref) Set) get_all (session_id s)\end{verbatim}
@@ -999,70 +968,6 @@ string Set
value of the field
-\vspace{0.3cm}
-\vspace{0.3cm}
-\vspace{0.3cm}
-\subsubsection{RPC name:~create}
-
-{\bf Overview:}
-Create a new task instance, and return its handle.
-
- \noindent {\bf Signature:}
-\begin{verbatim} (task ref) create (session_id s, task record
args)\end{verbatim}
-
-
-\noindent{\bf Arguments:}
-
-
-\vspace{0.3cm}
-\begin{tabular}{|c|c|p{7cm}|}
- \hline
-{\bf type} & {\bf name} & {\bf description} \\ \hline
-{\tt task record } & args & All constructor arguments \\ \hline
-
-\end{tabular}
-
-\vspace{0.3cm}
-
- \noindent {\bf Return Type:}
-{\tt
-task ref
-}
-
-
-reference to the newly created object
-\vspace{0.3cm}
-\vspace{0.3cm}
-\vspace{0.3cm}
-\subsubsection{RPC name:~destroy}
-
-{\bf Overview:}
-Destroy the specified task instance.
-
- \noindent {\bf Signature:}
-\begin{verbatim} void destroy (session_id s, task ref self)\end{verbatim}
-
-
-\noindent{\bf Arguments:}
-
-
-\vspace{0.3cm}
-\begin{tabular}{|c|c|p{7cm}|}
- \hline
-{\bf type} & {\bf name} & {\bf description} \\ \hline
-{\tt task ref } & self & reference to the object \\ \hline
-
-\end{tabular}
-
-\vspace{0.3cm}
-
- \noindent {\bf Return Type:}
-{\tt
-void
-}
-
-
-
\vspace{0.3cm}
\vspace{0.3cm}
\vspace{0.3cm}
@@ -10575,6 +10480,16 @@ expected parameters are returned.
\begin{verbatim}MESSAGE_PARAMETER_COUNT_MISMATCH(method, expected,
received)\end{verbatim}
\begin{center}\rule{10em}{0.1pt}\end{center}
+\subsubsection{NETWORK\_ALREADY\_CONNECTED}
+
+You tried to create a PIF, but the network you tried to attach it to is
+already attached to some other PIF, and so the creation failed.
+
+\vspace{0.3cm}
+{\bf Signature:}
+\begin{verbatim}NETWORK_ALREADY_CONNECTED(network, connected PIF)\end{verbatim}
+\begin{center}\rule{10em}{0.1pt}\end{center}
+
\subsubsection{SESSION\_AUTHENTICATION\_FAILED}
The credentials given by the user are incorrect, so access has been denied,
diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/Makefile
--- a/extras/mini-os/Makefile Mon Jan 15 13:27:20 2007 -0500
+++ b/extras/mini-os/Makefile Wed Jan 17 09:56:40 2007 -0500
@@ -7,7 +7,7 @@ include $(XEN_ROOT)/Config.mk
# Set TARGET_ARCH
override TARGET_ARCH := $(XEN_TARGET_ARCH)
-XEN_INTERFACE_VERSION := 0x00030203
+XEN_INTERFACE_VERSION := 0x00030204
# NB. '-Wcast-qual' is nasty, so I omitted it.
CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format
@@ -17,7 +17,13 @@ ASFLAGS = -D__ASSEMBLY__
ASFLAGS = -D__ASSEMBLY__
LDLIBS = -L. -lminios
-LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
+LDFLAGS_FINAL := -N -T minios-$(TARGET_ARCH).lds
+LDFLAGS :=
+
+# Prefix for global API names. All other symbols are localised before
+# linking with EXTRA_OBJS.
+GLOBAL_PREFIX := xenos_
+EXTRA_OBJS =
# For possible special source directories.
EXTRA_SRC =
@@ -110,18 +116,16 @@ links: $(ARCH_LINKS)
links: $(ARCH_LINKS)
[ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
-libminios.a: links $(OBJS) $(HEAD)
- $(AR) r libminios.a $(HEAD) $(OBJS)
-
-$(TARGET): libminios.a $(HEAD)
- $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
- gzip -f -9 -c $@.elf >$@.gz
+$(TARGET): links $(OBJS) $(HEAD)
+ $(LD) -r $(LDFLAGS) $(HEAD) $(OBJS) -o $@.o
+ $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
+ $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
+ gzip -f -9 -c $@ >$@.gz
.PHONY: clean
clean:
find . -type f -name '*.o' | xargs rm -f
- rm -f *.o *~ core $(TARGET).elf $(TARGET).raw $(TARGET) $(TARGET).gz
- rm -f libminios.a
+ rm -f *.o *~ core $(TARGET) $(TARGET).gz
find . -type l | xargs rm -f
rm -f tags TAGS
diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/gnttab.c
--- a/extras/mini-os/gnttab.c Mon Jan 15 13:27:20 2007 -0500
+++ b/extras/mini-os/gnttab.c Wed Jan 17 09:56:40 2007 -0500
@@ -23,31 +23,24 @@
#define NR_GRANT_FRAMES 4
#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
-#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1)
static grant_entry_t *gnttab_table;
static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
-static grant_ref_t gnttab_free_head;
+
+static void
+put_free_entry(grant_ref_t ref)
+{
+ gnttab_list[ref] = gnttab_list[0];
+ gnttab_list[0] = ref;
+
+}
static grant_ref_t
-get_free_entries(int count)
+get_free_entry(void)
{
- grant_ref_t ref;
- grant_ref_t head;
-
- ref = head = gnttab_free_head;
- while (count-- > 1)
- head = gnttab_list[head];
- gnttab_free_head = gnttab_list[head];
- gnttab_list[head] = GNTTAB_LIST_END;
+ unsigned int ref = gnttab_list[0];
+ gnttab_list[0] = gnttab_list[ref];
return ref;
-}
-
-static void
-put_free_entry(grant_ref_t gref)
-{
- gnttab_list[gref] = gnttab_free_head;
- gnttab_free_head = gref;
}
grant_ref_t
@@ -55,7 +48,7 @@ gnttab_grant_access(domid_t domid, unsig
{
grant_ref_t ref;
- ref = get_free_entries(1);
+ ref = get_free_entry();
gnttab_table[ref].frame = frame;
gnttab_table[ref].domid = domid;
wmb();
@@ -70,7 +63,7 @@ gnttab_grant_transfer(domid_t domid, uns
{
grant_ref_t ref;
- ref = get_free_entries(1);
+ ref = get_free_entry();
gnttab_table[ref].frame = pfn;
gnttab_table[ref].domid = domid;
wmb();
@@ -157,8 +150,7 @@ init_gnttab(void)
int i;
for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++)
- gnttab_list[i] = i + 1;
- gnttab_free_head = NR_RESERVED_ENTRIES;
+ put_free_entry(i);
setup.dom = DOMID_SELF;
setup.nr_frames = NR_GRANT_FRAMES;
diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/include/hypervisor.h
--- a/extras/mini-os/include/hypervisor.h Mon Jan 15 13:27:20 2007 -0500
+++ b/extras/mini-os/include/hypervisor.h Wed Jan 17 09:56:40 2007 -0500
@@ -15,7 +15,6 @@
#include <types.h>
#include <xen/xen.h>
-#include <xen/dom0_ops.h>
#if defined(__i386__)
#include <hypercall-x86_32.h>
#elif defined(__x86_64__)
diff -r 5568efb41da4 -r 3f6a2745b3a3
extras/mini-os/include/x86/x86_32/hypercall-x86_32.h
--- a/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Mon Jan 15
13:27:20 2007 -0500
+++ b/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Wed Jan 17
09:56:40 2007 -0500
@@ -179,14 +179,6 @@ HYPERVISOR_set_timer_op(
unsigned long timeout_hi = (unsigned long)(timeout>>32);
unsigned long timeout_lo = (unsigned long)timeout;
return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
-}
-
-static inline int
-HYPERVISOR_dom0_op(
- dom0_op_t *dom0_op)
-{
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- return _hypercall1(int, dom0_op, dom0_op);
}
static inline int
diff -r 5568efb41da4 -r 3f6a2745b3a3
extras/mini-os/include/x86/x86_64/hypercall-x86_64.h
--- a/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Mon Jan 15
13:27:20 2007 -0500
+++ b/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Wed Jan 17
09:56:40 2007 -0500
@@ -181,14 +181,6 @@ HYPERVISOR_set_timer_op(
u64 timeout)
{
return _hypercall1(long, set_timer_op, timeout);
-}
-
-static inline int
-HYPERVISOR_dom0_op(
- dom0_op_t *dom0_op)
-{
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- return _hypercall1(int, dom0_op, dom0_op);
}
static inline int
diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/include/xenbus.h
--- a/extras/mini-os/include/xenbus.h Mon Jan 15 13:27:20 2007 -0500
+++ b/extras/mini-os/include/xenbus.h Wed Jan 17 09:56:40 2007 -0500
@@ -11,6 +11,9 @@ void init_xenbus(void);
string on failure and sets *value to NULL. On success, *value is
set to a malloc'd copy of the value. */
char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value);
+
+char *xenbus_watch_path(xenbus_transaction_t xbt, const char *path);
+char* xenbus_wait_for_value(const char*,const char*);
/* Associates a value with a path. Returns a malloc'd error string on
failure. */
diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c Mon Jan 15 13:27:20 2007 -0500
+++ b/extras/mini-os/kernel.c Wed Jan 17 09:56:40 2007 -0500
@@ -37,6 +37,7 @@
#include <sched.h>
#include <xenbus.h>
#include <gnttab.h>
+#include <netfront.h>
#include <xen/features.h>
#include <xen/version.h>
@@ -61,13 +62,13 @@ void setup_xen_features(void)
void test_xenbus(void);
-void xenbus_tester(void *p)
+static void xenbus_tester(void *p)
{
printk("Xenbus tests disabled, because of a Xend bug.\n");
/* test_xenbus(); */
}
-void periodic_thread(void *p)
+static void periodic_thread(void *p)
{
struct timeval tv;
printk("Periodic thread started.\n");
@@ -79,12 +80,18 @@ void periodic_thread(void *p)
}
}
+static void netfront_thread(void *p)
+{
+ init_netfront(&start_info);
+}
+
/* This should be overridden by the application we are linked against. */
__attribute__((weak)) int app_main(start_info_t *si)
{
printk("Dummy main: start_info=%p\n", si);
create_thread("xenbus_tester", xenbus_tester, si);
create_thread("periodic_thread", periodic_thread, si);
+ create_thread("netfront", netfront_thread, si);
return 0;
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/xenbus/xenbus.c
--- a/extras/mini-os/xenbus/xenbus.c Mon Jan 15 13:27:20 2007 -0500
+++ b/extras/mini-os/xenbus/xenbus.c Wed Jan 17 09:56:40 2007 -0500
@@ -45,9 +45,9 @@
#define DEBUG(_f, _a...) ((void)0)
#endif
-
static struct xenstore_domain_interface *xenstore_buf;
static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+static DECLARE_WAIT_QUEUE_HEAD(watch_queue);
struct xenbus_req_info
{
int in_use:1;
@@ -71,6 +71,34 @@ static void memcpy_from_ring(const void
memcpy(dest, ring + off, c1);
memcpy(dest + c1, ring, c2);
}
+
+static inline void wait_for_watch(void)
+{
+ DEFINE_WAIT(w);
+ add_waiter(w,watch_queue);
+ schedule();
+ wake(current);
+}
+
+char* xenbus_wait_for_value(const char* path,const char* value)
+{
+ for(;;)
+ {
+ char *res, *msg;
+ int r;
+
+ msg = xenbus_read(XBT_NIL, path, &res);
+ if(msg) return msg;
+
+ r = strcmp(value,res);
+ free(res);
+
+ if(r==0) break;
+ else wait_for_watch();
+ }
+ return NULL;
+}
+
static void xenbus_thread_func(void *ign)
{
@@ -101,13 +129,35 @@ static void xenbus_thread_func(void *ign
break;
DEBUG("Message is good.\n");
- req_info[msg.req_id].reply = malloc(sizeof(msg) + msg.len);
- memcpy_from_ring(xenstore_buf->rsp,
+
+ if(msg.type == XS_WATCH_EVENT)
+ {
+ char* payload = (char*)malloc(sizeof(msg) + msg.len);
+ char *path,*token;
+
+ memcpy_from_ring(xenstore_buf->rsp,
+ payload,
+ MASK_XENSTORE_IDX(xenstore_buf->rsp_cons),
+ msg.len + sizeof(msg));
+
+ path = payload + sizeof(msg);
+ token = path + strlen(path) + 1;
+
+ xenstore_buf->rsp_cons += msg.len + sizeof(msg);
+ free(payload);
+ wake_up(&watch_queue);
+ }
+
+ else
+ {
+ req_info[msg.req_id].reply = malloc(sizeof(msg) + msg.len);
+ memcpy_from_ring(xenstore_buf->rsp,
req_info[msg.req_id].reply,
MASK_XENSTORE_IDX(xenstore_buf->rsp_cons),
msg.len + sizeof(msg));
- wake_up(&req_info[msg.req_id].waitq);
- xenstore_buf->rsp_cons += msg.len + sizeof(msg);
+ xenstore_buf->rsp_cons += msg.len + sizeof(msg);
+ wake_up(&req_info[msg.req_id].waitq);
+ }
}
}
}
@@ -381,9 +431,29 @@ char *xenbus_write(xenbus_transaction_t
struct xsd_sockmsg *rep;
rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req));
char *msg = errmsg(rep);
- if (msg)
- return msg;
- free(rep);
+ if (msg) return msg;
+ free(rep);
+ return NULL;
+}
+
+/*
+ * Send an XS_WATCH request for path, using the fixed token "0".
+ *
+ * Returns NULL when errmsg() reports no error for the reply, otherwise
+ * the message produced by errmsg().
+ */
+char* xenbus_watch_path( xenbus_transaction_t xbt, const char *path)
+{
+ /* in the future one could have multiple watch queues, and use
+ * the token for demuxing. For now the token is 0. */
+
+ struct xsd_sockmsg *rep;
+
+ /* Both strings are sent including their terminating NUL byte
+ * ("0" plus NUL is 2 bytes). */
+ struct write_req req[] = {
+ {path, strlen(path) + 1},
+ {"0",2 },
+ };
+
+ rep = xenbus_msg_reply(XS_WATCH, xbt, req, ARRAY_SIZE(req));
+
+ char *msg = errmsg(rep);
+ if (msg) return msg;
+ free(rep);
+
return NULL;
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig Mon Jan 15 13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Wed Jan 17 09:56:40 2007 -0500
@@ -594,7 +594,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
config HIGHPTE
bool "Allocate 3rd-level pagetables from highmem"
- depends on (HIGHMEM4G || HIGHMEM64G) && !X86_XEN
+ depends on HIGHMEM4G || HIGHMEM64G
help
The VM uses one page table entry for each page of physical memory.
For systems with a lot of RAM, this can be wasteful of precious
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c Mon Jan 15
13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c Wed Jan 17
09:56:40 2007 -0500
@@ -94,13 +94,7 @@ dma_unmap_sg(struct device *hwdev, struc
}
EXPORT_SYMBOL(dma_unmap_sg);
-/*
- * XXX This file is also used by xenLinux/ia64.
- * "defined(__i386__) || defined (__x86_64__)" means "!defined(__ia64__)".
- * This #if work around should be removed once this file is merbed back into
- * i386' pci-dma or is moved to drivers/xen/core.
- */
-#if defined(__i386__) || defined(__x86_64__)
+#ifdef CONFIG_HIGHMEM
dma_addr_t
dma_map_page(struct device *dev, struct page *page, unsigned long offset,
size_t size, enum dma_data_direction direction)
@@ -130,7 +124,7 @@ dma_unmap_page(struct device *dev, dma_a
swiotlb_unmap_page(dev, dma_address, size, direction);
}
EXPORT_SYMBOL(dma_unmap_page);
-#endif /* defined(__i386__) || defined(__x86_64__) */
+#endif /* CONFIG_HIGHMEM */
int
dma_mapping_error(dma_addr_t dma_addr)
@@ -161,6 +155,8 @@ void *dma_alloc_coherent(struct device *
struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
unsigned int order = get_order(size);
unsigned long vstart;
+ u64 mask;
+
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
@@ -183,9 +179,14 @@ void *dma_alloc_coherent(struct device *
vstart = __get_free_pages(gfp, order);
ret = (void *)vstart;
+ if (dev != NULL && dev->coherent_dma_mask)
+ mask = dev->coherent_dma_mask;
+ else
+ mask = 0xffffffff;
+
if (ret != NULL) {
if (xen_create_contiguous_region(vstart, order,
- dma_bits) != 0) {
+ fls64(mask)) != 0) {
free_pages(vstart, order);
return NULL;
}
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Mon Jan 15
13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Wed Jan 17
09:56:40 2007 -0500
@@ -101,8 +101,24 @@ void enable_hlt(void)
EXPORT_SYMBOL(enable_hlt);
-/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */
-void xen_idle(void)
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->work.need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle(void)
+{
+ /* Spin with interrupts enabled. */
+ local_irq_enable();
+
+ /*
+ * Loop at label 2 while (flags & _TIF_NEED_RESCHED) == 0:
+ * testl sets ZF from the AND of the two operands, "rep; nop" is
+ * the PAUSE spin-wait hint and does not touch the flags, so je
+ * branches back on the result of testl.
+ */
+ asm volatile(
+ "2:"
+ "testl %0, %1;"
+ "rep; nop;"
+ "je 2b;"
+ : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
+}
+
+static void xen_idle(void)
{
local_irq_disable();
@@ -152,17 +168,22 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
+ void (*idle)(void);
if (__get_cpu_var(cpu_idle_state))
__get_cpu_var(cpu_idle_state) = 0;
rmb();
+ idle = pm_idle;
+
+ if (!idle)
+ idle = xen_idle;
if (cpu_is_offline(cpu))
play_dead();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
- xen_idle();
+ idle();
}
preempt_enable_no_resched();
schedule();
@@ -198,9 +219,22 @@ void cpu_idle_wait(void)
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
-/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */
-/* Always use xen_idle() instead. */
-void __devinit select_idle_routine(const struct cpuinfo_x86 *c) {}
+void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
+{
+}
+
+/*
+ * Handler for the "idle=" boot parameter.  A value beginning with "poll"
+ * makes pm_idle point at poll_idle(); for any value
+ * boot_option_idle_override is set to 1.  Always returns 1.
+ */
+static int __init idle_setup (char *str)
+{
+	const int want_poll = (strncmp(str, "poll", 4) == 0);
+
+	if (want_poll) {
+		printk("using polling idle threads.\n");
+		pm_idle = poll_idle;
+	}
+	boot_option_idle_override = 1;
+
+	return 1;
+}
+
+__setup("idle=", idle_setup);
void show_regs(struct pt_regs * regs)
{
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Mon Jan 15 13:27:20
2007 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Wed Jan 17 09:56:40
2007 -0500
@@ -47,9 +47,6 @@ EXPORT_SYMBOL(swiotlb);
*/
#define IO_TLB_SHIFT 11
-/* Width of DMA addresses. 30 bits is a b44 limitation. */
-#define DEFAULT_DMA_BITS 30
-
static int swiotlb_force;
static char *iotlb_virt_start;
static unsigned long iotlb_nslabs;
@@ -98,11 +95,12 @@ static struct phys_addr {
*/
static DEFINE_SPINLOCK(io_tlb_lock);
-unsigned int dma_bits = DEFAULT_DMA_BITS;
+static unsigned int dma_bits;
+static unsigned int __initdata max_dma_bits = 32;
static int __init
setup_dma_bits(char *str)
{
- dma_bits = simple_strtoul(str, NULL, 0);
+ max_dma_bits = simple_strtoul(str, NULL, 0);
return 0;
}
__setup("dma_bits=", setup_dma_bits);
@@ -143,6 +141,7 @@ swiotlb_init_with_default_size (size_t d
swiotlb_init_with_default_size (size_t default_size)
{
unsigned long i, bytes;
+ int rc;
if (!iotlb_nslabs) {
iotlb_nslabs = (default_size >> IO_TLB_SHIFT);
@@ -159,16 +158,33 @@ swiotlb_init_with_default_size (size_t d
*/
iotlb_virt_start = alloc_bootmem_low_pages(bytes);
if (!iotlb_virt_start)
- panic("Cannot allocate SWIOTLB buffer!\n"
- "Use dom0_mem Xen boot parameter to reserve\n"
- "some DMA memory (e.g., dom0_mem=-128M).\n");
-
+ panic("Cannot allocate SWIOTLB buffer!\n");
+
+ dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) {
- int rc = xen_create_contiguous_region(
- (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT),
- get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
- dma_bits);
- BUG_ON(rc);
+ do {
+ rc = xen_create_contiguous_region(
+ (unsigned long)iotlb_virt_start + (i <<
IO_TLB_SHIFT),
+ get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
+ dma_bits);
+ } while (rc && dma_bits++ < max_dma_bits);
+ if (rc) {
+ if (i == 0)
+ panic("No suitable physical memory available
for SWIOTLB buffer!\n"
+ "Use dom0_mem Xen boot parameter to
reserve\n"
+ "some DMA memory (e.g.,
dom0_mem=-128M).\n");
+ iotlb_nslabs = i;
+ i <<= IO_TLB_SHIFT;
+ free_bootmem(__pa(iotlb_virt_start + i), bytes - i);
+ bytes = i;
+ for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE <<
IO_TLB_SHIFT) {
+ unsigned int bits =
fls64(virt_to_bus(iotlb_virt_start + i - 1));
+
+ if (bits > dma_bits)
+ dma_bits = bits;
+ }
+ break;
+ }
}
/*
@@ -186,17 +202,27 @@ swiotlb_init_with_default_size (size_t d
* Get the overflow emergency buffer
*/
io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+ if (!io_tlb_overflow_buffer)
+ panic("Cannot allocate SWIOTLB overflow buffer!\n");
+
+ do {
+ rc = xen_create_contiguous_region(
+ (unsigned long)io_tlb_overflow_buffer,
+ get_order(io_tlb_overflow),
+ dma_bits);
+ } while (rc && dma_bits++ < max_dma_bits);
+ if (rc)
+ panic("No suitable physical memory available for SWIOTLB
overflow buffer!\n");
iotlb_pfn_start = __pa(iotlb_virt_start) >> PAGE_SHIFT;
iotlb_pfn_end = iotlb_pfn_start + (bytes >> PAGE_SHIFT);
printk(KERN_INFO "Software IO TLB enabled: \n"
" Aperture: %lu megabytes\n"
- " Kernel range: 0x%016lx - 0x%016lx\n"
+ " Kernel range: %p - %p\n"
" Address size: %u bits\n",
bytes >> 20,
- (unsigned long)iotlb_virt_start,
- (unsigned long)iotlb_virt_start + bytes,
+ iotlb_virt_start, iotlb_virt_start + bytes,
dma_bits);
}
@@ -238,9 +264,12 @@ __sync_single(struct phys_addr buffer, c
char *dev, *host, *kmp;
len = size;
while (len != 0) {
+ unsigned long flags;
+
if (((bytes = len) + buffer.offset) > PAGE_SIZE)
bytes = PAGE_SIZE - buffer.offset;
- kmp = kmap_atomic(buffer.page, KM_SWIOTLB);
+ local_irq_save(flags); /* protects KM_BOUNCE_READ */
+ kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ);
dev = dma_addr + size - len;
host = kmp + buffer.offset;
if (dir == DMA_FROM_DEVICE) {
@@ -248,7 +277,8 @@ __sync_single(struct phys_addr buffer, c
/* inaccessible */;
} else
memcpy(dev, host, bytes);
- kunmap_atomic(kmp, KM_SWIOTLB);
+ kunmap_atomic(kmp, KM_BOUNCE_READ);
+ local_irq_restore(flags);
len -= bytes;
buffer.page++;
buffer.offset = 0;
@@ -617,6 +647,8 @@ swiotlb_sync_sg_for_device(struct device
sg->dma_length, dir);
}
+#ifdef CONFIG_HIGHMEM
+
dma_addr_t
swiotlb_map_page(struct device *hwdev, struct page *page,
unsigned long offset, size_t size,
@@ -650,6 +682,8 @@ swiotlb_unmap_page(struct device *hwdev,
unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
}
+#endif
+
int
swiotlb_dma_mapping_error(dma_addr_t dma_addr)
{
@@ -677,7 +711,5 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_de
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
-EXPORT_SYMBOL(swiotlb_map_page);
-EXPORT_SYMBOL(swiotlb_unmap_page);
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
EXPORT_SYMBOL(swiotlb_dma_supported);
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Mon Jan 15 13:27:20
2007 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Wed Jan 17 09:56:40
2007 -0500
@@ -225,7 +225,7 @@ static void dump_fault_path(unsigned lon
p += (address >> 30) * 2;
printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]);
if (p[0] & 1) {
- mfn = (p[0] >> PAGE_SHIFT) | ((p[1] & 0x7) << 20);
+ mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20);
page = mfn_to_pfn(mfn) << PAGE_SHIFT;
p = (unsigned long *)__va(page);
address &= 0x3fffffff;
@@ -234,7 +234,7 @@ static void dump_fault_path(unsigned lon
page, p[1], p[0]);
#ifndef CONFIG_HIGHPTE
if (p[0] & 1) {
- mfn = (p[0] >> PAGE_SHIFT) | ((p[1] & 0x7) << 20);
+ mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20);
page = mfn_to_pfn(mfn) << PAGE_SHIFT;
p = (unsigned long *) __va(page);
address &= 0x001fffff;
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c Mon Jan 15 13:27:20
2007 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c Wed Jan 17 09:56:40
2007 -0500
@@ -129,5 +129,6 @@ EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_pte);
EXPORT_SYMBOL(kunmap_atomic);
EXPORT_SYMBOL(kmap_atomic_to_page);
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Mon Jan 15 13:27:20
2007 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Wed Jan 17 09:56:40
2007 -0500
@@ -238,23 +238,41 @@ struct page *pte_alloc_one(struct mm_str
#ifdef CONFIG_HIGHPTE
pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+ if (pte && PageHighMem(pte)) {
+ struct mmuext_op op;
+
+ kmap_flush_unused();
+ op.cmd = MMUEXT_PIN_L1_TABLE;
+ op.arg1.mfn = pfn_to_mfn(page_to_pfn(pte));
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ }
#else
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+#endif
if (pte) {
SetPageForeign(pte, pte_free);
set_page_count(pte, 1);
}
-#endif
return pte;
}
void pte_free(struct page *pte)
{
- unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
-
- if (!pte_write(*virt_to_ptep(va)))
- BUG_ON(HYPERVISOR_update_va_mapping(
- va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
+ unsigned long pfn = page_to_pfn(pte);
+
+ if (!PageHighMem(pte)) {
+ unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT);
+
+ if (!pte_write(*virt_to_ptep(va)))
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ va, pfn_pte(pfn, PAGE_KERNEL), 0));
+ } else {
+ struct mmuext_op op;
+
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.arg1.mfn = pfn_to_mfn(pfn);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ }
ClearPageForeign(pte);
set_page_count(pte, 1);
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c Mon Jan 15 13:27:20
2007 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c Wed Jan 17 09:56:40
2007 -0500
@@ -110,7 +110,6 @@ static struct irq_routing_table * __init
if (rt)
return rt;
}
-
return NULL;
}
@@ -261,13 +260,13 @@ static int pirq_via_set(struct pci_dev *
*/
static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int
pirq)
{
- static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
+ static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
}
static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int
pirq, int irq)
{
- static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
+ static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
return 1;
}
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Mon Jan 15
13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Wed Jan 17
09:56:40 2007 -0500
@@ -119,8 +119,26 @@ void exit_idle(void)
__exit_idle();
}
-/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */
-void xen_idle(void)
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle(void)
+{
+ /* Spin with interrupts enabled. */
+ local_irq_enable();
+
+ /*
+ * Loop at label 2 while (flags & _TIF_NEED_RESCHED) == 0:
+ * testl sets ZF from the AND of the two operands, "rep; nop" is
+ * the PAUSE spin-wait hint and does not touch the flags, so je
+ * branches back on the result of testl.
+ */
+ asm volatile(
+ "2:"
+ "testl %0,%1;"
+ "rep; nop;"
+ "je 2b;"
+ : :
+ "i" (_TIF_NEED_RESCHED),
+ "m" (current_thread_info()->flags));
+}
+
+static void xen_idle(void)
{
local_irq_disable();
@@ -164,14 +182,18 @@ void cpu_idle (void)
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
+ void (*idle)(void);
+
if (__get_cpu_var(cpu_idle_state))
__get_cpu_var(cpu_idle_state) = 0;
rmb();
-
+ idle = pm_idle;
+ if (!idle)
+ idle = xen_idle;
if (cpu_is_offline(smp_processor_id()))
play_dead();
enter_idle();
- xen_idle();
+ idle();
__exit_idle();
}
@@ -210,9 +232,22 @@ void cpu_idle_wait(void)
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
-/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */
-/* Always use xen_idle() instead. */
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) {}
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+{
+}
+
+/*
+ * Handler for the "idle=" boot parameter.  A value beginning with "poll"
+ * makes pm_idle point at poll_idle(); for any value
+ * boot_option_idle_override is set to 1.  Always returns 1.
+ */
+static int __init idle_setup (char *str)
+{
+	const int want_poll = (strncmp(str, "poll", 4) == 0);
+
+	if (want_poll) {
+		printk("using polling idle threads.\n");
+		pm_idle = poll_idle;
+	}
+	boot_option_idle_override = 1;
+
+	return 1;
+}
+
+__setup("idle=", idle_setup);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Mon Jan 15
13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Wed Jan 17
09:56:40 2007 -0500
@@ -164,6 +164,18 @@ void _arch_exit_mmap(struct mm_struct *m
mm_unpin(mm);
}
+/*
+ * Allocate a single zeroed page (order 0, GFP_KERNEL|__GFP_REPEAT).
+ * On success the page is tagged through SetPageForeign() with pte_free
+ * and its count is set to 1.  mm and address are not used.
+ * Returns the page, or NULL when the allocation fails.
+ */
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *pg = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+
+	if (!pg)
+		return NULL;
+
+	SetPageForeign(pg, pte_free);
+	set_page_count(pg, 1);
+	return pg;
+}
+
+
void pte_free(struct page *pte)
{
unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
@@ -171,6 +183,10 @@ void pte_free(struct page *pte)
if (!pte_write(*virt_to_ptep(va)))
BUG_ON(HYPERVISOR_update_va_mapping(
va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
+
+ ClearPageForeign(pte);
+ set_page_count(pte, 1);
+
__free_page(pte);
}
#endif /* CONFIG_XEN */
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Mon Jan 15
13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Wed Jan 17
09:56:40 2007 -0500
@@ -446,8 +446,10 @@ static struct notifier_block xenstore_no
static int __init balloon_init(void)
{
+#ifdef CONFIG_X86
unsigned long pfn;
struct page *page;
+#endif
if (!is_running_on_xen())
return -ENODEV;
@@ -476,13 +478,15 @@ static int __init balloon_init(void)
balloon_pde->write_proc = balloon_write;
#endif
balloon_sysfs_init();
-
+
+#ifdef CONFIG_X86
/* Initialise the balloon with excess memory space. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
page = pfn_to_page(pfn);
if (!PageReserved(page))
balloon_append(page);
}
+#endif
target_watch.callback = watch_target;
xenstore_notifier.notifier_call = balloon_init_watcher;
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Jan 15 13:27:20
2007 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Wed Jan 17 09:56:40
2007 -0500
@@ -42,9 +42,30 @@ static void backend_changed(struct xenbu
static void backend_changed(struct xenbus_watch *, const char **,
unsigned int);
+/*
+ * Format the kernel-thread name for a block backend into buf, which must
+ * hold at least TASK_COMM_LEN bytes.  The name is "blkback.<domid>.<dev>",
+ * where <dev> is the backend's xenstore "dev" node with everything up to
+ * and including the first "/dev/" dropped; snprintf() truncates anything
+ * that does not fit.
+ *
+ * Returns 0 on success, or the error code carried by the xenbus_read()
+ * result when the node cannot be read.
+ */
+static int blkback_name(blkif_t *blkif, char *buf)
+{
+	struct xenbus_device *xbdev = blkif->be->dev;
+	char *path, *leaf;
+
+	path = xenbus_read(XBT_NIL, xbdev->nodename, "dev", NULL);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	/* Skip a "/dev/" prefix when present, else use the value as is. */
+	leaf = strstr(path, "/dev/");
+	leaf = leaf ? leaf + strlen("/dev/") : path;
+
+	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, leaf);
+	kfree(path);
+
+	return 0;
+}
+
+
static void update_blkif_status(blkif_t *blkif)
{
int err;
+ char name[TASK_COMM_LEN];
/* Not ready to connect? */
if (!blkif->irq || !blkif->vbd.bdev)
@@ -59,10 +80,13 @@ static void update_blkif_status(blkif_t
if (blkif->be->dev->state != XenbusStateConnected)
return;
- blkif->xenblkd = kthread_run(blkif_schedule, blkif,
- "xvd %d %02x:%02x",
- blkif->domid,
- blkif->be->major, blkif->be->minor);
+ /* Derive the worker thread's name from the backend's xenstore entry. */
+ err = blkback_name(blkif, name);
+ if (err) {
+ xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
+ return;
+ }
+
+ /*
+ * name embeds a device path read from xenstore, so hand it to
+ * kthread_run() as an argument of a constant "%s" format: a '%'
+ * inside it must not be interpreted as a conversion specifier.
+ */
+ blkif->xenblkd = kthread_run(blkif_schedule, blkif, "%s", name);
if (IS_ERR(blkif->xenblkd)) {
err = PTR_ERR(blkif->xenblkd);
blkif->xenblkd = NULL;
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/drivers/xen/blktap/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile Mon Jan 15 13:27:20
2007 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile Wed Jan 17 09:56:40
2007 -0500
@@ -1,3 +1,5 @@ LINUXINCLUDE += -I../xen/include/public/
LINUXINCLUDE += -I../xen/include/public/io
-obj-y := xenbus.o interface.o blktap.o
+obj-$(CONFIG_XEN_BLKDEV_TAP) := xenblktap.o
+
+xenblktap-y := xenbus.o interface.o blktap.o
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Mon Jan 15 13:27:20
2007 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Wed Jan 17 09:56:40
2007 -0500
@@ -92,9 +92,30 @@ static long get_id(const char *str)
return simple_strtol(num, NULL, 10);
}
+/*
+ * Format the kernel-thread name for a blktap backend into buf, which must
+ * hold at least TASK_COMM_LEN bytes.  The name is "blktap.<domid>.<dev>",
+ * where <dev> is the backend's xenstore "dev" node with everything up to
+ * and including the first "/dev/" dropped; snprintf() truncates anything
+ * that does not fit.
+ *
+ * Returns 0 on success, or the error code carried by the xenbus_read()
+ * result when the node cannot be read.
+ */
+static int blktap_name(blkif_t *blkif, char *buf)
+{
+	struct xenbus_device *xbdev = blkif->be->dev;
+	char *path, *leaf;
+
+	path = xenbus_read(XBT_NIL, xbdev->nodename, "dev", NULL);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	/* Skip a "/dev/" prefix when present, else use the value as is. */
+	leaf = strstr(path, "/dev/");
+	leaf = leaf ? leaf + strlen("/dev/") : path;
+
+	snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, leaf);
+	kfree(path);
+
+	return 0;
+}
+
+
static void tap_update_blkif_status(blkif_t *blkif)
{
int err;
+ char name[TASK_COMM_LEN];
/* Not ready to connect? */
if(!blkif->irq || !blkif->sectors) {
@@ -110,10 +131,13 @@ static void tap_update_blkif_status(blki
if (blkif->be->dev->state != XenbusStateConnected)
return;
- blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif,
- "xvd %d",
- blkif->domid);
-
+ /* Derive the worker thread's name from the backend's xenstore entry. */
+ err = blktap_name(blkif, name);
+ if (err) {
+ xenbus_dev_error(blkif->be->dev, err, "get blktap dev name");
+ return;
+ }
+
+ /*
+ * name embeds a device path read from xenstore, so hand it to
+ * kthread_run() as an argument of a constant "%s" format: a '%'
+ * inside it must not be interpreted as a conversion specifier.
+ */
+ blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, "%s", name);
if (IS_ERR(blkif->xenblkd)) {
err = PTR_ERR(blkif->xenblkd);
blkif->xenblkd = NULL;
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Mon Jan 15
13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Wed Jan 17
09:56:40 2007 -0500
@@ -98,8 +98,8 @@ void xen_machine_kexec_setup_resources(v
err:
/*
* It isn't possible to free xen_phys_cpus this early in the
- * boot. Since failure at this stage is unexpected and the
- * amount is small we leak the memory.
+ * boot. Failure at this stage is unexpected and the amount of
+ * memory is small therefore we tolerate the potential leak.
*/
xen_max_nr_phys_cpus = 0;
return;
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h Mon Jan
15 13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h Wed Jan
17 09:56:40 2007 -0500
@@ -53,6 +53,7 @@ extern void dma_unmap_sg(struct device *
extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
int nents, enum dma_data_direction direction);
+#ifdef CONFIG_HIGHMEM
extern dma_addr_t
dma_map_page(struct device *dev, struct page *page, unsigned long offset,
size_t size, enum dma_data_direction direction);
@@ -60,6 +61,11 @@ extern void
extern void
dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
enum dma_data_direction direction);
+#else
+#define dma_map_page(dev, page, offset, size, dir) \
+ dma_map_single(dev, page_address(page) + (offset), (size), (dir))
+#define dma_unmap_page dma_unmap_single
+#endif
extern void
dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h Mon Jan
15 13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h Wed Jan
17 09:56:40 2007 -0500
@@ -42,7 +42,7 @@ static inline void pte_free_kernel(pte_t
static inline void pte_free_kernel(pte_t *pte)
{
free_page((unsigned long)pte);
- make_page_writable(pte, XENFEAT_writable_page_tables);
+ make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
}
extern void pte_free(struct page *pte);
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h Mon Jan
15 13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h Wed Jan
17 09:56:40 2007 -0500
@@ -26,15 +26,15 @@ extern void swiotlb_unmap_sg(struct devi
extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
int nents, int direction);
extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
+#ifdef CONFIG_HIGHMEM
extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction direction);
extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
size_t size, enum dma_data_direction direction);
+#endif
extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
extern void swiotlb_init(void);
-
-extern unsigned int dma_bits;
#ifdef CONFIG_SWIOTLB
extern int swiotlb;
diff -r 5568efb41da4 -r 3f6a2745b3a3
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h Mon Jan
15 13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h Wed Jan
17 09:56:40 2007 -0500
@@ -64,50 +64,43 @@ static inline void pgd_populate(struct m
}
}
-static inline void pmd_free(pmd_t *pmd)
-{
- pte_t *ptep = virt_to_ptep(pmd);
-
- if (!pte_write(*ptep)) {
- BUG_ON(HYPERVISOR_update_va_mapping(
- (unsigned long)pmd,
- pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, PAGE_KERNEL),
- 0));
- }
- free_page((unsigned long)pmd);
-}
+extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+extern void pte_free(struct page *pte);
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
- return pmd;
+ struct page *pg;
+
+ pg = pte_alloc_one(mm, addr);
+ return pg ? page_address(pg) : NULL;
+}
+
+/*
+ * Release a pmd page.  pmd must be page-aligned (BUG otherwise); the
+ * page backing it is handed to pte_free().
+ */
+static inline void pmd_free(pmd_t *pmd)
+{
+ BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+ pte_free(virt_to_page(pmd));
+}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
- return pud;
+ struct page *pg;
+
+ pg = pte_alloc_one(mm, addr);
+ return pg ? page_address(pg) : NULL;
}
static inline void pud_free(pud_t *pud)
{
- pte_t *ptep = virt_to_ptep(pud);
-
- if (!pte_write(*ptep)) {
- BUG_ON(HYPERVISOR_update_va_mapping(
- (unsigned long)pud,
- pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, PAGE_KERNEL),
- 0));
- }
- free_page((unsigned long)pud);
+ BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+ pte_free(virt_to_page(pud));
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- /*
- * We allocate two contiguous pages for kernel and user.
- */
- unsigned boundary;
+ /*
+ * We allocate two contiguous pages for kernel and user.
+ */
+ unsigned boundary;
pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
if (!pgd)
@@ -124,11 +117,11 @@ static inline pgd_t *pgd_alloc(struct mm
(PTRS_PER_PGD - boundary) * sizeof(pgd_t));
memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
- /*
- * Set level3_user_pgt for vsyscall area
- */
+ /*
+ * Set level3_user_pgt for vsyscall area
+ */
set_pgd(__user_pgd(pgd) + pgd_index(VSYSCALL_START),
- mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
+ mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
return pgd;
}
@@ -160,18 +153,10 @@ static inline void pgd_free(pgd_t *pgd)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long
address)
{
- pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
- if (pte)
+ pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ if (pte)
make_page_readonly(pte, XENFEAT_writable_page_tables);
- return pte;
-}
-
-static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long
address)
-{
- struct page *pte;
-
- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
return pte;
}
@@ -181,18 +166,12 @@ static inline void pte_free_kernel(pte_t
static inline void pte_free_kernel(pte_t *pte)
{
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
- make_page_writable(pte, XENFEAT_writable_page_tables);
+ make_page_writable(pte, XENFEAT_writable_page_tables);
free_page((unsigned long)pte);
}
-extern void pte_free(struct page *pte);
-
-//#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
-//#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
-//#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
-
-#define __pte_free_tlb(tlb,x) pte_free((x))
-#define __pmd_free_tlb(tlb,x) pmd_free((x))
-#define __pud_free_tlb(tlb,x) pud_free((x))
+#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
+#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
#endif /* _X86_64_PGALLOC_H */
diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/kernel/kexec.c
--- a/linux-2.6-xen-sparse/kernel/kexec.c Mon Jan 15 13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/kernel/kexec.c Wed Jan 17 09:56:40 2007 -0500
@@ -1012,9 +1012,11 @@ asmlinkage long sys_kexec_load(unsigned
goto out;
}
#ifdef CONFIG_XEN
- result = xen_machine_kexec_load(image);
- if (result)
- goto out;
+ if (image) {
+ result = xen_machine_kexec_load(image);
+ if (result)
+ goto out;
+ }
#endif
/* Install the new kernel, and Uninstall the old */
image = xchg(dest_image, image);
diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/net/core/dev.c
--- a/linux-2.6-xen-sparse/net/core/dev.c Mon Jan 15 13:27:20 2007 -0500
+++ b/linux-2.6-xen-sparse/net/core/dev.c Wed Jan 17 09:56:40 2007 -0500
@@ -1248,14 +1248,13 @@ static int dev_gso_segment(struct sk_buf
/* Verifying header integrity only. */
if (!segs)
return 0;
-
+
if (unlikely(IS_ERR(segs)))
return PTR_ERR(segs);
skb->next = segs;
DEV_GSO_CB(skb)->destructor = skb->destructor;
skb->destructor = dev_gso_skb_destructor;
-
return 0;
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 patches/linux-2.6.16.33/series
--- a/patches/linux-2.6.16.33/series Mon Jan 15 13:27:20 2007 -0500
+++ b/patches/linux-2.6.16.33/series Wed Jan 17 09:56:40 2007 -0500
@@ -9,7 +9,6 @@ fix-hz-suspend.patch
fix-hz-suspend.patch
fix-ide-cd-pio-mode.patch
i386-mach-io-check-nmi.patch
-ipv6-no-autoconf.patch
net-csum.patch
net-gso-0-base.patch
net-gso-1-check-dodgy.patch
diff -r 5568efb41da4 -r 3f6a2745b3a3 patches/linux-2.6.16.33/vsnprintf.patch
--- a/patches/linux-2.6.16.33/vsnprintf.patch Mon Jan 15 13:27:20 2007 -0500
+++ b/patches/linux-2.6.16.33/vsnprintf.patch Wed Jan 17 09:56:40 2007 -0500
@@ -203,7 +203,7 @@ index b07db5c..f595947 100644
+ if (str < end)
+ *str = '\0';
+ else
-+ *end = '\0';
++ end[-1] = '\0';
+ }
+ /* the trailing null byte doesn't count towards the total */
return str-buf;
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/check/check_udev
--- a/tools/check/check_udev Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/check/check_udev Wed Jan 17 09:56:40 2007 -0500
@@ -11,7 +11,7 @@ Linux)
Linux)
TOOL="udevinfo"
UDEV_VERSION="0"
- test -x "$(which ${TOOL})" && \
+ test -x "$(which ${TOOL} 2>/dev/null)" && \
UDEV_VERSION=$(${TOOL} -V | sed -e 's/^[^0-9]*
\([0-9]\{1,\}\)[^0-9]\{0,\}/\1/')
if test "${UDEV_VERSION}" -ge 059; then
RC=0
@@ -28,7 +28,7 @@ esac
if test ${RC} -ne 0; then
echo
- echo ' *** Check for ${TOOL} FAILED'
+ echo " *** Check for ${TOOL} FAILED"
fi
exit ${RC}
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/blktap
--- a/tools/examples/blktap Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/blktap Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# Copyright (c) 2005, XenSource Ltd.
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/block
--- a/tools/examples/block Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/block Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
dir=$(dirname "$0")
. "$dir/block-common.sh"
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/block-enbd
--- a/tools/examples/block-enbd Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/block-enbd Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# Usage: block-enbd [bind server ctl_port |unbind node]
#
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/block-nbd
--- a/tools/examples/block-nbd Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/block-nbd Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# Usage: block-nbd [bind server ctl_port |unbind node]
#
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/external-device-migrate
--- a/tools/examples/external-device-migrate Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/external-device-migrate Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# Copyright (c) 2005 IBM Corporation
#
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/network-bridge
--- a/tools/examples/network-bridge Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/network-bridge Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
#============================================================================
# Default Xen network start/stop script.
# Xend calls a network script when it starts.
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/network-nat
--- a/tools/examples/network-nat Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/network-nat Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
#============================================================================
# Default Xen network start/stop script when using NAT.
# Xend calls a network script when it starts.
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/network-route
--- a/tools/examples/network-route Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/network-route Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
#============================================================================
# Default Xen network start/stop script.
# Xend calls a network script when it starts.
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vif-bridge
--- a/tools/examples/vif-bridge Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/vif-bridge Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
#============================================================================
# /etc/xen/vif-bridge
#
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vif-common.sh
--- a/tools/examples/vif-common.sh Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/vif-common.sh Wed Jan 17 09:56:40 2007 -0500
@@ -64,7 +64,7 @@ fi
fi
-function frob_iptable()
+frob_iptable()
{
if [ "$command" == "online" ]
then
@@ -89,7 +89,7 @@ If you are using iptables, this may affe
# to those coming from the specified networks, though we allow DHCP requests
# as well.
#
-function handle_iptable()
+handle_iptable()
{
# Check for a working iptables installation. Checking for the iptables
# binary is not sufficient, because the user may not have the appropriate
@@ -123,7 +123,7 @@ function handle_iptable()
# Print the IP address currently in use at the given interface, or nothing if
# the interface is not up.
#
-function ip_of()
+ip_of()
{
ip addr show "$1" | awk "/^.*inet.*$1\$/{print \$2}" | sed -n '1 s,/.*,,p'
}
@@ -137,7 +137,7 @@ function ip_of()
# to these scripts, or eth0 by default. This function will call fatal if no
# such interface could be found.
#
-function dom0_ip()
+dom0_ip()
{
local nd=${netdev:-eth0}
local result=$(ip_of "$nd")
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vif-nat
--- a/tools/examples/vif-nat Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/vif-nat Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
#============================================================================
# /etc/xen/vif-nat
#
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vif-route
--- a/tools/examples/vif-route Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/vif-route Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
#============================================================================
# /etc/xen/vif-route
#
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vtpm
--- a/tools/examples/vtpm Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/vtpm Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
dir=$(dirname "$0")
. "$dir/vtpm-hotplug-common.sh"
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vtpm-common.sh
--- a/tools/examples/vtpm-common.sh Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/vtpm-common.sh Wed Jan 17 09:56:40 2007 -0500
@@ -24,7 +24,9 @@ VTPMDB="/etc/xen/vtpm.db"
#In the vtpm-impl file some commands should be defined:
# vtpm_create, vtpm_setup, vtpm_start, etc. (see below)
-if [ -r "$dir/vtpm-impl" ]; then
+if [ -r "$dir/vtpm-impl.alt" ]; then
+ . "$dir/vtpm-impl.alt"
+elif [ -r "$dir/vtpm-impl" ]; then
. "$dir/vtpm-impl"
else
function vtpm_create () {
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vtpm-delete
--- a/tools/examples/vtpm-delete Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/vtpm-delete Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# This scripts must be called the following way:
# vtpm-delete <domain name>
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xen-backend.agent
--- a/tools/examples/xen-backend.agent Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/xen-backend.agent Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#! /bin/sh
+#! /bin/bash
PATH=/etc/xen/scripts:$PATH
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xen-hotplug-cleanup
--- a/tools/examples/xen-hotplug-cleanup Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/xen-hotplug-cleanup Wed Jan 17 09:56:40 2007 -0500
@@ -1,4 +1,4 @@
-#! /bin/sh
+#! /bin/bash
dir=$(dirname "$0")
. "$dir/xen-hotplug-common.sh"
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xen-network-common.sh
--- a/tools/examples/xen-network-common.sh Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/xen-network-common.sh Wed Jan 17 09:56:40 2007 -0500
@@ -117,7 +117,12 @@ create_bridge () {
ip link set ${bridge} arp off
ip link set ${bridge} multicast off
fi
+
+ # A small MTU disables IPv6 (and therefore IPv6 addrconf).
+ mtu=$(ip link show ${bridge} | sed -n 's/.* mtu \([0-9]\+\).*/\1/p')
+ ip link set ${bridge} mtu 68
ip link set ${bridge} up
+ ip link set ${bridge} mtu ${mtu:-1500}
}
# Usage: add_to_bridge bridge dev
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xmexample1
--- a/tools/examples/xmexample1 Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/xmexample1 Wed Jan 17 09:56:40 2007 -0500
@@ -64,6 +64,40 @@ vif = [ '' ]
# and MODE is r for read-only, w for read-write.
disk = [ 'phy:hda1,hda1,w' ]
+
+#----------------------------------------------------------------------------
+# Define frame buffer device.
+#
+# By default, no frame buffer device is configured.
+#
+# To create one using the SDL backend and sensible defaults:
+#
+# vfb = [ 'type=sdl' ]
+#
+# This uses environment variables XAUTHORITY and DISPLAY. You
+# can override that:
+#
+# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+#
+# To create one using the VNC backend and sensible defaults:
+#
+# vfb = [ 'type=vnc' ]
+#
+# The backend listens on 127.0.0.1 port 5900+N by default, where N is
+# the domain ID. You can override both address and N:
+#
+# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1' ]
+#
+# Or you can bind the first unused port above 5900:
+#
+# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+#
+# You can override the password:
+#
+# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+#
+# Empty password disables authentication. Defaults to the vncpasswd
+# configured in xend-config.sxp.
#----------------------------------------------------------------------------
# Define to which TPM instance the user domain should communicate.
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xmexample2
--- a/tools/examples/xmexample2 Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/xmexample2 Wed Jan 17 09:56:40 2007 -0500
@@ -100,6 +100,40 @@ vif = [ '' ]
# All domains get sda6 read-only (to use for /usr, see below).
disk = [ 'phy:sda%d,sda1,w' % (7+vmid),
'phy:sda6,sda6,r' ]
+
+#----------------------------------------------------------------------------
+# Define frame buffer device.
+#
+# By default, no frame buffer device is configured.
+#
+# To create one using the SDL backend and sensible defaults:
+#
+# vfb = [ 'type=sdl' ]
+#
+# This uses environment variables XAUTHORITY and DISPLAY. You
+# can override that:
+#
+# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+#
+# To create one using the VNC backend and sensible defaults:
+#
+# vfb = [ 'type=vnc' ]
+#
+# The backend listens on 127.0.0.1 port 5900+N by default, where N is
+# the domain ID. You can override both address and N:
+#
+# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ]
+#
+# Or you can bind the first unused port above 5900:
+#
+# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+#
+# You can override the password:
+#
+# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+#
+# Empty password disables authentication. Defaults to the vncpasswd
+# configured in xend-config.sxp.
#----------------------------------------------------------------------------
# Define to which TPM instance the user domain should communicate.
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xmexample3
--- a/tools/examples/xmexample3 Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/examples/xmexample3 Wed Jan 17 09:56:40 2007 -0500
@@ -85,6 +85,40 @@ vif = [ 'ip=192.168.%d.1/24' % (vmid)]
# to all domains as sda1.
# All domains get sda6 read-only (to use for /usr, see below).
disk = [ 'phy:hda%d,hda1,w' % (vmid)]
+
+#----------------------------------------------------------------------------
+# Define frame buffer device.
+#
+# By default, no frame buffer device is configured.
+#
+# To create one using the SDL backend and sensible defaults:
+#
+# vfb = [ 'type=sdl' ]
+#
+# This uses environment variables XAUTHORITY and DISPLAY. You
+# can override that:
+#
+# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+#
+# To create one using the VNC backend and sensible defaults:
+#
+# vfb = [ 'type=vnc' ]
+#
+# The backend listens on 127.0.0.1 port 5900+N by default, where N is
+# the domain ID. You can override both address and N:
+#
+# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ]
+#
+# Or you can bind the first unused port above 5900:
+#
+# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+#
+# You can override the password:
+#
+# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+#
+# Empty password disables authentication. Defaults to the vncpasswd
+# configured in xend-config.sxp.
#----------------------------------------------------------------------------
# Define to which TPM instance the user domain should communicate.
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/firmware/rombios/rombios.c Wed Jan 17 09:56:40 2007 -0500
@@ -278,7 +278,6 @@ typedef unsigned short bx_bool;
typedef unsigned short bx_bool;
typedef unsigned long Bit32u;
-#if BX_USE_ATADRV
void memsetb(seg,offset,value,count);
void memcpyb(dseg,doffset,sseg,soffset,count);
@@ -418,7 +417,6 @@ typedef unsigned long Bit32u;
ASM_END
}
#endif
-#endif //BX_USE_ATADRV
// read_dword and write_dword functions
static Bit32u read_dword();
@@ -728,6 +726,8 @@ typedef struct {
// The EBDA structure should conform to
// http://www.cybertrails.com/~fys/rombios.htm document
// I made the ata and cdemu structs begin at 0x121 in the EBDA seg
+ // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot
+ // device tables are at 0x9ff00 -- 0x9ffff
typedef struct {
unsigned char filler1[0x3D];
@@ -885,7 +885,7 @@ static void int15_function();
static void int15_function();
static void int16_function();
static void int17_function();
-static Bit32u int19_function();
+static void int19_function();
static void int1a_function();
static void int70_function();
static void int74_function();
@@ -1435,10 +1435,17 @@ copy_e820_table()
copy_e820_table()
{
Bit8u nr_entries = read_byte(0x9000, 0x1e8);
+ Bit32u base_mem;
if (nr_entries > 32)
nr_entries = 32;
write_word(0xe000, 0x8, nr_entries);
memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
+ /* Report the proper base memory size at address 0x0413: otherwise
+ * non-e820 code will clobber things if BASE_MEM_IN_K is bigger than
+ * the first e820 entry. Get the size by reading the second 64bit
+ * field of the first e820 slot. */
+ base_mem = read_dword(0x9000, 0x2d0 + 8);
+ write_word(0x40, 0x13, base_mem >> 10);
}
#endif /* HVMASSIST */
@@ -1847,28 +1854,100 @@ print_bios_banner()
printf("\n");
}
+
+//--------------------------------------------------------------------------
+// BIOS Boot Specification 1.0.1 compatibility
+//
+// Very basic support for the BIOS Boot Specification, which allows expansion
+// ROMs to register themselves as boot devices, instead of just stealing the
+// INT 19h boot vector.
+//
+// This is a hack: to do it properly requires a proper PnP BIOS and we aren't
+// one; we just lie to the option ROMs to make them behave correctly.
+// We also don't support letting option ROMs register as bootable disk
+// drives (BCVs), only as bootable devices (BEVs).
+//
+// http://www.phoenix.com/en/Customer+Services/White+Papers-Specs/pc+industry+specifications.htm
+//--------------------------------------------------------------------------
+
+/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
+#define IPL_SEG 0x9ff0
+#define IPL_TABLE_OFFSET 0x0000
+#define IPL_TABLE_ENTRIES 8
+#define IPL_COUNT_OFFSET 0x0080 /* u16: number of valid table entries */
+#define IPL_SEQUENCE_OFFSET 0x0082 /* u16: next boot device */
+
+struct ipl_entry { /* One 16-byte slot of the IPL table at IPL_SEG:IPL_TABLE_OFFSET */
+ Bit16u type; /* 1 = floppy, 2 = first HDD, 3 = CD-ROM, 0x80 = expansion-ROM BEV */
+ Bit16u flags; /* left as 0 by the code visible here */
+ Bit32u vector; /* BEV far pointer: segment in the high word, offset in the low word */
+ Bit32u description; /* left as 0 by the code visible here */
+ Bit32u reserved; /* left as 0 by the code visible here */
+};
+
+/*
+ * Build the default IPL (boot device) table in the area at IPL_SEG:
+ * clear it, add a floppy entry, a first-HDD entry and, when
+ * BX_ELTORITO_BOOT is set, a CD-ROM entry, then store the entry count and
+ * mark the boot sequence as not yet started (0xffff).
+ */
+static void
+init_boot_vectors()
+{
+ struct ipl_entry e;
+ Bit16u count = 0;
+ Bit16u ss = get_SS(); /* passed to memcpyb as the source segment for &e; presumably the current stack segment */
+
+ /* Clear out the IPL table. */
+ /* NOTE(review): the count is 0xff, so the last byte of the 256-byte
+ * area (0x9ffff) is not cleared; nothing visible here reads it -- confirm. */
+ memsetb(IPL_SEG, IPL_TABLE_OFFSET, 0, 0xff);
+
+ /* Floppy drive */
+ e.type = 1; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+ memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+ count++;
+
+ /* First HDD */
+ e.type = 2; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+ memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+ count++;
+
+#if BX_ELTORITO_BOOT
+ /* CDROM */
+ e.type = 3; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+ memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+ count++;
+#endif
+
+ /* Remember how many devices we have */
+ write_word(IPL_SEG, IPL_COUNT_OFFSET, count);
+ /* Not tried booting anything yet */
+ write_word(IPL_SEG, IPL_SEQUENCE_OFFSET, 0xffff);
+}
+
+/*
+ * Copy IPL table entry number i into *e.
+ * Returns 1 on success, or 0 (leaving *e untouched) when i is not below
+ * the entry count stored at IPL_SEG:IPL_COUNT_OFFSET.
+ */
+static Bit8u
+get_boot_vector(i, e)
+Bit16u i; struct ipl_entry *e;
+{
+ Bit16u count;
+ Bit16u ss = get_SS(); /* destination segment for e in memcpyb below; assumes e points into the caller's stack -- confirm */
+ /* Get the count of boot devices, and refuse to overrun the array */
+ count = read_word(IPL_SEG, IPL_COUNT_OFFSET);
+ if (i >= count) return 0;
+ /* OK to read this device */
+ memcpyb(ss, e, IPL_SEG, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
+ return 1;
+}
+
+
//--------------------------------------------------------------------------
// print_boot_device
// displays the boot device
//--------------------------------------------------------------------------
-static char drivetypes[][10]={"Floppy","Hard Disk","CD-Rom"};
+static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
void
-print_boot_device(cdboot, drive)
- Bit8u cdboot; Bit16u drive;
+print_boot_device(type)
+ Bit16u type;
{
- Bit8u i;
-
- // cdboot contains 0 if floppy/harddisk, 1 otherwise
- // drive contains real/emulated boot drive
-
- if(cdboot)i=2; // CD-Rom
- else if((drive&0x0080)==0x00)i=0; // Floppy
- else if((drive&0x0080)==0x80)i=1; // Hard drive
- else return;
-
- printf("Booting from %s...\n",drivetypes[i]);
+ /* NIC appears as type 0x80 */
+ if (type == 0x80 ) type = 0x4;
+ if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n");
+ printf("Booting from %s...\n", drivetypes[type]);
}
//--------------------------------------------------------------------------
@@ -1876,29 +1955,20 @@ print_boot_device(cdboot, drive)
// displays the reason why boot failed
//--------------------------------------------------------------------------
void
-print_boot_failure(cdboot, drive, reason, lastdrive)
- Bit8u cdboot; Bit8u drive; Bit8u lastdrive;
+print_boot_failure(type, reason)
+ Bit16u type; Bit8u reason;
{
- Bit16u drivenum = drive&0x7f;
-
- // cdboot: 1 if boot from cd, 0 otherwise
- // drive : drive number
- // reason: 0 signature check failed, 1 read error
- // lastdrive: 1 boot drive is the last one in boot sequence
-
- if (cdboot)
- bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s
failed\n",drivetypes[2]);
- else if (drive & 0x80)
- bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s %d
failed\n", drivetypes[1],drivenum);
+ if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n");
+
+ printf("Boot from %s failed", drivetypes[type]);
+ if (type < 4) {
+ /* Report the reason too */
+ if (reason==0)
+ printf(": not a bootable disk");
else
- bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s %d
failed\n", drivetypes[0],drivenum);
-
- if (lastdrive==1) {
- if (reason==0)
- BX_PANIC("Not a bootable disk\n");
- else
- BX_PANIC("Could not read the boot disk\n");
+ printf(": could not read the boot disk");
}
+ printf("\n");
}
//--------------------------------------------------------------------------
@@ -7546,19 +7616,19 @@ int17_function(regs, ds, iret_addr)
}
}
-// returns bootsegment in ax, drive in bl
- Bit32u
-int19_function(bseqnr)
-Bit8u bseqnr;
+void
+int19_function(seq_nr)
+Bit16u seq_nr;
{
Bit16u ebda_seg=read_word(0x0040,0x000E);
- Bit16u bootseq;
+ Bit16u bootdev;
Bit8u bootdrv;
- Bit8u bootcd;
Bit8u bootchk;
Bit16u bootseg;
+ Bit16u bootip;
Bit16u status;
- Bit8u lastdrive=0;
+
+ struct ipl_entry e;
// if BX_ELTORITO_BOOT is not defined, old behavior
// check bit 5 in CMOS reg 0x2d. load either 0x00 or 0x80 into DL
@@ -7575,62 +7645,54 @@ Bit8u bseqnr;
// 0x01 : first floppy
// 0x02 : first harddrive
// 0x03 : first cdrom
+ // 0x04 - 0x0f : PnP expansion ROMs (e.g. Etherboot)
// else : boot failure
// Get the boot sequence
#if BX_ELTORITO_BOOT
- bootseq=inb_cmos(0x3d);
- bootseq|=((inb_cmos(0x38) & 0xf0) << 4);
-
- if (bseqnr==2) bootseq >>= 4;
- if (bseqnr==3) bootseq >>= 8;
- if (bootseq<0x10) lastdrive = 1;
- bootdrv=0x00; bootcd=0;
- switch(bootseq & 0x0f) {
- case 0x01: bootdrv=0x00; bootcd=0; break;
- case 0x02: bootdrv=0x80; bootcd=0; break;
- case 0x03: bootdrv=0x00; bootcd=1; break;
- default: return 0x00000000;
- }
-#else
- bootseq=inb_cmos(0x2d);
-
- if (bseqnr==2) {
- bootseq ^= 0x20;
- lastdrive = 1;
+ bootdev = inb_cmos(0x3d);
+ bootdev |= ((inb_cmos(0x38) & 0xf0) << 4);
+ bootdev >>= 4 * seq_nr;
+ bootdev &= 0xf;
+ if (bootdev == 0) BX_PANIC("No bootable device.\n");
+
+ /* Translate from CMOS runes to an IPL table offset by subtracting 1 */
+ bootdev -= 1;
+#else
+ if (seq_nr ==2) BX_PANIC("No more boot devices.");
+ if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1))
+ /* Boot from floppy if the bit is set on the first boot, or clear on the second (bit XOR second boot) */
+ bootdev = 0x00;
+ else
+ bootdev = 0x01;
+#endif
+
+ /* Read the boot device from the IPL table */
+ if (get_boot_vector(bootdev, &e) == 0) {
+ BX_INFO("Invalid boot device (0x%x)\n", bootdev);
+ return;
}
- bootdrv=0x00; bootcd=0;
- if((bootseq&0x20)==0) bootdrv=0x80;
-#endif // BX_ELTORITO_BOOT
-
-#if BX_ELTORITO_BOOT
- // We have to boot from cd
- if (bootcd != 0) {
- status = cdrom_boot();
-
- // If failure
- if ( (status & 0x00ff) !=0 ) {
- print_cdromboot_failure(status);
- print_boot_failure(bootcd, bootdrv, 1, lastdrive);
- return 0x00000000;
- }
-
- bootseg = read_word(ebda_seg,&EbdaData->cdemu.load_segment);
- bootdrv = (Bit8u)(status>>8);
- }
-
-#endif // BX_ELTORITO_BOOT
-
- // We have to boot from harddisk or floppy
- if (bootcd == 0) {
- bootseg=0x07c0;
+
+ /* Do the loading, and set up vector as a far pointer to the boot
+ * address, and bootdrv as the boot drive */
+ print_boot_device(e.type);
+
+ switch(e.type) {
+ case 0x01: /* FDD */
+ case 0x02: /* HDD */
+
+ bootdrv = (e.type == 0x02) ? 0x80 : 0x00;
+ bootseg = 0x07c0;
+ status = 0;
ASM_START
push bp
mov bp, sp
-
- mov ax, #0x0000
- mov _int19_function.status + 2[bp], ax
+ push ax
+ push bx
+ push cx
+ push dx
+
mov dl, _int19_function.bootdrv + 2[bp]
mov ax, _int19_function.bootseg + 2[bp]
mov es, ax ;; segment
@@ -7646,43 +7708,83 @@ ASM_START
mov _int19_function.status + 2[bp], ax
int19_load_done:
+ pop dx
+ pop cx
+ pop bx
+ pop ax
pop bp
ASM_END
if (status != 0) {
- print_boot_failure(bootcd, bootdrv, 1, lastdrive);
- return 0x00000000;
+ print_boot_failure(e.type, 1);
+ return;
+ }
+
+ /* Always check the signature on a HDD boot sector; on FDD, only do
+ * the check if the CMOS doesn't tell us to skip it (reg 0x38 bit 0).
+ * In this case arm e.type is 0x01 for the floppy and 0x02 for the HDD. */
+ if (e.type != 0x01 || !((inb_cmos(0x38) & 0x01))) {
+ if (read_word(bootseg,0x1fe) != 0xaa55) {
+ print_boot_failure(e.type, 0);
+ return;
}
}
- // check signature if instructed by cmos reg 0x38, only for floppy
- // bootchk = 1 : signature check disabled
- // bootchk = 0 : signature check enabled
- if (bootdrv != 0) bootchk = 0;
- else bootchk = inb_cmos(0x38) & 0x01;
+ /* Canonicalize bootseg:bootip */
+ bootip = (bootseg & 0x0fff) << 4;
+ bootseg &= 0xf000;
+ break;
#if BX_ELTORITO_BOOT
- // if boot from cd, no signature check
- if (bootcd != 0)
- bootchk = 1;
-#endif // BX_ELTORITO_BOOT
-
- if (bootchk == 0) {
- if (read_word(bootseg,0x1fe) != 0xaa55) {
- print_boot_failure(bootcd, bootdrv, 0, lastdrive);
- return 0x00000000;
- }
+ case 0x03: /* CD-ROM */
+ status = cdrom_boot();
+
+ // If failure
+ if ( (status & 0x00ff) !=0 ) {
+ print_cdromboot_failure(status);
+ print_boot_failure(e.type, 1);
+ return;
}
+
+ bootdrv = (Bit8u)(status>>8);
+ bootseg = read_word(ebda_seg,&EbdaData->cdemu.load_segment);
+ /* Canonicalize bootseg:bootip */
+ bootip = (bootseg & 0x0fff) << 4;
+ bootseg &= 0xf000;
+ break;
+#endif
+
+ case 0x80: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
+ bootseg = e.vector >> 16;
+ bootip = e.vector & 0xffff;
+ break;
+
+ default: return;
+ }
+
+ /* Debugging info */
+ printf("Booting from %x:%x\n", bootseg, bootip);
-#if BX_ELTORITO_BOOT
- // Print out the boot string
- print_boot_device(bootcd, bootdrv);
-#else // BX_ELTORITO_BOOT
- print_boot_device(0, bootdrv);
-#endif // BX_ELTORITO_BOOT
-
- // return the boot segment
- return (((Bit32u)bootdrv) << 16) + bootseg;
+ /* Jump to the boot vector */
+ASM_START
+ mov bp, sp
+ ;; Build an iret stack frame that will take us to the boot vector.
+ ;; iret pops ip, then cs, then flags, so push them in the opposite order.
+ pushf
+ mov ax, _int19_function.bootseg + 0[bp]
+ push ax
+ mov ax, _int19_function.bootip + 0[bp]
+ push ax
+ ;; Set the magic number in ax and the boot drive in dl.
+ mov ax, #0xaa55
+ mov dl, _int19_function.bootdrv + 0[bp]
+ ;; Zero some of the other registers.
+ xor bx, bx
+ mov ds, bx
+ mov es, bx
+ mov bp, bx
+ ;; Go!
+ iret
+ASM_END
}
void
@@ -8139,14 +8241,29 @@ int13_out:
popa
iret
-
;----------
;- INT18h -
;----------
-int18_handler: ;; Boot Failure routing
- call _int18_panic_msg
- hlt
- iret
+int18_handler: ;; Boot Failure recovery: try the next device.
+
+ ;; Reset SP and SS
+ mov ax, #0xfffe
+ mov sp, ax
+ xor ax, ax
+ mov ss, ax
+
+ ;; Get the boot sequence number out of the IPL memory
+ mov bx, #IPL_SEG
+ mov ds, bx ;; Set segment
+ mov bx, IPL_SEQUENCE_OFFSET ;; BX is now the sequence number
+ inc bx ;; ++
+ mov IPL_SEQUENCE_OFFSET, bx ;; Write it back
+ mov ds, ax ;; and reset the segment to zero.
+
+ ;; Carry on in the INT 19h handler, using the new sequence number
+ push bx
+
+ jmp int19_next_boot
;----------
;- INT19h -
@@ -8154,62 +8271,32 @@ int19_relocated: ;; Boot function, reloc
int19_relocated: ;; Boot function, relocated
;; int19 was beginning to be really complex, so now it
- ;; just calls an C function, that does the work
- ;; it returns in BL the boot drive, and in AX the boot segment
- ;; the boot segment will be 0x0000 if something has failed
+ ;; just calls a C function that does the work
push bp
mov bp, sp
-
- ;; drop ds
+
+ ;; Reset SS and SP
+ mov ax, #0xfffe
+ mov sp, ax
xor ax, ax
- mov ds, ax
-
- ;; 1st boot device
- mov ax, #0x0001
+ mov ss, ax
+
+ ;; Start from the first boot device (0, in AX)
+ mov bx, #IPL_SEG
+ mov ds, bx ;; Set segment to write to the IPL memory
+ mov IPL_SEQUENCE_OFFSET, ax ;; Save the sequence number
+ mov ds, ax ;; and reset the segment.
+
push ax
+
+int19_next_boot:
+
+ ;; Call the C code for the next boot device
call _int19_function
- inc sp
- inc sp
- ;; bl contains the boot drive
- ;; ax contains the boot segment or 0 if failure
-
- test ax, ax ;; if ax is 0 try next boot device
- jnz boot_setup
-
- ;; 2nd boot device
- mov ax, #0x0002
- push ax
- call _int19_function
- inc sp
- inc sp
- test ax, ax ;; if ax is 0 try next boot device
- jnz boot_setup
-
- ;; 3rd boot device
- mov ax, #0x0003
- push ax
- call _int19_function
- inc sp
- inc sp
- test ax, ax ;; if ax is 0 call int18
- jz int18_handler
-
-boot_setup:
- mov dl, bl ;; set drive so guest os find it
- shl eax, #0x04 ;; convert seg to ip
- mov 2[bp], ax ;; set ip
-
- shr eax, #0x04 ;; get cs back
- and ax, #0xF000 ;; remove what went in ip
- mov 4[bp], ax ;; set cs
- xor ax, ax
- mov es, ax ;; set es to zero fixes [ 549815 ]
- mov [bp], ax ;; set bp to zero
- mov ax, #0xaa55 ;; set ok flag
-
- pop bp
- iret ;; Beam me up Scotty
+
+ ;; Boot failed: invoke the boot recovery function
+ int #0x18
;----------
;- INT1Ch -
@@ -9387,6 +9474,15 @@ checksum_loop:
pop ax
ret
+
+;; We need a copy of this string, but we are not actually a PnP BIOS,
+;; so make sure it is *not* aligned, so OSes will not see it if they scan.
+.align 16
+ db 0
+pnp_string:
+ .ascii "$PnP"
+
+
rom_scan:
;; Scan for existence of valid expansion ROMS.
;; Video ROM: from 0xC0000..0xC7FFF in 2k increments
@@ -9421,9 +9517,17 @@ block_count_rounded:
xor bx, bx ;; Restore DS back to 0000:
mov ds, bx
push ax ;; Save AX
+ push di ;; Save DI
;; Push addr of ROM entry point
push cx ;; Push seg
push #0x0003 ;; Push offset
+
+ ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
+ ;; That should stop it grabbing INT 19h; we will use its BEV instead.
+ mov ax, #0xf000
+ mov es, ax
+ lea di, pnp_string
+
mov bp, sp ;; Call ROM init routine using seg:off on stack
db 0xff ;; call_far ss:[bp+0]
db 0x5e
@@ -9431,6 +9535,38 @@ block_count_rounded:
cli ;; In case expansion ROM BIOS turns IF on
add sp, #2 ;; Pop offset value
pop cx ;; Pop seg value (restore CX)
+
+ ;; Look at the ROM's PnP Expansion header. Properly, we're supposed
+ ;; to init all the ROMs and then go back and build an IPL table of
+ ;; all the bootable devices, but we can get away with one pass.
+ mov ds, cx ;; ROM base
+ mov bx, 0x001a ;; 0x1A is the offset into ROM header that contains...
+ mov ax, [bx] ;; the offset of PnP expansion header, where...
+ cmp ax, #0x5024 ;; we look for signature "$PnP"
+ jne no_bev
+ mov ax, 2[bx]
+ cmp ax, #0x506e
+ jne no_bev
+ mov ax, 0x1a[bx] ;; 0x1A is also the offset into the expansion header of...
+ cmp ax, #0x0000 ;; the Bootstrap Entry Vector, or zero if there is none.
+ je no_bev
+
+ ;; Found a device that thinks it can boot the system. Record its BEV.
+ mov bx, #IPL_SEG ;; Go to the segment where the IPL table lives
+ mov ds, bx
+ mov bx, IPL_COUNT_OFFSET ;; Read the number of entries so far
+ cmp bx, #IPL_TABLE_ENTRIES
+ je no_bev ;; Get out if the table is full
+ shl bx, #0x4 ;; Turn count into offset (entries are 16 bytes)
+ mov 0[bx], #0x80 ;; This entry is a BEV device
+ mov 6[bx], cx ;; Build a far pointer from the segment...
+ mov 4[bx], ax ;; and the offset
+ shr bx, #0x4 ;; Turn the offset back into a count
+ inc bx ;; We have one more entry now
+ mov IPL_COUNT_OFFSET, bx ;; Remember that.
+
+no_bev:
+ pop di ;; Restore DI
pop ax ;; Restore AX
rom_scan_increment:
shl ax, #5 ;; convert 512-bytes blocks to 16-byte increments
@@ -9763,6 +9899,8 @@ post_default_ints:
call _copy_e820_table
call smbios_init
#endif
+
+ call _init_boot_vectors
call rom_scan
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/ioemu/hw/pc.c Wed Jan 17 09:56:40 2007 -0500
@@ -168,6 +168,8 @@ static int get_bios_disk(char *boot_devi
return 0x02; /* hard drive */
case 'd':
return 0x03; /* cdrom */
+ case 'n':
+ return 0x04; /* network */
}
}
return 0x00; /* no device */
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/ioemu/target-i386-dm/helper2.c Wed Jan 17 09:56:40 2007 -0500
@@ -498,6 +498,8 @@ void handle_buffered_io(void *opaque)
void cpu_handle_ioreq(void *opaque)
{
+ extern int vm_running;
+ extern int shutdown_requested;
CPUState *env = opaque;
ioreq_t *req = cpu_get_ioreq();
@@ -516,6 +518,25 @@ void cpu_handle_ioreq(void *opaque)
}
wmb(); /* Update ioreq contents /then/ update state. */
+
+ /*
+ * We do this before we send the response so that the tools
+ * have the opportunity to pick up on the reset before the
+ * guest resumes and does a hlt with interrupts disabled which
+ * causes Xen to powerdown the domain.
+ */
+ if (vm_running) {
+ if (shutdown_requested) {
+ fprintf(logfile, "shutdown requested in cpu_handle_ioreq\n");
+ destroy_hvm_domain();
+ }
+ if (reset_requested) {
+ fprintf(logfile, "reset requested in cpu_handle_ioreq.\n");
+ qemu_system_reset();
+ reset_requested = 0;
+ }
+ }
+
req->state = STATE_IORESP_READY;
xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]);
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/ioemu/vl.c Wed Jan 17 09:56:40 2007 -0500
@@ -6153,7 +6153,7 @@ int main(int argc, char **argv)
case QEMU_OPTION_boot:
boot_device = strdup(optarg);
if (strspn(boot_device, "acd"
-#ifdef TARGET_SPARC
+#if defined(TARGET_SPARC) || defined(TARGET_I386)
"n"
#endif
) != strlen(boot_device)) {
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxc/xc_hvm_build.c Wed Jan 17 09:56:40 2007 -0500
@@ -233,8 +233,7 @@ static int setup_guest(int xc_handle,
SCRATCH_PFN)) == NULL) )
goto error_out;
memset(shared_info, 0, PAGE_SIZE);
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
+ /* NB. evtchn_upcall_mask is unused: leave as zero. */
memset(&shared_info->evtchn_mask[0], 0xff,
sizeof(shared_info->evtchn_mask));
munmap(shared_info, PAGE_SIZE);
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxc/xc_linux_build.c Wed Jan 17 09:56:40 2007 -0500
@@ -595,6 +595,7 @@ static int compat_check(int xc_handle, s
return 0;
}
+#ifndef __x86_64__//temp
if (strstr(xen_caps, "xen-3.0-x86_32p")) {
if (dsi->pae_kernel == PAEKERN_bimodal) {
dsi->pae_kernel = PAEKERN_extended_cr3;
@@ -612,6 +613,7 @@ static int compat_check(int xc_handle, s
return 0;
}
}
+#endif
return 1;
}
@@ -739,7 +741,7 @@ static int setup_guest(int xc_handle,
/*
* Enable shadow translate mode. This must happen after
* populate physmap because the p2m reservation is based on
- * the domains current memory allocation.
+ * the domain's current memory allocation.
*/
if ( xc_shadow_control(xc_handle, dom,
XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE,
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxc/xc_linux_restore.c Wed Jan 17 09:56:40 2007 -0500
@@ -12,7 +12,7 @@
#include "xg_private.h"
#include "xg_save_restore.h"
-/* max mfn of the whole machine */
+/* max mfn of the current host machine */
static unsigned long max_mfn;
/* virtual starting address of the hypervisor */
@@ -29,6 +29,9 @@ static xen_pfn_t *live_p2m = NULL;
/* A table mapping each PFN to its new MFN. */
static xen_pfn_t *p2m = NULL;
+
+/* A table of P2M mappings in the current region */
+static xen_pfn_t *p2m_batch = NULL;
static ssize_t
@@ -57,46 +60,78 @@ read_exact(int fd, void *buf, size_t cou
** This function inverts that operation, replacing the pfn values with
** the (now known) appropriate mfn values.
*/
-static int uncanonicalize_pagetable(unsigned long type, void *page)
+static int uncanonicalize_pagetable(int xc_handle, uint32_t dom,
+ unsigned long type, void *page)
{
int i, pte_last;
unsigned long pfn;
uint64_t pte;
+ int nr_mfns = 0;
pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
- /* Now iterate through the page table, uncanonicalizing each PTE */
+ /* First pass: work out how many (if any) MFNs we need to alloc */
for(i = 0; i < pte_last; i++) {
-
+
if(pt_levels == 2)
pte = ((uint32_t *)page)[i];
else
pte = ((uint64_t *)page)[i];
-
- if(pte & _PAGE_PRESENT) {
-
- pfn = (pte >> PAGE_SHIFT) & 0xffffffff;
-
- if(pfn >= max_pfn) {
- /* This "page table page" is probably not one; bail. */
- ERROR("Frame number in type %lu page table is out of range: "
- "i=%d pfn=0x%lx max_pfn=%lu",
- type >> 28, i, pfn, max_pfn);
- return 0;
- }
-
-
- pte &= 0xffffff0000000fffULL;
- pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
-
- if(pt_levels == 2)
- ((uint32_t *)page)[i] = (uint32_t)pte;
- else
- ((uint64_t *)page)[i] = (uint64_t)pte;
-
-
-
- }
+
+ /* XXX SMH: below needs fixing for PROT_NONE etc */
+ if(!(pte & _PAGE_PRESENT))
+ continue;
+
+ pfn = (pte >> PAGE_SHIFT) & 0xffffffff;
+
+ if(pfn >= max_pfn) {
+ /* This "page table page" is probably not one; bail. */
+ ERROR("Frame number in type %lu page table is out of range: "
+ "i=%d pfn=0x%lx max_pfn=%lu",
+ type >> 28, i, pfn, max_pfn);
+ return 0;
+ }
+
+ if(p2m[pfn] == INVALID_P2M_ENTRY) {
+ /* Have a 'valid' PFN without a matching MFN - need to alloc */
+ p2m_batch[nr_mfns++] = pfn;
+ }
+ }
+
+
+ /* Allocate the requisite number of mfns */
+ if (nr_mfns && xc_domain_memory_populate_physmap(
+ xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) {
+ ERROR("Failed to allocate memory for batch.!\n");
+ errno = ENOMEM;
+ return 0;
+ }
+
+ /* Second pass: uncanonicalize each present PTE */
+ nr_mfns = 0;
+ for(i = 0; i < pte_last; i++) {
+
+ if(pt_levels == 2)
+ pte = ((uint32_t *)page)[i];
+ else
+ pte = ((uint64_t *)page)[i];
+
+ /* XXX SMH: below needs fixing for PROT_NONE etc */
+ if(!(pte & _PAGE_PRESENT))
+ continue;
+
+ pfn = (pte >> PAGE_SHIFT) & 0xffffffff;
+
+ if(p2m[pfn] == INVALID_P2M_ENTRY)
+ p2m[pfn] = p2m_batch[nr_mfns++];
+
+ pte &= 0xffffff0000000fffULL;
+ pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
+
+ if(pt_levels == 2)
+ ((uint32_t *)page)[i] = (uint32_t)pte;
+ else
+ ((uint64_t *)page)[i] = (uint64_t)pte;
}
return 1;
@@ -140,6 +175,7 @@ int xc_linux_restore(int xc_handle, int
/* A temporary mapping of the guest's start_info page. */
start_info_t *start_info;
+ /* Our mapping of the current region (batch) */
char *region_base;
xc_mmu_t *mmu = NULL;
@@ -244,8 +280,10 @@ int xc_linux_restore(int xc_handle, int
p2m = calloc(max_pfn, sizeof(xen_pfn_t));
pfn_type = calloc(max_pfn, sizeof(unsigned long));
region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
-
- if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) {
+ p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
+
+ if ((p2m == NULL) || (pfn_type == NULL) ||
+ (region_mfn == NULL) || (p2m_batch == NULL)) {
ERROR("memory alloc failed");
errno = ENOMEM;
goto out;
@@ -253,6 +291,11 @@ int xc_linux_restore(int xc_handle, int
if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
ERROR("Could not lock region_mfn");
+ goto out;
+ }
+
+ if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
+ ERROR("Could not lock p2m_batch");
goto out;
}
@@ -270,17 +313,9 @@ int xc_linux_restore(int xc_handle, int
goto out;
}
+ /* Mark all PFNs as invalid; we allocate on demand */
for ( pfn = 0; pfn < max_pfn; pfn++ )
- p2m[pfn] = pfn;
-
- if (xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn,
- 0, 0, p2m) != 0) {
- ERROR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn));
- errno = ENOMEM;
- goto out;
- }
-
- DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn));
+ p2m[pfn] = INVALID_P2M_ENTRY;
if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
ERROR("Could not initialise for MMU updates");
@@ -298,7 +333,7 @@ int xc_linux_restore(int xc_handle, int
n = 0;
while (1) {
- int j;
+ int j, nr_mfns = 0;
this_pc = (n * 100) / max_pfn;
if ( (this_pc - prev_pc) >= 5 )
@@ -333,20 +368,57 @@ int xc_linux_restore(int xc_handle, int
goto out;
}
+ /* First pass for this batch: work out how much memory to alloc */
+ nr_mfns = 0;
for ( i = 0; i < j; i++ )
{
unsigned long pfn, pagetype;
pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+ if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) &&
+ (p2m[pfn] == INVALID_P2M_ENTRY) )
+ {
+ /* Have a live PFN which hasn't had an MFN allocated */
+ p2m_batch[nr_mfns++] = pfn;
+ }
+ }
+
+
+ /* Now allocate a bunch of mfns for this batch */
+ if (nr_mfns && xc_domain_memory_populate_physmap(
+ xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) {
+ ERROR("Failed to allocate memory for batch.!\n");
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* Second pass for this batch: update p2m[] and region_mfn[] */
+ nr_mfns = 0;
+ for ( i = 0; i < j; i++ )
+ {
+ unsigned long pfn, pagetype;
+ pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
+ pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+
if ( pagetype == XEN_DOMCTL_PFINFO_XTAB)
- region_mfn[i] = 0; /* we know map will fail, but don't care */
- else
- region_mfn[i] = p2m[pfn];
- }
-
+ region_mfn[i] = ~0UL; /* map will fail but we don't care */
+ else
+ {
+ if (p2m[pfn] == INVALID_P2M_ENTRY) {
+ /* We just allocated a new mfn above; update p2m */
+ p2m[pfn] = p2m_batch[nr_mfns++];
+ }
+
+ /* setup region_mfn[] for batch map */
+ region_mfn[i] = p2m[pfn];
+ }
+ }
+
+ /* Map relevant mfns */
region_base = xc_map_foreign_batch(
xc_handle, dom, PROT_WRITE, region_mfn, j);
+
if ( region_base == NULL )
{
ERROR("map batch failed");
@@ -401,7 +473,8 @@ int xc_linux_restore(int xc_handle, int
pae_extended_cr3 ||
(pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
- if (!uncanonicalize_pagetable(pagetype, page)) {
+ if (!uncanonicalize_pagetable(xc_handle, dom,
+ pagetype, page)) {
/*
** Failing to uncanonicalize a page table can be ok
** under live migration since the pages type may have
@@ -411,10 +484,8 @@ int xc_linux_restore(int xc_handle, int
pagetype >> 28, pfn, mfn);
nraces++;
continue;
- }
-
- }
-
+ }
+ }
}
else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
{
@@ -486,7 +557,7 @@ int xc_linux_restore(int xc_handle, int
*/
int j, k;
-
+
/* First pass: find all L3TABs current in > 4G mfns and get new mfns */
for ( i = 0; i < max_pfn; i++ )
{
@@ -555,7 +626,8 @@ int xc_linux_restore(int xc_handle, int
}
for(k = 0; k < j; k++) {
- if(!uncanonicalize_pagetable(XEN_DOMCTL_PFINFO_L1TAB,
+ if(!uncanonicalize_pagetable(xc_handle, dom,
+ XEN_DOMCTL_PFINFO_L1TAB,
region_base + k*PAGE_SIZE)) {
ERROR("failed uncanonicalize pt!");
goto out;
@@ -631,7 +703,7 @@ int xc_linux_restore(int xc_handle, int
{
unsigned int count;
unsigned long *pfntab;
- int rc;
+ int nr_frees, rc;
if (!read_exact(io_fd, &count, sizeof(count))) {
ERROR("Error when reading pfn count");
@@ -648,29 +720,30 @@ int xc_linux_restore(int xc_handle, int
goto out;
}
+ nr_frees = 0;
for (i = 0; i < count; i++) {
unsigned long pfn = pfntab[i];
- if(pfn > max_pfn)
- /* shouldn't happen - continue optimistically */
- continue;
-
- pfntab[i] = p2m[pfn];
- p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map
- }
-
- if (count > 0) {
+ if((pfn < max_pfn) && (p2m[pfn] != INVALID_P2M_ENTRY)) {
+ /* in-range pfn (p2m[] has max_pfn entries) that is not in physmap
+ now, but was at some point during save/migration - need to free it */
+ pfntab[nr_frees++] = p2m[pfn];
+ p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map
+ }
+ }
+
+ if (nr_frees > 0) {
struct xen_memory_reservation reservation = {
- .nr_extents = count,
+ .nr_extents = nr_frees,
.extent_order = 0,
.domid = dom
};
set_xen_guest_handle(reservation.extent_start, pfntab);
if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
- &reservation)) != count) {
+ &reservation)) != nr_frees) {
ERROR("Could not decrease reservation : %d", rc);
goto out;
} else
@@ -791,6 +864,6 @@ int xc_linux_restore(int xc_handle, int
free(pfn_type);
DPRINTF("Restore exit with rc=%d\n", rc);
-
+
return rc;
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxc/xc_linux_save.c Wed Jan 17 09:56:40 2007 -0500
@@ -44,6 +44,7 @@ static xen_pfn_t *live_p2m = NULL;
/* Live mapping of system MFN to PFN table. */
static xen_pfn_t *live_m2p = NULL;
+static unsigned long m2p_mfn0;
/* grep fodder: machine_to_phys */
@@ -440,13 +441,23 @@ static int canonicalize_pagetable(unsign
** that this check will fail for other L2s.
*/
if (pt_levels == 3 && type == XEN_DOMCTL_PFINFO_L2TAB) {
-
-/* XXX index of the L2 entry in PAE mode which holds the guest LPT */
-#define PAE_GLPT_L2ENTRY (495)
- pte = ((const uint64_t*)spage)[PAE_GLPT_L2ENTRY];
-
- if(((pte >> PAGE_SHIFT) & 0x0fffffff) == live_p2m[pfn])
- xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
+ int hstart;
+ unsigned long he;
+
+ hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
+ he = ((const uint64_t *) spage)[hstart];
+
+ if ( ((he >> PAGE_SHIFT) & 0x0fffffff) == m2p_mfn0 ) {
+ /* hvirt starts with xen stuff... */
+ xen_start = hstart;
+ } else if ( hvirt_start != 0xf5800000 ) {
+ /* old L2s from before hole was shrunk... */
+ hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
+ he = ((const uint64_t *) spage)[hstart];
+
+ if( ((he >> PAGE_SHIFT) & 0x0fffffff) == m2p_mfn0 )
+ xen_start = hstart;
+ }
}
if (pt_levels == 4 && type == XEN_DOMCTL_PFINFO_L4TAB) {
@@ -550,6 +561,8 @@ static xen_pfn_t *xc_map_m2p(int xc_hand
return NULL;
}
+ m2p_mfn0 = entries[0].mfn;
+
free(extent_start);
free(entries);
@@ -646,13 +659,6 @@ int xc_linux_save(int xc_handle, int io_
ERROR("Domain is not in a valid Linux guest OS state");
goto out;
}
-
- /* cheesy sanity check */
- if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) {
- ERROR("Invalid state record -- pfn count out of range: %lu",
- (info.max_memkb >> (PAGE_SHIFT - 10)));
- goto out;
- }
/* Map the shared info frame */
if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxc/xc_load_elf.c Wed Jan 17 09:56:40 2007 -0500
@@ -406,17 +406,19 @@ static int parseelfimage(const char *ima
}
/*
- * A "bimodal" ELF note indicates the kernel will adjust to the
- * current paging mode, including handling extended cr3 syntax.
- * If we have ELF notes then PAE=yes implies that we must support
- * the extended cr3 syntax. Otherwise we need to find the
- * [extended-cr3] syntax in the __xen_guest string.
+ * A "bimodal" ELF note indicates the kernel will adjust to the current
+ * paging mode, including handling extended cr3 syntax. If we have ELF
+ * notes then PAE=yes implies that we must support the extended cr3 syntax.
+ * Otherwise we need to find the [extended-cr3] syntax in the __xen_guest
+ * string. We use strstr() to look for "bimodal" to allow guests to use
+ * "yes,bimodal" or "no,bimodal" for compatibility reasons.
*/
+
dsi->pae_kernel = PAEKERN_no;
if ( dsi->__elfnote_section )
{
p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE);
- if ( p != NULL && strncmp(p, "bimodal", 7) == 0 )
+ if ( p != NULL && strstr(p, "bimodal") != NULL )
dsi->pae_kernel = PAEKERN_bimodal;
else if ( p != NULL && strncmp(p, "yes", 3) == 0 )
dsi->pae_kernel = PAEKERN_extended_cr3;
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxc/xc_ptrace.c Wed Jan 17 09:56:40 2007 -0500
@@ -166,14 +166,11 @@ static unsigned long *page_arr
* tables.
*
*/
-static unsigned long
-to_ma(int cpu,
- unsigned long in_addr)
-{
- unsigned long maddr = in_addr;
-
+static uint64_t
+to_ma(int cpu, uint64_t maddr)
+{
if ( current_is_hvm && paging_enabled(&ctxt[cpu]) )
- maddr = page_array[maddr >> PAGE_SHIFT] << PAGE_SHIFT;
+ maddr = (uint64_t)page_array[maddr >> PAGE_SHIFT] << PAGE_SHIFT;
return maddr;
}
@@ -225,7 +222,8 @@ map_domain_va_pae(
void *guest_va,
int perm)
{
- unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
+ uint64_t l3e, l2e, l1e, l2p, l1p, p;
+ unsigned long va = (unsigned long)guest_va;
uint64_t *l3, *l2, *l1;
static void *v[MAX_VIRT_CPUS];
@@ -380,12 +378,12 @@ map_domain_va(
if (!paging_enabled(&ctxt[cpu])) {
static void * v;
- unsigned long page;
+ uint64_t page;
if ( v != NULL )
munmap(v, PAGE_SIZE);
- page = to_ma(cpu, page_array[va >> PAGE_SHIFT]);
+ page = to_ma(cpu, va);
v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE,
perm, page >> PAGE_SHIFT);
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxc/xg_save_restore.h Wed Jan 17 09:56:40 2007 -0500
@@ -53,8 +53,17 @@ static int get_platform_info(int xc_hand
*hvirt_start = xen_params.virt_start;
+ /*
+ * XXX For now, 32bit dom0's can only save/restore 32bit domUs
+ * on 64bit hypervisors, so no need to check which type of domain
+ * we're dealing with.
+ */
if (strstr(xen_caps, "xen-3.0-x86_64"))
+#if defined(__i386__)
+ *pt_levels = 3;
+#else
*pt_levels = 4;
+#endif
else if (strstr(xen_caps, "xen-3.0-x86_32p"))
*pt_levels = 3;
else if (strstr(xen_caps, "xen-3.0-x86_32"))
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxen/include/xen_cpu_feature.h
--- a/tools/libxen/include/xen_cpu_feature.h Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxen/include/xen_cpu_feature.h Wed Jan 17 09:56:40 2007 -0500
@@ -198,12 +198,12 @@ enum xen_cpu_feature
/**
* AMD 3DNow! extensions
*/
- XEN_CPU_FEATURE_3DNOWEXT,
+ XEN_CPU_FEATURE_THREEDNOWEXT,
/**
* 3DNow!
*/
- XEN_CPU_FEATURE_3DNOW,
+ XEN_CPU_FEATURE_THREEDNOW,
/**
* CPU in recovery mode
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxen/src/xen_common.c
--- a/tools/libxen/src/xen_common.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxen/src/xen_common.c Wed Jan 17 09:56:40 2007 -0500
@@ -373,11 +373,18 @@ static void server_error_2(xen_session *
}
-static bool is_container_node(xmlNode *n, char *type)
+static bool is_node(xmlNode *n, char *type)
{
return
n->type == XML_ELEMENT_NODE &&
- 0 == strcmp((char *)n->name, type) &&
+ 0 == strcmp((char *)n->name, type);
+}
+
+
+static bool is_container_node(xmlNode *n, char *type)
+{
+ return
+ is_node(n, type) &&
n->children != NULL &&
n->children == n->last &&
n->children->type == XML_ELEMENT_NODE;
@@ -390,13 +397,30 @@ static bool is_container_node(xmlNode *n
*/
static xmlChar *string_from_value(xmlNode *n, char *type)
{
- return
- is_container_node(n, "value") &&
- 0 == strcmp((char *)n->children->name, type) ?
- (n->children->children == NULL ?
- xmlStrdup(BAD_CAST("")) :
- xmlNodeGetContent(n->children->children)) :
- NULL;
+ /*
+ <value><type>XYZ</type></value> is normal, but the XML-RPC spec also
+ allows <value>XYZ</value> where XYZ is to be interpreted as a string.
+ */
+
+ if (is_container_node(n, "value") &&
+ 0 == strcmp((char *)n->children->name, type))
+ {
+ return
+ n->children->children == NULL ?
+ xmlStrdup(BAD_CAST("")) :
+ xmlNodeGetContent(n->children->children);
+ }
+ else if (0 == strcmp(type, "string") && is_node(n, "value"))
+ {
+ return
+ n->children == NULL ?
+ xmlStrdup(BAD_CAST("")) :
+ xmlNodeGetContent(n->children);
+ }
+ else
+ {
+ return NULL;
+ }
}
@@ -557,8 +581,14 @@ static void parse_into(xen_session *s, x
xmlChar *string = string_from_value(value_node, "double");
if (string == NULL)
{
+#if PERMISSIVE
+ fprintf(stderr,
+ "Expected a Float from the server, but didn't get one\n");
+ ((double *)value)[slot] = 0.0;
+#else
server_error(
s, "Expected a Float from the server, but didn't get one");
+#endif
}
else
{
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxen/src/xen_cpu_feature.c
--- a/tools/libxen/src/xen_cpu_feature.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/libxen/src/xen_cpu_feature.c Wed Jan 17 09:56:40 2007 -0500
@@ -62,8 +62,8 @@ static const char *lookup_table[] =
"NX",
"MMXEXT",
"LM",
- "3DNOWEXT",
- "3DNOW",
+ "THREEDNOWEXT",
+ "THREEDNOW",
"RECOVERY",
"LONGRUN",
"LRTI",
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/pygrub/src/pygrub Wed Jan 17 09:56:40 2007 -0500
@@ -13,7 +13,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
-import os, sys, string, struct, tempfile
+import os, sys, string, struct, tempfile, re
import copy
import logging
@@ -48,8 +48,7 @@ def is_disk_image(file):
return True
return False
-SECTOR_SIZE=512
-def get_active_offset(file):
+def get_active_partition(file):
"""Find the offset for the start of the first active partition "
"in the disk image file."""
@@ -58,13 +57,56 @@ def get_active_offset(file):
for poff in (446, 462, 478, 494): # partition offsets
# active partition has 0x80 as the first byte
if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',):
- return struct.unpack("<L",
- buf[poff+8:poff+12])[0] * SECTOR_SIZE
+ return buf[poff:poff+16]
# if there's not a partition marked as active, fall back to
# the first partition
- P1 = 446
- return struct.unpack("<L", buf[P1+8:P1+12])[0] * SECTOR_SIZE
+ return buf[446:446+16]
+
+SECTOR_SIZE=512
+DK_LABEL_LOC=1
+DKL_MAGIC=0xdabe
+V_ROOT=0x2
+
+def get_solaris_slice(file, offset):
+ """Find the root slice in a Solaris VTOC."""
+
+ fd = os.open(file, os.O_RDONLY)
+ os.lseek(fd, offset + (DK_LABEL_LOC * SECTOR_SIZE), 0)
+ buf = os.read(fd, 512)
+ if struct.unpack("<H", buf[508:510])[0] != DKL_MAGIC:
+ raise RuntimeError, "Invalid disklabel magic"
+
+ nslices = struct.unpack("<H", buf[30:32])[0]
+
+ for i in range(nslices):
+ sliceoff = 72 + 12 * i
+ slicetag = struct.unpack("<H", buf[sliceoff:sliceoff+2])[0]
+ slicesect = struct.unpack("<L", buf[sliceoff+4:sliceoff+8])[0]
+ if slicetag == V_ROOT:
+ return slicesect * SECTOR_SIZE
+
+ raise RuntimeError, "No root slice found"
+
+FDISK_PART_SOLARIS=0xbf
+FDISK_PART_SOLARIS_OLD=0x82
+
+def get_fs_offset(file):
+ if not is_disk_image(file):
+ return 0
+
+ partbuf = get_active_partition(file)
+ if len(partbuf) == 0:
+ raise RuntimeError, "Unable to find active partition on disk"
+
+ offset = struct.unpack("<L", partbuf[8:12])[0] * SECTOR_SIZE
+
+ type = struct.unpack("<B", partbuf[4:5])[0]
+
+ if type == FDISK_PART_SOLARIS or type == FDISK_PART_SOLARIS_OLD:
+ offset += get_solaris_slice(file, offset)
+
+ return offset
class GrubLineEditor(curses.textpad.Textbox):
def __init__(self, screen, startx, starty, line = ""):
@@ -143,12 +185,12 @@ class GrubLineEditor(curses.textpad.Text
class Grub:
- def __init__(self, file, isconfig = False):
+ def __init__(self, file, fs = None):
self.screen = None
self.entry_win = None
self.text_win = None
if file:
- self.read_config(file, isconfig)
+ self.read_config(file, fs)
def draw_main_windows(self):
if self.screen is None: #only init stuff once
@@ -295,8 +337,8 @@ class Grub:
# else, we cancelled and should just go back
break
- def read_config(self, fn, isConfig = False):
- """Read the given file to parse the config. If isconfig, then
+ def read_config(self, fn, fs = None):
+ """Read the given file to parse the config. If fs = None, then
we're being given a raw config file rather than a disk image."""
if not os.access(fn, os.R_OK):
@@ -304,38 +346,25 @@ class Grub:
self.cf = grub.GrubConf.GrubConfigFile()
- if isConfig:
+ if not fs:
# set the config file and parse it
self.cf.filename = fn
self.cf.parse()
return
- offset = 0
- if is_disk_image(fn):
- offset = get_active_offset(fn)
- if offset == -1:
- raise RuntimeError, "Unable to find active partition on disk"
-
- # open the image and read the grub config
- fs = fsimage.open(fn, offset)
-
- if fs is not None:
- grubfile = None
- for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
- "/grub/menu.lst", "/grub/grub.conf"):
- if fs.file_exists(f):
- grubfile = f
- break
- if grubfile is None:
- raise RuntimeError, "we couldn't find grub config file in the
image provided."
- f = fs.open_file(grubfile)
- buf = f.read()
- del f
- del fs
- # then parse the grub config
- self.cf.parse(buf)
- else:
- raise RuntimeError, "Unable to read filesystem"
+ grubfile = None
+ for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
+ "/grub/menu.lst", "/grub/grub.conf"):
+ if fs.file_exists(f):
+ grubfile = f
+ break
+ if grubfile is None:
+ raise RuntimeError, "we couldn't find grub config file in the
image provided."
+ f = fs.open_file(grubfile)
+ buf = f.read()
+ del f
+ # then parse the grub config
+ self.cf.parse(buf)
def run(self):
timeout = int(self.cf.timeout)
@@ -376,6 +405,9 @@ class Grub:
c = self.screen.getch()
if mytime != -1:
mytime += 1
+ if mytime >= int(timeout):
+ self.isdone = True
+ break
# handle keypresses
if c == ord('c'):
@@ -431,19 +463,93 @@ def get_entry_idx(cf, entry):
return None
+def run_grub(file, entry, fs):
+ global g
+ global sel
+
+ def run_main(scr, *args):
+ global sel
+ global g
+ sel = g.run()
+
+ g = Grub(file, fs)
+ if interactive:
+ curses.wrapper(run_main)
+ else:
+ sel = g.cf.default
+
+ # set the entry to boot as requested
+ if entry is not None:
+ idx = get_entry_idx(g.cf, entry)
+ if idx is not None and idx > 0 and idx < len(g.cf.images):
+ sel = idx
+
+ if sel == -1:
+ print "No kernel image selected!"
+ sys.exit(1)
+
+ img = g.cf.images[sel]
+
+ grubcfg = { "kernel": None, "ramdisk": None, "args": None }
+
+ grubcfg["kernel"] = img.kernel[1]
+ if img.initrd:
+ grubcfg["ramdisk"] = img.initrd[1]
+ if img.args:
+ grubcfg["args"] = img.args
+
+ return grubcfg
+
+# If nothing has been specified, look for a Solaris domU. If found, perform the
+# necessary tweaks.
+def sniff_solaris(fs, cfg):
+ if not fs.file_exists("/platform/i86xen/kernel/unix"):
+ return cfg
+
+ # darned python
+ longmode = (sys.maxint != 2147483647L)
+ if not longmode:
+ longmode = os.uname()[4] == "x86_64"
+ if not longmode:
+ if (os.access("/usr/bin/isainfo", os.R_OK) and
+ os.popen("/usr/bin/isainfo -b").read() == "64\n"):
+ longmode = True
+
+ if not cfg["kernel"]:
+ cfg["kernel"] = "/platform/i86xen/kernel/unix"
+ cfg["ramdisk"] = "/platform/i86pc/boot_archive"
+ if longmode:
+ cfg["kernel"] = "/platform/i86xen/kernel/amd64/unix"
+ cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive"
+
+ # Unpleasant. Typically we'll have 'root=foo -k' or 'root=foo /kernel -k',
+ # and we need to maintain Xen properties (root= and ip=) and the kernel
+ # before any user args.
+
+ xenargs = ""
+ userargs = ""
+
+ if not cfg["args"]:
+ cfg["args"] = cfg["kernel"]
+ else:
+ for arg in cfg["args"].split():
+ if re.match("^root=", arg) or re.match("^ip=", arg):
+ xenargs += arg + " "
+ elif arg != cfg["kernel"]:
+ userargs += arg + " "
+ cfg["args"] = xenargs + " " + cfg["kernel"] + " " + userargs
+
+ return cfg
+
if __name__ == "__main__":
sel = None
- def run_main(scr, *args):
- global sel
- sel = g.run()
-
def usage():
- print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--entry=]
<image>" %(sys.argv[0],)
+ print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--kernel=]
[--ramdisk=] [--args=] [--entry=] <image>" %(sys.argv[0],)
try:
opts, args = getopt.gnu_getopt(sys.argv[1:], 'qh::',
- ["quiet", "help", "output=", "entry=",
+ ["quiet", "help", "output=", "entry=",
"kernel=", "ramdisk=", "args=",
"isconfig"])
except getopt.GetoptError:
usage()
@@ -458,6 +564,14 @@ if __name__ == "__main__":
entry = None
interactive = True
isconfig = False
+
+ # what was passed in
+ incfg = { "kernel": None, "ramdisk": None, "args": None }
+ # what grub or sniffing chose
+ chosencfg = { "kernel": None, "ramdisk": None, "args": None }
+ # what to boot
+ bootcfg = { "kernel": None, "ramdisk": None, "args": None }
+
for o, a in opts:
if o in ("-q", "--quiet"):
interactive = False
@@ -466,6 +580,12 @@ if __name__ == "__main__":
sys.exit()
elif o in ("--output",):
output = a
+ elif o in ("--kernel",):
+ incfg["kernel"] = a
+ elif o in ("--ramdisk",):
+ incfg["ramdisk"] = a
+ elif o in ("--args",):
+ incfg["args"] = a
elif o in ("--entry",):
entry = a
# specifying the entry to boot implies non-interactive
@@ -478,58 +598,42 @@ if __name__ == "__main__":
else:
fd = os.open(output, os.O_WRONLY)
- g = Grub(file, isconfig)
- if interactive:
- curses.wrapper(run_main)
- else:
- sel = g.cf.default
-
- # set the entry to boot as requested
- if entry is not None:
- idx = get_entry_idx(g.cf, entry)
- if idx is not None and idx > 0 and idx < len(g.cf.images):
- sel = idx
-
- if sel == -1:
- print "No kernel image selected!"
- sys.exit(1)
-
- img = g.cf.images[sel]
- print "Going to boot %s" %(img.title)
- print " kernel: %s" %(img.kernel[1],)
- if img.initrd:
- print " initrd: %s" %(img.initrd[1],)
-
+ # debug
if isconfig:
- print " args: %s" %(img.args,)
+ chosencfg = run_grub(file, entry, None) # fs=None: file is a raw grub config
+ print " kernel: %s" % chosencfg["kernel"]
+ if chosencfg["ramdisk"]: # img is local to run_grub; use its returned dict
+ print " initrd: %s" % chosencfg["ramdisk"]
+ print " args: %s" % chosencfg["args"]
sys.exit(0)
-
- offset = 0
- if is_disk_image(file):
- offset = get_active_offset(file)
- if offset == -1:
- raise RuntimeError, "Unable to find active partition on disk"
-
- # read the kernel and initrd onto the hostfs
- fs = fsimage.open(file, offset)
-
- kernel = fs.open_file(img.kernel[1],).read()
- (tfd, fn) = tempfile.mkstemp(prefix="boot_kernel.",
+
+ fs = fsimage.open(file, get_fs_offset(file))
+
+ chosencfg = sniff_solaris(fs, incfg)
+
+ if not chosencfg["kernel"]:
+ chosencfg = run_grub(file, entry, fs)
+
+ data = fs.open_file(chosencfg["kernel"]).read()
+ (tfd, bootcfg["kernel"]) = tempfile.mkstemp(prefix="boot_kernel.",
dir="/var/run/xend/boot")
- os.write(tfd, kernel)
+ os.write(tfd, data)
os.close(tfd)
- sxp = "linux (kernel %s)" %(fn,)
-
- if img.initrd:
- initrd = fs.open_file(img.initrd[1],).read()
- (tfd, fn) = tempfile.mkstemp(prefix="boot_ramdisk.",
+
+ if chosencfg["ramdisk"]:
+ data = fs.open_file(chosencfg["ramdisk"],).read()
+ (tfd, bootcfg["ramdisk"]) = tempfile.mkstemp(prefix="boot_ramdisk.",
dir="/var/run/xend/boot")
- os.write(tfd, initrd)
+ os.write(tfd, data)
os.close(tfd)
- sxp += "(ramdisk %s)" %(fn,)
else:
initrd = None
- sxp += "(args '%s')" %(img.args,)
+
+ sxp = "linux (kernel %s)" % bootcfg["kernel"]
+ if bootcfg["ramdisk"]:
+ sxp += "(ramdisk %s)" % bootcfg["ramdisk"]
+ if chosencfg["args"]:
+ sxp += "(args \"%s\")" % chosencfg["args"]
sys.stdout.flush()
os.write(fd, sxp)
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/scripts/xapi.py
--- a/tools/python/scripts/xapi.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/scripts/xapi.py Wed Jan 17 09:56:40 2007 -0500
@@ -41,6 +41,7 @@ COMMANDS = {
COMMANDS = {
'host-info': ('', 'Get Xen Host Info'),
'host-set-name': ('', 'Set host name'),
+ 'pif-list': ('', 'List all PIFs'),
'sr-list': ('', 'List all SRs'),
'vbd-list': ('', 'List all VBDs'),
'vbd-create': ('<domname> <pycfg> [opts]',
@@ -63,6 +64,15 @@ COMMANDS = {
}
OPTIONS = {
+ 'sr-list': [(('-l', '--long'),
+ {'action':'store_true',
+ 'help':'List all properties of SR'})
+ ],
+
+ 'vdi-list': [(('-l', '--long'),
+ {'action':'store_true',
+ 'help':'List all properties of VDI'})
+ ],
'vm-list': [(('-l', '--long'),
{'action':'store_true',
'help':'List all properties of VMs'})
@@ -145,7 +155,7 @@ def _connect(*args):
def _connect(*args):
global _server, _session, _initialised
if not _initialised:
- _server = ServerProxy('httpu:///var/run/xend/xmlrpc.sock')
+ _server = ServerProxy('httpu:///var/run/xend/xen-api.sock')
login = raw_input("Login: ")
password = getpass()
creds = (login, password)
@@ -361,29 +371,53 @@ def xapi_vbd_list(*args):
print VBD_LIST_FORMAT % vbd_struct
def xapi_vdi_list(*args):
+ opts, args = parse_args('vdi-list', args, set_defaults = True)
+ is_long = opts and opts.long
+
server, session = _connect()
vdis = execute(server.VDI.get_all, session)
- print VDI_LIST_FORMAT % {'name_label': 'VDI Label',
- 'uuid' : 'UUID',
- 'virtual_size': 'Sectors',
- 'sector_size': 'Sector Size'}
-
- for vdi in vdis:
- vdi_struct = execute(server.VDI.get_record, session, vdi)
- print VDI_LIST_FORMAT % vdi_struct
+ if not is_long:
+ print VDI_LIST_FORMAT % {'name_label': 'VDI Label',
+ 'uuid' : 'UUID',
+ 'virtual_size': 'Sectors',
+ 'sector_size': 'Sector Size'}
+
+ for vdi in vdis:
+ vdi_struct = execute(server.VDI.get_record, session, vdi)
+ print VDI_LIST_FORMAT % vdi_struct
+
+ else:
+
+ for vdi in vdis:
+ vdi_struct = execute(server.VDI.get_record, session, vdi)
+ pprint(vdi_struct)
def xapi_sr_list(*args):
+ opts, args = parse_args('sr-list', args, set_defaults = True)
+ is_long = opts and opts.long
+
server, session = _connect()
srs = execute(server.SR.get_all, session)
- print SR_LIST_FORMAT % {'name_label': 'SR Label',
- 'uuid' : 'UUID',
- 'physical_size': 'Size',
- 'type': 'Type'}
- for sr in srs:
- sr_struct = execute(server.SR.get_record, session, sr)
- sr_struct['physical_size'] = int(sr_struct['physical_size'])/MB
- print SR_LIST_FORMAT % sr_struct
+ if not is_long:
+ print SR_LIST_FORMAT % {'name_label': 'SR Label',
+ 'uuid' : 'UUID',
+ 'physical_size': 'Size (MB)',
+ 'type': 'Type'}
+
+ for sr in srs:
+ sr_struct = execute(server.SR.get_record, session, sr)
+ sr_struct['physical_size'] = int(sr_struct['physical_size'])/MB
+ print SR_LIST_FORMAT % sr_struct
+ else:
+ for sr in srs:
+ sr_struct = execute(server.SR.get_record, session, sr)
+ pprint(sr_struct)
+
+def xapi_sr_rename(*args):
+ server, session = _connect()
+ sr = execute(server.SR.get_by_name_label, session, args[0])
+ execute(server.SR.set_name_label, session, sr[0], args[1])
def xapi_vdi_create(*args):
opts, args = parse_args('vdi-create', args)
@@ -421,10 +455,11 @@ def xapi_vdi_rename(*args):
if len(args) < 2:
raise OptionError('Not enough arguments')
- vdi_uuid = args[0]
+ vdi_uuid = execute(server.VDI.get_by_name_label, session, args[0])
vdi_name = args[1]
- print 'Renaming VDI %s to %s' % (vdi_uuid, vdi_name)
- result = execute(server.VDI.set_name_label, session, vdi_uuid, vdi_name)
+
+ print 'Renaming VDI %s to %s' % (vdi_uuid[0], vdi_name)
+ result = execute(server.VDI.set_name_label, session, vdi_uuid[0], vdi_name)
print 'Done.'
@@ -447,6 +482,14 @@ def xapi_vtpm_create(*args):
vtpm_rec = execute(server.VTPM.get_record, session, vtpm_uuid)
print "Has vtpm record '%s'" % vtpm_rec
+
+def xapi_pif_list(*args):
+ server, session = _connect()
+ pif_uuids = execute(server.PIF.get_all, session)
+ for pif_uuid in pif_uuids:
+ pif = execute(server.PIF.get_record, session, pif_uuid)
+ print pif
+
#
# Command Line Utils
@@ -517,10 +560,12 @@ def usage(command = None, print_usage =
print
print 'Subcommands:'
print
- sorted_commands = sorted(COMMANDS.keys())
- for command in sorted_commands:
- args, description = COMMANDS[command]
- print '%-16s %-40s' % (command, description)
+
+ for func in sorted(globals().keys()):
+ if func.startswith('xapi_'):
+ command = func[5:].replace('_', '-')
+ args, description = COMMANDS.get(command, ('', ''))
+ print '%-16s %-40s' % (command, description)
print
else:
parse_args(command, ['-h'])
@@ -549,7 +594,7 @@ def main(args):
try:
subcmd_func(*args[1:])
except XenAPIError, e:
- print 'Error: %s' % str(e.args[1])
+ print 'Error: %s' % str(e.args[0])
sys.exit(2)
except OptionError, e:
print 'Error: %s' % e
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendAPI.py Wed Jan 17 09:56:40 2007 -0500
@@ -543,14 +543,14 @@ class XendAPI:
try:
node = XendNode.instance()
if host_uuid != node.uuid:
- return xen_api_error([HOST_HANDLE_INVALID, host_uuid])
+ return xen_api_error(['HOST_HANDLE_INVALID', host_uuid])
elif _is_valid_ref(network_uuid, node.is_valid_network):
network = node.get_network(network_uuid)
return xen_api_success(node.PIF_create(name, mtu, vlan, mac,
network))
else:
- return xen_api_error([NETWORK_HANDLE_INVALID, network_uuid])
+ return xen_api_error(['NETWORK_HANDLE_INVALID', network_uuid])
except NetworkAlreadyConnected, exn:
return xen_api_error(['NETWORK_ALREADY_CONNECTED',
network_uuid, exn.pif_uuid])
@@ -593,10 +593,10 @@ class XendAPI:
return xen_api_success(self._get_PIF(ref).set_name(name))
def PIF_set_MAC(self, _, ref, mac):
- return xen_api_success(self._get_PIF(ref).set_mac(name))
+ return xen_api_success(self._get_PIF(ref).set_mac(mac))
def PIF_set_MTU(self, _, ref, mtu):
- return xen_api_success(self._get_PIF(ref).set_mtu(name))
+ return xen_api_success(self._get_PIF(ref).set_mtu(mtu))
def PIF_create_VLAN(self, _, ref, network, vlan):
try:
@@ -604,7 +604,7 @@ class XendAPI:
return xen_api_success(XendNode.instance().PIF_create_VLAN(
ref, network, vlan))
else:
- return xen_api_error([NETWORK_HANDLE_INVALID, network])
+ return xen_api_error(['NETWORK_HANDLE_INVALID', network])
except NetworkAlreadyConnected, exn:
return xen_api_error(['NETWORK_ALREADY_CONNECTED',
network, exn.pif_uuid])
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendAPIConstants.py
--- a/tools/python/xen/xend/XendAPIConstants.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendAPIConstants.py Wed Jan 17 09:56:40 2007 -0500
@@ -41,7 +41,7 @@ XEN_API_CPU_FEATURE = [
'CMOV', 'PAT', 'PSE36', 'PN', 'CLFLSH', 'DTES',
'ACPI', 'MMX', 'FXCR', 'XMM', 'XMM2', 'SELFSNOOP',
'HT', 'ACC', 'IA64', 'SYSCALL', 'MP', 'NX', 'MMXEXT',
- 'LM', '3DNOWEXT', '3DNOW', 'RECOVERY', 'LONGRUN',
+ 'LM', 'THREEDNOWEXT', 'THREEDNOW', 'RECOVERY', 'LONGRUN',
'LRTI', 'CXMMX', 'K6_MTRR', 'CYRIX_ARR', 'CENTAUR_MCR',
'K8', 'K7', 'P3', 'P4', 'CONSTANT_TSC', 'FXSAVE_LEAK',
'XMM3', 'MWAIT', 'DSCPL', 'EST', 'TM2', 'CID', 'CX16',
@@ -73,3 +73,4 @@ XEN_API_VBD_MODE = ['RO', 'RW']
XEN_API_VBD_MODE = ['RO', 'RW']
XEN_API_VDI_TYPE = ['system', 'user', 'ephemeral']
XEN_API_DRIVER_TYPE = ['ioemu', 'paravirtualised']
+XEN_API_VBD_TYPE = ['CD', 'Disk']
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendBootloader.py
--- a/tools/python/xen/xend/XendBootloader.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendBootloader.py Wed Jan 17 09:56:40 2007 -0500
@@ -53,6 +53,12 @@ def bootloader(blexec, disk, quiet = Fal
child = os.fork()
if (not child):
args = [ blexec ]
+ if kernel:
+ args.append("--kernel=%s" % kernel)
+ if ramdisk:
+ args.append("--ramdisk=%s" % ramdisk)
+ if kernel_args:
+ args.append("--args=%s" % kernel_args)
if quiet:
args.append("-q")
args.append("--output=%s" % fifo)
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendCheckpoint.py Wed Jan 17 09:56:40 2007 -0500
@@ -147,18 +147,20 @@ def restore(xd, fd, dominfo = None, paus
assert store_port
assert console_port
+ nr_pfns = (dominfo.getMemoryTarget() + 3) / 4
+
try:
l = read_exact(fd, sizeof_unsigned_long,
"not a valid guest state file: pfn count read")
- nr_pfns = unpack("L", l)[0] # native sizeof long
- if nr_pfns > 16*1024*1024: # XXX
+ max_pfn = unpack("L", l)[0] # native sizeof long
+ if max_pfn > 16*1024*1024: # XXX
raise XendError(
"not a valid guest state file: pfn count out of range")
balloon.free(xc.pages_to_kib(nr_pfns))
cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
- fd, dominfo.getDomid(), nr_pfns,
+ fd, dominfo.getDomid(), max_pfn,
store_port, console_port])
log.debug("[xc_restore]: %s", string.join(cmd))
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendConfig.py Wed Jan 17 09:56:40 2007 -0500
@@ -508,8 +508,12 @@ class XendConfig(dict):
pci_devs = []
for pci_dev in sxp.children(config, 'dev'):
pci_dev_info = {}
- for opt, val in pci_dev[1:]:
- pci_dev_info[opt] = val
+ for opt_val in pci_dev[1:]:
+ try:
+ opt, val = opt_val
+ pci_dev_info[opt] = val
+ except TypeError:
+ pass
pci_devs.append(pci_dev_info)
cfg['devices'][pci_devs_uuid] = (dev_type,
@@ -572,7 +576,6 @@ class XendConfig(dict):
if 'security' in cfg and isinstance(cfg['security'], str):
cfg['security'] = sxp.from_string(cfg['security'])
- # TODO: get states
old_state = sxp.child_value(sxp_cfg, 'state')
if old_state:
for i in range(len(CONFIG_OLD_DOM_STATES)):
@@ -855,14 +858,15 @@ class XendConfig(dict):
for cls in XendDevices.valid_devices():
found = False
- # figure if there is a device that is running
- if domain:
+ # figure out if there is a device controller that is valid and running
+ if domain and domain.getDomid() != None:
try:
controller = domain.getDeviceController(cls)
configs = controller.configurations()
for config in configs:
sxpr.append(['device', config])
- found = True
+
+ found = True
except:
log.exception("dumping sxp from device controllers")
pass
@@ -923,11 +927,12 @@ class XendConfig(dict):
dev_type = sxp.name(config)
dev_info = {}
- try:
- for opt, val in config[1:]:
+ for opt_val in config[1:]:
+ try:
+ opt, val = opt_val
dev_info[opt] = val
- except ValueError:
- pass # SXP has no options for this device
+ except (TypeError, ValueError): # unpack error
+ pass
if dev_type == 'vbd':
if dev_info.get('dev', '').startswith('ioemu:'):
@@ -996,7 +1001,7 @@ class XendConfig(dict):
self['vbd_refs'].append(dev_uuid)
return dev_uuid
- elif dev_type in ('vtpm'):
+ elif dev_type == 'vtpm':
if cfg_xenapi.get('type'):
dev_info['type'] = cfg_xenapi.get('type')
@@ -1015,15 +1020,17 @@ class XendConfig(dict):
@return: Returns True if succesfully found and updated a device conf
"""
if dev_uuid in self['devices']:
- config = sxp.child0(cfg_sxp)
- dev_type = sxp.name(config)
- dev_info = {}
-
- try:
- for opt, val in config[1:]:
- self['devices'][opt] = val
- except ValueError:
- pass # SXP has no options for this device
+ if sxp.child0(cfg_sxp) == 'device':
+ config = sxp.child0(cfg_sxp)
+ else:
+ config = cfg_sxp
+
+ for opt_val in config[1:]:
+ try:
+ opt, val = opt_val
+ self['devices'][dev_uuid][opt] = val
+ except (TypeError, ValueError):
+ pass # no value for this config option
return True
@@ -1107,15 +1114,19 @@ class XendConfig(dict):
# configuration
log.debug("update_with_image_sxp(%s)" % scrub_password(image_sxp))
- kernel_args = sxp.child_value(image_sxp, 'args', '')
+ kernel_args = ""
# attempt to extract extra arguments from SXP config
arg_ip = sxp.child_value(image_sxp, 'ip')
if arg_ip and not re.search(r'ip=[^ ]+', kernel_args):
- kernel_args += ' ip=%s' % arg_ip
+ kernel_args += 'ip=%s ' % arg_ip
arg_root = sxp.child_value(image_sxp, 'root')
if arg_root and not re.search(r'root=', kernel_args):
- kernel_args += ' root=%s' % arg_root
+ kernel_args += 'root=%s ' % arg_root
+
+ # user-specified args must come last: previous releases did this and
+ # some domU kernels rely upon the ordering.
+ kernel_args += sxp.child_value(image_sxp, 'args', '')
if bootloader:
self['_temp_using_bootloader'] = '1'
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendConstants.py
--- a/tools/python/xen/xend/XendConstants.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendConstants.py Wed Jan 17 09:56:40 2007 -0500
@@ -80,6 +80,7 @@ MINIMUM_RESTART_TIME = 20
MINIMUM_RESTART_TIME = 20
RESTART_IN_PROGRESS = 'xend/restart_in_progress'
+LAST_SHUTDOWN_REASON = 'xend/last_shutdown_reason'
#
# Device migration stages (eg. XendDomainInfo, XendCheckpoint, server.tpmif)
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendDomain.py Wed Jan 17 09:56:40 2007 -0500
@@ -377,7 +377,7 @@ class XendDomain:
dom0.setVCpuCount(target)
- def _refresh(self):
+ def _refresh(self, refresh_shutdown = True):
"""Refresh the domain list. Needs to be called when
either xenstore has changed or when a method requires
up to date information (like uptime, cputime stats).
@@ -393,7 +393,7 @@ class XendDomain:
for dom in running:
domid = dom['domid']
if domid in self.domains:
- self.domains[domid].update(dom)
+ self.domains[domid].update(dom, refresh_shutdown)
elif domid not in self.domains and dom['dying'] != 1:
try:
new_dom = XendDomainInfo.recreate(dom, False)
@@ -495,7 +495,7 @@ class XendDomain:
"""
self.domains_lock.acquire()
try:
- self._refresh()
+ self._refresh(refresh_shutdown = False)
dom = self.domain_lookup_nr(domid)
if not dom:
raise XendError("No domain named '%s'." % str(domid))
@@ -731,7 +731,7 @@ class XendDomain:
self.domains_lock.acquire()
try:
- self._refresh()
+ self._refresh(refresh_shutdown = False)
# active domains
active_domains = self.domains.values()
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jan 17 09:56:40 2007 -0500
@@ -37,7 +37,7 @@ from xen.util.blkif import blkdev_uname_
from xen.util.blkif import blkdev_uname_to_file
from xen.util import security
-from xen.xend import balloon, sxp, uuid, image, arch
+from xen.xend import balloon, sxp, uuid, image, arch, osdep
from xen.xend import XendRoot, XendNode, XendConfig
from xen.xend.XendConfig import scrub_password
@@ -439,7 +439,7 @@ class Common_XendDomainInfo:
def shutdown(self, reason):
"""Shutdown a domain by signalling this via xenstored."""
- log.debug('XendDomainInfo.shutdown')
+ log.debug('XendDomainInfo.shutdown(%s)', reason)
if self.state in (DOM_STATE_SHUTDOWN, DOM_STATE_HALTED,):
raise XendError('Domain cannot be shutdown')
@@ -496,7 +496,7 @@ class Common_XendDomainInfo:
self._waitForDevice(dev_type, devid)
return self.getDeviceController(dev_type).sxpr(devid)
- def device_configure(self, dev_config, devid = None):
+ def device_configure(self, dev_sxp, devid = None):
"""Configure an existing device.
@param dev_config: device configuration
@@ -506,19 +506,24 @@ class Common_XendDomainInfo:
@return: Returns True if successfully updated device
@rtype: boolean
"""
- deviceClass = sxp.name(dev_config)
-
- # look up uuid of the device
- dev_control = self.getDeviceController(deviceClass)
- dev_sxpr = dev_control.sxpr(devid)
- dev_uuid = sxp.child_value(sxpr, 'uuid')
- if not dev_uuid:
- return False
-
- self.info.device_update(dev_uuid, dev_config)
- dev_config_dict = self.info['devices'].get(dev_uuid)
- if dev_config_dict:
- dev_control.reconfigureDevice(devid, dev_config_dict[1])
+
+ # convert device sxp to a dict
+ dev_class = sxp.name(dev_sxp)
+ dev_config = {}
+ for opt_val in dev_sxp[1:]:
+ try:
+ dev_config[opt_val[0]] = opt_val[1]
+ except IndexError:
+ pass
+
+ # use DevController.reconfigureDevice to change device config
+ dev_control = self.getDeviceController(dev_class)
+ dev_uuid = dev_control.reconfigureDevice(devid, dev_config)
+
+ # update XendConfig with new device info
+ if dev_uuid:
+ self.info.device_update(dev_uuid, dev_sxp)
+
return True
def waitForDevices(self):
@@ -914,7 +919,7 @@ class Common_XendDomainInfo:
return self.info.get('cpu_cap', 0)
def getWeight(self):
- return self.info['cpu_weight']
+ return self.info.get('cpu_weight', 256)
def setResume(self, state):
self._resume = state
@@ -969,9 +974,15 @@ class Common_XendDomainInfo:
log.warn('Domain has crashed: name=%s id=%d.',
self.info['name_label'], self.domid)
+ self._writeVm(LAST_SHUTDOWN_REASON, 'crash')
if xroot.get_enable_dump():
- self.dumpCore()
+ try:
+ self.dumpCore()
+ except XendError:
+ # This error has been logged -- there's nothing more
+ # we can do in this context.
+ pass
restart_reason = 'crash'
self._stateSet(DOM_STATE_HALTED)
@@ -988,6 +999,7 @@ class Common_XendDomainInfo:
log.info('Domain has shutdown: name=%s id=%d reason=%s.',
self.info['name_label'], self.domid, reason)
+ self._writeVm(LAST_SHUTDOWN_REASON, reason)
self._clearRestart()
@@ -1162,7 +1174,10 @@ class Common_XendDomainInfo:
#
def dumpCore(self, corefile = None):
- """Create a core dump for this domain. Nothrow guarantee."""
+ """Create a core dump for this domain.
+
+ @raise: XendError if core dumping failed.
+ """
try:
if not corefile:
@@ -1571,7 +1586,7 @@ class Common_XendDomainInfo:
else:
# Boot using bootloader
if not blexec or blexec == 'pygrub':
- blexec = '/usr/bin/pygrub'
+ blexec = osdep.pygrub_path
blcfg = None
for (devtype, devinfo) in self.info.all_devices_sxpr():
@@ -1997,7 +2012,7 @@ class Common_XendDomainInfo:
config['image'] = config.get('uname', '')
config['io_read_kbs'] = 0.0
config['io_write_kbs'] = 0.0
- if config['mode'] == 'r':
+ if config.get('mode', 'r') == 'r':
config['mode'] = 'RO'
else:
config['mode'] = 'RW'
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/XendNode.py Wed Jan 17 09:56:40 2007 -0500
@@ -103,7 +103,7 @@ class XendNode:
saved_pifs = self.state_store.load_state('pif')
if saved_pifs:
for pif_uuid, pif in saved_pifs.items():
- if pif['network'] in self.networks:
+ if pif.get('network') in self.networks:
network = self.networks[pif['network']]
try:
self.PIF_create(pif['name'], pif['MTU'], pif['VLAN'],
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/osdep.py
--- a/tools/python/xen/xend/osdep.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/osdep.py Wed Jan 17 09:56:40 2007 -0500
@@ -29,8 +29,13 @@ _xend_autorestart = {
"SunOS": False,
}
+_pygrub_path = {
+ "SunOS": "/usr/lib/xen/bin/pygrub"
+}
+
def _get(var, default=None):
return var.get(os.uname()[0], default)
scripts_dir = _get(_scripts_dir, "/etc/xen/scripts")
xend_autorestart = _get(_xend_autorestart)
+pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub")
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/server/SrvDaemon.py
--- a/tools/python/xen/xend/server/SrvDaemon.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/server/SrvDaemon.py Wed Jan 17 09:56:40 2007 -0500
@@ -203,7 +203,7 @@ class Daemon:
if not osdep.xend_autorestart:
self.run(os.fdopen(w, 'w'))
- break
+ os._exit(0)
pid = self.fork_pid()
if pid:
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/server/blkif.py Wed Jan 17 09:56:40 2007 -0500
@@ -101,6 +101,7 @@ class BlkifController(DevController):
self.writeBackend(devid,
'type', new_back['type'],
'params', new_back['params'])
+ return new_back.get('uuid')
else:
raise VmError('Refusing to reconfigure device %s:%d to %s' %
(self.deviceClass, devid, config))
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/server/netif.py Wed Jan 17 09:56:40 2007 -0500
@@ -150,16 +150,20 @@ class NetifController(DevController):
devid = self.allocateDeviceID()
+ # The default type is 'netfront'.
+ if not typ:
+ typ = 'netfront'
+
if not mac:
mac = randomMAC()
back = { 'script' : script,
'mac' : mac,
- 'handle' : "%i" % devid }
+ 'handle' : "%i" % devid,
+ 'type' : typ }
if typ == 'ioemu':
front = {}
- back['type'] = 'ioemu'
else:
front = { 'handle' : "%i" % devid,
'mac' : mac }
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/server/vfbif.py
--- a/tools/python/xen/xend/server/vfbif.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xend/server/vfbif.py Wed Jan 17 09:56:40 2007 -0500
@@ -64,7 +64,7 @@ class VfbifController(DevController):
if config.has_key("vncunused"):
args += ["--unused"]
elif config.has_key("vncdisplay"):
- args += ["--vncport", "%d" % (5900 + config["vncdisplay"])]
+ args += ["--vncport", "%d" % (5900 + int(config["vncdisplay"]))]
vnclisten = config.get("vnclisten",
xen.xend.XendRoot.instance().get_vnclisten_address())
args += [ "--listen", vnclisten ]
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xm/create.py Wed Jan 17 09:56:40 2007 -0500
@@ -28,6 +28,7 @@ import xmlrpclib
from xen.xend import sxp
from xen.xend import PrettyPrint
+from xen.xend import osdep
import xen.xend.XendClient
from xen.xend.XendBootloader import bootloader
from xen.util import blkif
@@ -295,7 +296,8 @@ gopts.var('vfb', val="type={vnc,sdl},vnc
For type=vnc, connect an external vncviewer. The server will listen
on ADDR (default 127.0.0.1) on port N+5900. N defaults to the
domain id. If vncunused=1, the server will try to find an arbitrary
- unused port above 5900.
+ unused port above 5900. vncpasswd overrides the XenD configured
+ default password.
For type=sdl, a viewer will be started automatically using the
given DISPLAY and XAUTHORITY, which default to the current user's
ones.""")
@@ -304,7 +306,7 @@ gopts.var('vif', val="type=TYPE,mac=MAC,
fn=append_value, default=[],
use="""Add a network interface with the given MAC address and bridge.
The vif is configured by calling the given configuration script.
- If type is not specified, default is netfront not ioemu device.
+ If type is not specified, default is netfront.
If mac is not specified a random MAC address is used.
If not specified then the network backend chooses it's own MAC address.
If bridge is not specified the first bridge found is used.
@@ -722,8 +724,11 @@ def run_bootloader(vals, config_image):
"--entry= directly.")
vals.bootargs = "--entry=%s" %(vals.bootentry,)
+ kernel = sxp.child_value(config_image, 'kernel')
+ ramdisk = sxp.child_value(config_image, 'ramdisk')
+ args = sxp.child_value(config_image, 'args')
return bootloader(vals.bootloader, file, not vals.console_autoconnect,
- vals.bootargs, config_image)
+ vals.bootargs, kernel, ramdisk, args)
def make_config(vals):
"""Create the domain configuration.
@@ -763,7 +768,14 @@ def make_config(vals):
config_image = configure_image(vals)
if vals.bootloader:
- config_image = run_bootloader(vals, config_image)
+ if vals.bootloader == "pygrub":
+ vals.bootloader = osdep.pygrub_path
+
+ # if a kernel is specified, we're using the bootloader
+ # non-interactively, and need to let xend run it so we preserve the
+ # real kernel choice.
+ if not vals.kernel:
+ config_image = run_bootloader(vals, config_image)
config.append(['bootloader', vals.bootloader])
if vals.bootargs:
config.append(['bootloader_args', vals.bootargs])
@@ -827,7 +839,7 @@ def preprocess_ioports(vals):
if len(d) == 1:
d.append(d[0])
# Components are in hex: add hex specifier.
- hexd = map(lambda v: '0x'+v, d)
+ hexd = ['0x' + x for x in d]
ioports.append(hexd)
vals.ioports = ioports
@@ -994,8 +1006,6 @@ def preprocess_vnc(vals):
vals.extra = vnc + ' ' + vals.extra
def preprocess(vals):
- if not vals.kernel and not vals.bootloader:
- err("No kernel specified")
preprocess_disk(vals)
preprocess_pci(vals)
preprocess_ioports(vals)
@@ -1180,6 +1190,7 @@ def config_security_check(config, verbos
try:
domain_label = security.ssidref2label(security.NULL_SSIDREF)
except:
+ import traceback
traceback.print_exc(limit=1)
return 0
domain_policy = 'NULL'
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xm/main.py Wed Jan 17 09:56:40 2007 -0500
@@ -130,7 +130,7 @@ SUBCOMMAND_HELP = {
'log' : ('', 'Print Xend log'),
'rename' : ('<Domain> <NewDomainName>', 'Rename a domain.'),
'sched-sedf' : ('<Domain> [options]', 'Get/set EDF parameters.'),
- 'sched-credit': ('-d <Domain> [-w[=WEIGHT]|-c[=CAP]]',
+ 'sched-credit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]]]',
'Get/set credit scheduler parameters.'),
'sysrq' : ('<Domain> <letter>', 'Send a sysrq to a domain.'),
'vcpu-list' : ('[<Domain>]',
@@ -717,6 +717,10 @@ def parse_sedf_info(info):
'weight' : get_info('weight', int, -1),
}
+def domid_match(domid, info):
+ return domid is None or domid == info['name'] or \
+ domid == str(info['domid'])
+
def xm_brief_list(doms):
print '%-40s %3s %5s %5s %10s %9s' % \
('Name', 'ID', 'Mem', 'VCPUs', 'State', 'Time(s)')
@@ -1091,10 +1095,6 @@ def xm_sched_sedf(args):
print( ("%(name)-32s %(domid)3d %(period)9.1f %(slice)9.1f" +
" %(latency)7.1f %(extratime)6d %(weight)6d") % info)
- def domid_match(domid, info):
- return domid is None or domid == info['name'] or \
- domid == str(info['domid'])
-
# we want to just display current info if no parameters are passed
if len(args) == 0:
domid = None
@@ -1174,27 +1174,43 @@ def xm_sched_credit(args):
err(opterr)
usage('sched-credit')
- domain = None
+ domid = None
weight = None
cap = None
for o, a in opts:
if o == "-d":
- domain = a
+ domid = a
elif o == "-w":
weight = int(a)
elif o == "-c":
cap = int(a);
- if domain is None:
- # place holder for system-wide scheduler parameters
- err("No domain given.")
- usage('sched-credit')
+ doms = filter(lambda x : domid_match(domid, x),
+ [parse_doms_info(dom)
+ for dom in getDomains(None, 'running')])
if weight is None and cap is None:
- print server.xend.domain.sched_credit_get(domain)
- else:
- result = server.xend.domain.sched_credit_set(domain, weight, cap)
+ # print header if we aren't setting any parameters
+ print '%-33s %-2s %-6s %-4s' % ('Name','ID','Weight','Cap')
+
+ for d in doms:
+ try:
+ info = server.xend.domain.sched_credit_get(d['domid'])
+ except xmlrpclib.Fault:
+ # domain does not support sched-credit?
+ info = {'weight': -1, 'cap': -1}
+
+ info['name'] = d['name']
+ info['domid'] = int(d['domid'])
+ print( ("%(name)-32s %(domid)3d %(weight)6d %(cap)4d") % info)
+ else:
+ if domid is None:
+ # place holder for system-wide scheduler parameters
+ err("No domain given.")
+ usage('sched-credit')
+
+ result = server.xend.domain.sched_credit_set(domid, weight, cap)
if result != 0:
err(str(result))
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xm/migrate.py Wed Jan 17 09:56:40 2007 -0500
@@ -52,6 +52,7 @@ def help():
def main(argv):
opts = gopts
+ opts.reset()
args = opts.parse(argv)
if len(args) != 2:
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/opts.py
--- a/tools/python/xen/xm/opts.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xm/opts.py Wed Jan 17 09:56:40 2007 -0500
@@ -559,14 +559,6 @@ def set_bool(opt, k, v):
else:
opt.opts.err('Invalid value:' +v)
-def set_u32(opt, k, v):
- """Set an option to an u32 value."""
- try:
- v = u32(v)
- except:
- opt.opts.err('Invalid value: ' + str(v))
- opt.set(v)
-
def set_value(opt, k, v):
"""Set an option to a value."""
opt.set(v)
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/shutdown.py
--- a/tools/python/xen/xm/shutdown.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/python/xen/xm/shutdown.py Wed Jan 17 09:56:40 2007 -0500
@@ -118,6 +118,7 @@ def main_dom(opts, args):
def main(argv):
opts = gopts
+ opts.reset()
args = opts.parse(argv)
if opts.vals.help:
return
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/tests/Makefile
--- a/tools/tests/Makefile Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/tests/Makefile Wed Jan 17 09:56:40 2007 -0500
@@ -7,12 +7,21 @@ TARGET := test_x86_emulator
.PHONY: all
all: $(TARGET)
+.PHONY: blowfish.bin
+blowfish.bin:
+ make -f blowfish.mk all
+
+blowfish.h: blowfish.bin
+ (echo "static unsigned int blowfish_code[] = {"; \
+ od -v -t x $< | sed 's/^[0-9]* /0x/' | sed 's/ /, 0x/g' | sed 's/$$/,/';\
+ echo "};") >$@
+
$(TARGET): x86_emulate.o test_x86_emulator.o
$(HOSTCC) -o $@ $^
.PHONY: clean
clean:
- rm -rf $(TARGET) *.o *~ core
+ rm -rf $(TARGET) *.o *~ core blowfish.h blowfish.bin
.PHONY: install
install:
@@ -20,5 +29,5 @@ x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/
x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/x86_emulate.c
$(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $<
-%.o: %.c
+test_x86_emulator.o: test_x86_emulator.c blowfish.h
$(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $<
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/tests/test_x86_emulator.c Wed Jan 17 09:56:40 2007 -0500
@@ -15,6 +15,19 @@ typedef int64_t s64;
#include <asm-x86/x86_emulate.h>
#include <sys/mman.h>
+#include "blowfish.h"
+
+#define MMAP_SZ 16384
+
+/* EFLAGS bit definitions. */
+#define EFLG_OF (1<<11)
+#define EFLG_DF (1<<10)
+#define EFLG_SF (1<<7)
+#define EFLG_ZF (1<<6)
+#define EFLG_AF (1<<4)
+#define EFLG_PF (1<<2)
+#define EFLG_CF (1<<0)
+
static int read(
unsigned int seg,
unsigned long offset,
@@ -97,20 +110,25 @@ int main(int argc, char **argv)
{
struct x86_emulate_ctxt ctxt;
struct cpu_user_regs regs;
- char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
- unsigned int *res;
+ char *instr;
+ unsigned int *res, i;
int rc;
+#ifndef __x86_64__
+ unsigned int bcdres_native, bcdres_emul;
+#endif
ctxt.regs = &regs;
- ctxt.mode = X86EMUL_MODE_PROT32;
-
- res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE,
+ ctxt.addr_size = 32;
+ ctxt.sp_size = 32;
+
+ res = mmap((void *)0x100000, MMAP_SZ, PROT_READ|PROT_WRITE,
MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
if ( res == MAP_FAILED )
{
fprintf(stderr, "mmap to low address failed\n");
exit(1);
}
+ instr = (char *)res + 0x100;
printf("%-40s", "Testing addl %%ecx,(%%eax)...");
instr[0] = 0x01; instr[1] = 0x08;
@@ -119,7 +137,7 @@ int main(int argc, char **argv)
regs.ecx = 0x12345678;
regs.eax = (unsigned long)res;
*res = 0x7FFFFFFF;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x92345677) ||
(regs.eflags != 0xa94) ||
@@ -133,7 +151,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
regs.eax = 0x7FFFFFFF;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(regs.ecx != 0x12345678) ||
(regs.eax != 0x92345677) ||
@@ -152,7 +170,7 @@ int main(int argc, char **argv)
regs.ecx = 0x12345678UL;
#endif
regs.eax = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x92345677) ||
(regs.ecx != 0x8000000FUL) ||
@@ -166,7 +184,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.ecx = ~0UL;
regs.eax = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x92345677) ||
(regs.ecx != 0x92345677UL) ||
@@ -181,7 +199,7 @@ int main(int argc, char **argv)
regs.eax = 0x92345677UL;
regs.ecx = 0xAA;
regs.ebx = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x923456AA) ||
(regs.eflags != 0x244) ||
@@ -197,7 +215,7 @@ int main(int argc, char **argv)
regs.eax = 0xAABBCC77UL;
regs.ecx = 0xFF;
regs.ebx = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x923456AA) ||
((regs.eflags&0x240) != 0x200) ||
@@ -213,7 +231,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
regs.eax = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x12345678) ||
(regs.eflags != 0x200) ||
@@ -230,7 +248,7 @@ int main(int argc, char **argv)
regs.eax = 0x923456AAUL;
regs.ecx = 0xDDEEFF00L;
regs.ebx = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0xDDEEFF00) ||
(regs.eflags != 0x244) ||
@@ -247,7 +265,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.esi = (unsigned long)res + 0;
regs.edi = (unsigned long)res + 2;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x44554455) ||
(regs.eflags != 0x200) ||
@@ -264,7 +282,7 @@ int main(int argc, char **argv)
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x2233445D) ||
((regs.eflags&0x201) != 0x201) ||
@@ -279,7 +297,7 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = -32;
regs.edi = (unsigned long)(res+1);
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x2233445E) ||
((regs.eflags&0x201) != 0x201) ||
@@ -299,7 +317,7 @@ int main(int argc, char **argv)
regs.ecx = 0xCCCCFFFF;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(res[0] != 0x9999AAAA) ||
(res[1] != 0xCCCCFFFF) ||
@@ -313,7 +331,7 @@ int main(int argc, char **argv)
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)res;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(res[0] != 0x9999AAAA) ||
(res[1] != 0xCCCCFFFF) ||
@@ -331,7 +349,7 @@ int main(int argc, char **argv)
regs.ecx = 0x12345678;
regs.eax = (unsigned long)res;
*res = 0x82;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x82) ||
(regs.ecx != 0xFFFFFF82) ||
@@ -347,7 +365,7 @@ int main(int argc, char **argv)
regs.ecx = 0x12345678;
regs.eax = (unsigned long)res;
*res = 0x1234aa82;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x1234aa82) ||
(regs.ecx != 0xaa82) ||
@@ -363,7 +381,7 @@ int main(int argc, char **argv)
regs.ecx = (unsigned long)res;
regs.eax = 0x12345678;
*res = 0x11111111;
- rc = x86_emulate_memop(&ctxt, &emulops);
+ rc = x86_emulate(&ctxt, &emulops);
if ( (rc != 0) ||
(*res != 0x11116789) ||
(regs.eax != 0x12341111) ||
@@ -371,6 +389,139 @@ int main(int argc, char **argv)
(regs.eip != (unsigned long)&instr[4]) )
goto fail;
printf("okay\n");
+
+ printf("%-40s", "Testing dec %%ax...");
+ instr[0] = 0x66; instr[1] = 0x48;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.eax = 0x00000000;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (regs.eax != 0x0000ffff) ||
+ ((regs.eflags&0x240) != 0x200) ||
+ (regs.eip != (unsigned long)&instr[2]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing lea 8(%%ebp),%%eax...");
+ instr[0] = 0x8d; instr[1] = 0x45; instr[2] = 0x08;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.eax = 0x12345678;
+ regs.ebp = 0xaaaaaaaa;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (regs.eax != 0xaaaaaab2) ||
+ ((regs.eflags&0x240) != 0x200) ||
+ (regs.eip != (unsigned long)&instr[3]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing daa/das (all inputs)...");
+#ifndef __x86_64__
+ /* Bits 0-7: AL; Bit 8: EFLG_AF; Bit 9: EFLG_CF; Bit 10: DAA vs. DAS. */
+ for ( i = 0; i < 0x800; i++ )
+ {
+ regs.eflags = (i & 0x200) ? EFLG_CF : 0;
+ regs.eflags |= (i & 0x100) ? EFLG_AF : 0;
+ if ( i & 0x400 )
+ __asm__ (
+ "pushf; and $0xffffffee,(%%esp); or %1,(%%esp); popf; das; "
+ "pushf; popl %1"
+ : "=a" (bcdres_native), "=r" (regs.eflags)
+ : "0" (i & 0xff), "1" (regs.eflags) );
+ else
+ __asm__ (
+ "pushf; and $0xffffffee,(%%esp); or %1,(%%esp); popf; daa; "
+ "pushf; popl %1"
+ : "=a" (bcdres_native), "=r" (regs.eflags)
+ : "0" (i & 0xff), "1" (regs.eflags) );
+ bcdres_native |= (regs.eflags & EFLG_PF) ? 0x1000 : 0;
+ bcdres_native |= (regs.eflags & EFLG_ZF) ? 0x800 : 0;
+ bcdres_native |= (regs.eflags & EFLG_SF) ? 0x400 : 0;
+ bcdres_native |= (regs.eflags & EFLG_CF) ? 0x200 : 0;
+ bcdres_native |= (regs.eflags & EFLG_AF) ? 0x100 : 0;
+
+ instr[0] = (i & 0x400) ? 0x2f: 0x27; /* daa/das */
+ regs.eflags = (i & 0x200) ? EFLG_CF : 0;
+ regs.eflags |= (i & 0x100) ? EFLG_AF : 0;
+ regs.eip = (unsigned long)&instr[0];
+ regs.eax = (unsigned char)i;
+ rc = x86_emulate(&ctxt, &emulops);
+ bcdres_emul = regs.eax;
+ bcdres_emul |= (regs.eflags & EFLG_PF) ? 0x1000 : 0;
+ bcdres_emul |= (regs.eflags & EFLG_ZF) ? 0x800 : 0;
+ bcdres_emul |= (regs.eflags & EFLG_SF) ? 0x400 : 0;
+ bcdres_emul |= (regs.eflags & EFLG_CF) ? 0x200 : 0;
+ bcdres_emul |= (regs.eflags & EFLG_AF) ? 0x100 : 0;
+ if ( (rc != 0) || (regs.eax > 255) ||
+ (regs.eip != (unsigned long)&instr[1]) )
+ goto fail;
+
+ if ( bcdres_emul != bcdres_native )
+ {
+ printf("%s: AL=%02x %s %s\n"
+ "Output: AL=%02x %s %s %s %s %s\n"
+ "Emul.: AL=%02x %s %s %s %s %s\n",
+ (i & 0x400) ? "DAS" : "DAA",
+ (unsigned char)i,
+ (i & 0x200) ? "CF" : " ",
+ (i & 0x100) ? "AF" : " ",
+ (unsigned char)bcdres_native,
+ (bcdres_native & 0x200) ? "CF" : " ",
+ (bcdres_native & 0x100) ? "AF" : " ",
+ (bcdres_native & 0x1000) ? "PF" : " ",
+ (bcdres_native & 0x800) ? "ZF" : " ",
+ (bcdres_native & 0x400) ? "SF" : " ",
+ (unsigned char)bcdres_emul,
+ (bcdres_emul & 0x200) ? "CF" : " ",
+ (bcdres_emul & 0x100) ? "AF" : " ",
+ (bcdres_emul & 0x1000) ? "PF" : " ",
+ (bcdres_emul & 0x800) ? "ZF" : " ",
+ (bcdres_emul & 0x400) ? "SF" : " ");
+ goto fail;
+ }
+ }
+ printf("okay\n");
+#else
+ printf("skipped\n");
+#endif
+
+ printf("Testing blowfish code sequence");
+ memcpy(res, blowfish_code, sizeof(blowfish_code));
+ regs.eax = 2;
+ regs.edx = 1;
+ regs.eip = (unsigned long)res;
+ regs.esp = (unsigned long)res + MMAP_SZ - 4;
+ *(uint32_t *)(unsigned long)regs.esp = 0x12345678;
+ regs.eflags = 2;
+ i = 0;
+ while ( (uint32_t)regs.eip != 0x12345678 )
+ {
+ if ( (i++ & 8191) == 0 )
+ printf(".");
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != 0 )
+ {
+ printf("failed at %%eip == %08x\n", (unsigned int)regs.eip);
+ return 1;
+ }
+ }
+ if ( (regs.esp != ((unsigned long)res + MMAP_SZ)) ||
+ (regs.eax != 2) || (regs.edx != 1) )
+ goto fail;
+ printf("okay\n");
+
+#ifndef __x86_64__
+ printf("%-40s", "Testing blowfish native execution...");
+ asm volatile (
+ "movl $0x100000,%%ecx; call *%%ecx"
+ : "=a" (regs.eax), "=d" (regs.edx)
+ : "0" (2), "1" (1) : "ecx" );
+ if ( (regs.eax != 2) || (regs.edx != 1) )
+ goto fail;
+ printf("okay\n");
+#endif
return 0;
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xenstat/xentop/xentop.c
--- a/tools/xenstat/xentop/xentop.c Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xenstat/xentop/xentop.c Wed Jan 17 09:56:40 2007 -0500
@@ -1067,9 +1067,9 @@ int main(int argc, char **argv)
gettimeofday(&curtime, NULL);
top();
oldtime = curtime;
- sleep(delay);
if ((!loop) && !(--iterations))
break;
+ sleep(delay);
} while (1);
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/README
--- a/tools/xm-test/README Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/README Wed Jan 17 09:56:40 2007 -0500
@@ -207,6 +207,49 @@ running DomUs on the system to provide e
running DomUs on the system to provide each test with a "clean slate".
+Testing the XML-RPC and Xen-API interfaces of xend
+==================================================
+
+The xm-test suite can be used to test xm's interface with xend using
+either XML-RPC or the Xen-API. In order to use either one of these modes,
+xm needs to be configured using its configuration file
+'/etc/xen/xm-config.xml'.
+Note: After a fresh install of the xen sources, the default configuration
+is to use the XML-RPC interface for communication with xend.
+
+Example content for the xm-config.xml for using the Xen-API looks as
+follows:
+
+<xm>
+ <server type='Xen-API'
+ uri='http://localhost:9363/'
+ username='me'
+ password='mypassword' />
+</xm>
+
+This configuration makes xm talk to xend using port 9363. For this to
+work, xend also needs to be configured to listen on port 9363. Therefore,
+the following line must be in /etc/xen/xend-config.sxp:
+
+(xen-api-server (( 127.0.0.1:9363 none )))
+
+To communicate via the legacy XML-RPC interface, the file
+'/etc/xen/xm-config.xml' may simply have the following content or
+may be completely removed from the /etc/xen directory.
+
+<xm>
+</xm>
+
+A few tests have been written for the xm-test suite that test the
+Xen-API interface directly without relying on 'xm'. These tests can be
+found in the grouptest 'xapi' and for them to work properly, xm must have
+been configured to use the Xen-API following the instructions above. To
+run these tests, the following command line can be invoked:
+
+ # ./runtest.sh -g xapi <logfile>
+
+
+
Extending
=========
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/configure.ac
--- a/tools/xm-test/configure.ac Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/configure.ac Wed Jan 17 09:56:40 2007 -0500
@@ -150,6 +150,7 @@ AC_CONFIG_FILES([
tests/vcpu-pin/Makefile
tests/vcpu-disable/Makefile
tests/vtpm/Makefile
+ tests/xapi/Makefile
tests/enforce_dom0_cpus/Makefile
lib/XmTestReport/xmtest.py
lib/XmTestLib/config.py
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/grouptest/xapi
--- a/tools/xm-test/grouptest/xapi Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/grouptest/xapi Wed Jan 17 09:56:40 2007 -0500
@@ -1,1 +1,2 @@ vtpm 09_vtpm-xapi.test
+xapi
vtpm 09_vtpm-xapi.test
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/lib/XmTestLib/XenDomain.py
--- a/tools/xm-test/lib/XmTestLib/XenDomain.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/lib/XmTestLib/XenDomain.py Wed Jan 17 09:56:40 2007 -0500
@@ -29,6 +29,7 @@ from config import *
from config import *
from Console import *
from XenDevice import *
+from DomainTracking import *
from acm import *
@@ -147,7 +148,7 @@ class DomainError(Exception):
class XenDomain:
- def __init__(self, name=None, config=None):
+ def __init__(self, name=None, config=None, isManaged=False):
"""Create a domain object.
@param config: String filename of config file
"""
@@ -162,6 +163,10 @@ class XenDomain:
self.devices = {}
self.netEnv = "bridge"
+ if os.getenv("XM_MANAGED_DOMAINS"):
+ isManaged = True
+ self.isManaged = isManaged
+
# Set domain type, either PV for ParaVirt domU or HVM for
# FullVirt domain
if ENABLE_HVM_SUPPORT:
@@ -171,7 +176,17 @@ class XenDomain:
def start(self, noConsole=False):
- ret, output = traceCommand("xm create %s" % self.config)
+ if not self.isManaged:
+ ret, output = traceCommand("xm create %s" % self.config)
+ else:
+ ret, output = traceCommand("xm new %s" % self.config)
+ if ret != 0:
+ _ret, output = traceCommand("xm delete " +
+ self.config.getOpt("name"))
+ else:
+ ret, output = traceCommand("xm start " +
+ self.config.getOpt("name"))
+ addManagedDomain(self.config.getOpt("name"))
if ret != 0:
raise DomainError("Failed to create domain",
@@ -218,6 +233,10 @@ class XenDomain:
self.closeConsole()
ret, output = traceCommand(prog + cmd + self.config.getOpt("name"))
+ if self.isManaged:
+ ret, output = traceCommand(prog + " delete " +
+ self.config.getOpt("name"))
+ delManagedDomain(self.config.getOpt("name"))
return ret
@@ -296,7 +315,7 @@ class XmTestDomain(XenDomain):
class XmTestDomain(XenDomain):
def __init__(self, name=None, extraConfig=None,
- baseConfig=arch.configDefaults):
+ baseConfig=arch.configDefaults, isManaged=False):
"""Create a new xm-test domain
@param name: The requested domain name
@param extraConfig: Additional configuration options
@@ -312,7 +331,8 @@ class XmTestDomain(XenDomain):
elif not config.getOpt("name"):
config.setOpt("name", getUniqueName())
- XenDomain.__init__(self, config.getOpt("name"), config=config)
+ XenDomain.__init__(self, config.getOpt("name"), config=config,
+ isManaged=isManaged)
def minSafeMem(self):
return arch.minSafeMem
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/lib/XmTestLib/Xm.py
--- a/tools/xm-test/lib/XmTestLib/Xm.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/lib/XmTestLib/Xm.py Wed Jan 17 09:56:40 2007 -0500
@@ -48,6 +48,8 @@ def domid(name):
status, output = traceCommand("xm domid " + name);
if status != 0 or "Traceback" in output:
+ return -1
+ if output == "None":
return -1
try:
return int(output)
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/lib/XmTestLib/xapi.py
--- a/tools/xm-test/lib/XmTestLib/xapi.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/lib/XmTestLib/xapi.py Wed Jan 17 09:56:40 2007 -0500
@@ -17,50 +17,49 @@
# Copyright (C) 2006 IBM Corporation
#============================================================================
+import atexit
import os
import sys
from XmTestLib import *
-from xen.util.xmlrpclib2 import ServerProxy
+from xen.xm import main as xmmain
+from xen.xm import XenAPI
+from xen.xm.opts import OptionError
from types import DictType
+import xml.dom.minidom
+def get_login_pwd():
+ if xmmain.serverType == xmmain.SERVER_XEN_API:
+ try:
+ login, password = xmmain.parseAuthentication()
+ return (login, password)
+ except:
+ raise OptionError("Configuration for login/pwd not found. "
+ "Need to run xapi-setup.py?")
+ raise OptionError("Xm configuration file not using Xen-API for "
+ "communication with xend.")
-XAPI_DEFAULT_LOGIN = " "
-XAPI_DEFAULT_PASSWORD = " "
+sessions=[]
-class XenAPIError(Exception):
- pass
-
-
-#A list of VMs' UUIDs that were created using vm_create
-_VMuuids = []
-
-#Terminate previously created managed(!) VMs and destroy their configs
-def vm_destroy_all():
- server, session = _connect()
- for uuid in _VMuuids:
- execute(server.VM.hard_shutdown, session, uuid)
- execute(server.VM.destroy , session, uuid)
-
-
-def execute(fn, *args):
- result = fn(*args)
- if type(result) != DictType:
- raise TypeError("Function returned object of type: %s" %
- str(type(result)))
- if 'Value' not in result:
- raise XenAPIError(*result['ErrorDescription'])
- return result['Value']
-
-_initialised = False
-_server = None
-_session = None
-def _connect(*args):
- global _server, _session, _initialised
- if not _initialised:
- _server = ServerProxy('httpu:///var/run/xend/xen-api.sock')
- login = XAPI_DEFAULT_LOGIN
- password = XAPI_DEFAULT_PASSWORD
- creds = (login, password)
- _session = execute(_server.session.login_with_password, *creds)
- _initialised = True
- return (_server, _session)
+def connect(*args):
+ try:
+ creds = get_login_pwd()
+ except Exception, e:
+ FAIL("%s" % str(e))
+ try:
+ session = XenAPI.Session(xmmain.serverURI)
+ except:
+ raise OptionError("Could not create XenAPI session with Xend." \
+ "URI=%s" % xmmain.serverURI)
+ try:
+ session.login_with_password(*creds)
+ except:
+ raise OptionError("Could not login to Xend. URI=%s" % xmmain.serverURI)
+ def logout():
+ try:
+ for s in sessions:
+ s.xenapi.session.logout()
+ except:
+ pass
+ sessions.append(session)
+ atexit.register(logout)
+ return session
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/ramdisk/Makefile.am
--- a/tools/xm-test/ramdisk/Makefile.am Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/ramdisk/Makefile.am Wed Jan 17 09:56:40 2007 -0500
@@ -36,7 +36,12 @@ XMTEST_VER_IMG = initrd-$(XMTEST_MAJ_VER
XMTEST_VER_IMG = initrd-$(XMTEST_MAJ_VER)-$(BR_ARCH).img
XMTEST_DL_IMG = $(shell echo $(XMTEST_VER_IMG) | sed -e 's/x86_64/i386/g')
-EXTRA_ROOT_DIRS = sys
+EXTRA_ROOT_DIRS = sys modules
+
+BLKDRV = /lib/modules/$(shell uname -r)/kernel/drivers/xen/blkfront/xenblk.ko
+NETDRV = /lib/modules/$(shell uname -r)/kernel/drivers/xen/netfront/xennet.ko
+PKTDRV = /lib/modules/$(shell uname -r)/kernel/net/packet/af_packet.ko
+
if HVM
all: initrd.img disk.img
@@ -60,7 +65,11 @@ endif
$(XMTEST_VER_IMG): $(BR_IMG)
chmod a+x skel/etc/init.d/rcS
- (cd skel; mkdir -p $(EXTRA_ROOT_DIRS); tar cf - .) \
+ cd skel && mkdir -p $(EXTRA_ROOT_DIRS)
+ -[ -e "$(BLKDRV)" ] && cp $(BLKDRV) skel/modules
+ -[ -e "$(NETDRV)" ] && cp $(NETDRV) skel/modules
+ -[ -e "$(PKTDRV)" ] && cp $(PKTDRV) skel/modules
+ (cd skel; tar cf - .) \
| (cd $(BR_SRC)/$(BR_ROOT); tar xvf -)
cd $(BR_SRC) && make
cp $(BR_IMG) $(XMTEST_VER_IMG)
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/ramdisk/skel/etc/init.d/rcS
--- a/tools/xm-test/ramdisk/skel/etc/init.d/rcS Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/ramdisk/skel/etc/init.d/rcS Wed Jan 17 09:56:40 2007 -0500
@@ -6,3 +6,14 @@ if uname -r | grep -q '^2.6'; then
if uname -r | grep -q '^2.6'; then
mount -t sysfs none /sys
fi
+
+# If the block, net, and packet drivers are modules, we need to load them
+if test -e /modules/xenblk.ko; then
+ insmod /modules/xenblk.ko > /dev/null 2>&1
+fi
+if test -e /modules/xennet.ko; then
+ insmod /modules/xennet.ko > /dev/null 2>&1
+fi
+if test -e /modules/af_packet.ko; then
+ insmod /modules/af_packet.ko > /dev/null 2>&1
+fi
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/runtest.sh
--- a/tools/xm-test/runtest.sh Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/runtest.sh Wed Jan 17 09:56:40 2007 -0500
@@ -16,6 +16,7 @@ usage() {
echo " -r <url> : url of test results repository to use"
echo " -s <report> : just submit report <report>"
echo " -u : unsafe -- do not run the sanity checks before
starting"
+ echo " -md : all created domains are xend-'managed' domains"
echo " -h | --help : show this help"
}
@@ -218,10 +219,13 @@ unsafe=no
unsafe=no
GROUPENTERED=default
+#Prepare for usage with ACM
if [ -d /etc/xen/acm-security/policies ]; then
cp -f tests/security-acm/xm-test-security_policy.xml \
/etc/xen/acm-security/policies
fi
+
+unset XM_MANAGED_DOMAINS
# Resolve options
while [ $# -gt 0 ]
@@ -260,6 +264,10 @@ while [ $# -gt 0 ]
unsafe=yes
report=no
;;
+ -md)
+ echo "(use managed domains)"
+ export XM_MANAGED_DOMAINS=1
+ ;;
-h|--help)
usage
exit 0
diff -r 5568efb41da4 -r 3f6a2745b3a3
tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py
--- a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py
Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py
Wed Jan 17 09:56:40 2007 -0500
@@ -6,7 +6,7 @@ from XmTestLib import *
from XmTestLib import *
from XmTestLib.block_utils import *
-import re
+import re, time
def checkXmLongList(domain):
s, o = traceCommand("xm list --long %s" % domain.getName())
@@ -35,4 +35,8 @@ block_detach(domain, "xvda1")
block_detach(domain, "xvda1")
if checkXmLongList(domain):
- FAIL("xm long list does not show that xvda1 was removed")
+ # device info is removed by hotplug scripts - give them a chance
+ # to fire (they run asynchronously with us).
+ time.sleep(1)
+ if checkXmLongList(domain):
+ FAIL("xm long list does not show that xvda1 was removed")
diff -r 5568efb41da4 -r 3f6a2745b3a3
tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py
--- a/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py
Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py
Wed Jan 17 09:56:40 2007 -0500
@@ -2,14 +2,27 @@
#
# Sched-credit tests modified from SEDF tests
#
+
+import re
+
from XmTestLib import *
+paramsRE = re.compile(r'^[^ ]* *[^ ]* *([^ ]*) *([^ ]*)$')
+
def get_sched_credit_params(domain):
- status, output = traceCommand("xm sched-credit -d %s" %(domain.getName()))
- params = output.strip('{}').split(', ')
- cap = int(params[0].split(':')[1].strip(' '))
- weight = int(params[1].split(':')[1].strip(' '))
- return (status, (weight, cap))
+ status, output = traceCommand("xm sched-credit -d %s | tail -1" %
+ domain.getName())
+
+ if status != 0:
+ FAIL("Getting sched-credit parameters return non-zero rv (%d)",
+ status)
+
+ m = paramsRE.match(output)
+ if not m:
+ FAIL("xm sched-credit gave bad output")
+ weight = int(m.group(1))
+ cap = int(m.group(2))
+ return (weight, cap)
def set_sched_credit_weight(domain, weight):
status, output = traceCommand("xm sched-credit -d %s -w %d"
%(domain.getName(), weight))
@@ -31,11 +44,8 @@ except DomainError, e:
FAIL(str(e))
# check default param values
-(status, params) = get_sched_credit_params(domain)
-if status != 0:
- FAIL("Getting sched-credit parameters return non-zero rv (%d)", status)
+(weight, cap) = get_sched_credit_params(domain)
-(weight, cap) = params
if weight != 256:
FAIL("default weight is 256 (got %d)", weight)
if cap != 0:
@@ -51,11 +61,8 @@ if status != 0:
FAIL("Setting sched-credit cap return non-zero rv (%d)", status)
# check new param values
-(status, params) = get_sched_credit_params(domain)
-if status != 0:
- FAIL("Getting sched-credit parameters return non-zero rv (%d)", status)
+(weight, cap) = get_sched_credit_params(domain)
-(weight, cap) = params
if weight != 512:
FAIL("expected weight of 512 (got %d)", weight)
if cap != 100:
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/tests/vtpm/09_vtpm-xapi.py
--- a/tools/xm-test/tests/vtpm/09_vtpm-xapi.py Mon Jan 15 13:27:20 2007 -0500
+++ b/tools/xm-test/tests/vtpm/09_vtpm-xapi.py Wed Jan 17 09:56:40 2007 -0500
@@ -6,71 +6,66 @@
# Test to test the vtpm class through the Xen-API
from XmTestLib import xapi
-from XmTestLib.XenManagedDomain import XmTestManagedDomain
+from XmTestLib.XenAPIDomain import XmTestAPIDomain
from XmTestLib import *
from vtpm_utils import *
import commands
import os
-def do_test():
- domain = XmTestManagedDomain()
- vm_uuid = domain.get_uuid()
+try:
+ # XmTestAPIDomain tries to establish a connection to XenD
+ domain = XmTestAPIDomain()
+except Exception, e:
+ SKIP("Skipping test. Error: %s" % str(e))
+vm_uuid = domain.get_uuid()
- vtpmcfg = {}
- vtpmcfg['type'] = "paravirtualised"
- vtpmcfg['backend'] = "Domain-0"
- vtpmcfg['instance'] = 1
- vtpmcfg['VM'] = vm_uuid
+vtpmcfg = {}
+vtpmcfg['type'] = "paravirtualised"
+vtpmcfg['backend'] = "Domain-0"
+vtpmcfg['instance'] = 1
+vtpmcfg['VM'] = vm_uuid
- server, session = xapi._connect()
+session = xapi.connect()
- vtpm_uuid = xapi.execute(server.VTPM.create, session, vtpmcfg)
+vtpm_uuid = session.xenapi.VTPM.create(vtpmcfg)
- vtpm_id = xapi.execute(server.VTPM.get_instance, session, vtpm_uuid)
- vtpm_be = xapi.execute(server.VTPM.get_backend , session, vtpm_uuid)
- if vtpm_be != vtpmcfg['backend']:
- FAIL("vTPM's backend is in '%s', expected: '%s'" %
- (vtpm_be, vtpmcfg['backend']))
+vtpm_id = session.xenapi.VTPM.get_instance(vtpm_uuid)
+vtpm_be = session.xenapi.VTPM.get_backend(vtpm_uuid)
+if vtpm_be != vtpmcfg['backend']:
+ FAIL("vTPM's backend is in '%s', expected: '%s'" %
+ (vtpm_be, vtpmcfg['backend']))
- driver = xapi.execute(server.VTPM.get_driver, session, vtpm_uuid)
- if driver != vtpmcfg['type']:
- FAIL("vTPM has driver type '%s', expected: '%s'" %
- (driver, vtpmcfg['type']))
+driver = session.xenapi.VTPM.get_driver(vtpm_uuid)
+if driver != vtpmcfg['type']:
+ FAIL("vTPM has driver type '%s', expected: '%s'" %
+ (driver, vtpmcfg['type']))
- vtpm_rec = xapi.execute(server.VTPM.get_record, session, vtpm_uuid)
+vtpm_rec = session.xenapi.VTPM.get_record(vtpm_uuid)
- if vtpm_rec['driver'] != vtpmcfg['type']:
- FAIL("vTPM record shows driver type '%s', expected: '%s'" %
- (vtpm_rec['driver'], vtpmcfg['type']))
- if vtpm_rec['uuid'] != vtpm_uuid:
- FAIL("vTPM record shows vtpm uuid '%s', expected: '%s'" %
- (vtpm_rec['uuid'], vtpm_uuid))
- if vtpm_rec['VM'] != vm_uuid:
- FAIL("vTPM record shows VM uuid '%s', expected: '%s'" %
- (vtpm_rec['VM'], vm_uuid))
+if vtpm_rec['driver'] != vtpmcfg['type']:
+ FAIL("vTPM record shows driver type '%s', expected: '%s'" %
+ (vtpm_rec['driver'], vtpmcfg['type']))
+if vtpm_rec['uuid'] != vtpm_uuid:
+ FAIL("vTPM record shows vtpm uuid '%s', expected: '%s'" %
+ (vtpm_rec['uuid'], vtpm_uuid))
+if vtpm_rec['VM'] != vm_uuid:
+ FAIL("vTPM record shows VM uuid '%s', expected: '%s'" %
+ (vtpm_rec['VM'], vm_uuid))
- success = domain.start()
+success = domain.start()
- console = domain.getConsole()
-
- try:
- run = console.runCmd("cat /sys/devices/xen/vtpm-0/pcrs")
- except ConsoleError, e:
- saveLog(console.getHistory())
- vtpm_cleanup(domName)
- FAIL("No result from dumping the PCRs")
-
- if re.search("No such file",run["output"]):
- vtpm_cleanup(domName)
- FAIL("TPM frontend support not compiled into (domU?) kernel")
-
- domain.stop()
- domain.destroy()
-
-
+console = domain.getConsole()
try:
- do_test()
-finally:
- #Make sure all domains are gone that were created in this test case
- xapi.vm_destroy_all()
+ run = console.runCmd("cat /sys/devices/xen/vtpm-0/pcrs")
+except ConsoleError, e:
+ saveLog(console.getHistory())
+ vtpm_cleanup(domName)
+ FAIL("No result from dumping the PCRs")
+
+if re.search("No such file",run["output"]):
+ vtpm_cleanup(domName)
+ FAIL("TPM frontend support not compiled into (domU?) kernel")
+
+domain.stop()
+domain.destroy()
diff -r 5568efb41da4 -r 3f6a2745b3a3
unmodified_drivers/linux-2.6/platform-pci/evtchn.c
--- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c Mon Jan 15
13:27:20 2007 -0500
+++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c Wed Jan 17
09:56:40 2007 -0500
@@ -48,6 +48,12 @@ static struct {
void *dev_id;
int close; /* close on unbind_from_irqhandler()? */
} evtchns[MAX_EVTCHN];
+
+int irq_to_evtchn_port(int irq)
+{
+ return irq;
+}
+EXPORT_SYMBOL(irq_to_evtchn_port);
void mask_evtchn(int port)
{
diff -r 5568efb41da4 -r 3f6a2745b3a3
unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
--- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Mon Jan 15
13:27:20 2007 -0500
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Wed Jan 17
09:56:40 2007 -0500
@@ -179,7 +179,7 @@ static int get_hypercall_stubs(void)
#define get_hypercall_stubs() (0)
#endif
-static int get_callback_irq(struct pci_dev *pdev)
+static uint64_t get_callback_via(struct pci_dev *pdev)
{
#ifdef __ia64__
int irq;
@@ -189,16 +189,24 @@ static int get_callback_irq(struct pci_d
}
return 0;
#else /* !__ia64__ */
- return pdev->irq;
+ if (pdev->irq < 16)
+ return pdev->irq; /* ISA IRQ */
+ /* We don't know the GSI. Specify the PCI INTx line instead. */
+ return (((uint64_t)0x01 << 56) | /* PCI INTx identifier */
+ ((uint64_t)pci_domain_nr(pdev->bus) << 32) |
+ ((uint64_t)pdev->bus->number << 16) |
+ ((uint64_t)(pdev->devfn & 0xff) << 8) |
+ ((uint64_t)(pdev->pin - 1) & 3));
#endif
}
static int __devinit platform_pci_init(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
- int i, ret, callback_irq;
+ int i, ret;
long ioaddr, iolen;
long mmio_addr, mmio_len;
+ uint64_t callback_via;
i = pci_enable_device(pdev);
if (i)
@@ -210,9 +218,9 @@ static int __devinit platform_pci_init(s
mmio_addr = pci_resource_start(pdev, 1);
mmio_len = pci_resource_len(pdev, 1);
- callback_irq = get_callback_irq(pdev);
-
- if (mmio_addr == 0 || ioaddr == 0 || callback_irq == 0) {
+ callback_via = get_callback_via(pdev);
+
+ if (mmio_addr == 0 || ioaddr == 0 || callback_via == 0) {
printk(KERN_WARNING DRV_NAME ":no resources found\n");
return -ENOENT;
}
@@ -242,12 +250,12 @@ static int __devinit platform_pci_init(s
if ((ret = init_xen_info()))
goto out;
- if ((ret = request_irq(pdev->irq, evtchn_interrupt, SA_SHIRQ,
- "xen-platform-pci", pdev))) {
- goto out;
- }
-
- if ((ret = set_callback_irq(callback_irq)))
+ if ((ret = request_irq(pdev->irq, evtchn_interrupt,
+ SA_SHIRQ | SA_SAMPLE_RANDOM,
+ "xen-platform-pci", pdev)))
+ goto out;
+
+ if ((ret = set_callback_via(callback_via)))
goto out;
out:
@@ -297,7 +305,7 @@ static void __exit platform_pci_module_c
{
printk(KERN_INFO DRV_NAME ":Do platform module cleanup\n");
/* disable hypervisor for callback irq */
- set_callback_irq(0);
+ set_callback_via(0);
if (pci_device_registered)
pci_unregister_driver(&platform_driver);
}
diff -r 5568efb41da4 -r 3f6a2745b3a3
unmodified_drivers/linux-2.6/platform-pci/platform-pci.h
--- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h Mon Jan 15
13:27:20 2007 -0500
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h Wed Jan 17
09:56:40 2007 -0500
@@ -24,13 +24,13 @@
#include <linux/interrupt.h>
#include <xen/interface/hvm/params.h>
-static inline int set_callback_irq(int irq)
+static inline int set_callback_via(uint64_t via)
{
struct xen_hvm_param a;
a.domid = DOMID_SELF;
a.index = HVM_PARAM_CALLBACK_IRQ;
- a.value = irq;
+ a.value = via;
return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/Makefile
--- a/xen/Makefile Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/Makefile Wed Jan 17 09:56:40 2007 -0500
@@ -14,8 +14,8 @@ default: build
.PHONY: dist
dist: install
-.PHONY: build install clean distclean cscope TAGS tags
-build install debug clean distclean cscope TAGS tags::
+.PHONY: build install clean distclean cscope TAGS tags MAP
+build install debug clean distclean cscope TAGS tags MAP::
$(MAKE) -f Rules.mk _$@
.PHONY: _build
@@ -48,6 +48,7 @@ _debug:
.PHONY: _clean
_clean: delete-unfresh-files
$(MAKE) -C tools clean
+ $(MAKE) -f $(BASEDIR)/Rules.mk -C include clean
$(MAKE) -f $(BASEDIR)/Rules.mk -C common clean
$(MAKE) -f $(BASEDIR)/Rules.mk -C drivers clean
$(MAKE) -f $(BASEDIR)/Rules.mk -C acm clean
@@ -69,6 +70,7 @@ _distclean: clean
$(MAKE) -f $(BASEDIR)/Rules.mk include/xen/compile.h
$(MAKE) -f $(BASEDIR)/Rules.mk include/xen/acm_policy.h
[ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm
+ $(MAKE) -f $(BASEDIR)/Rules.mk -C include
$(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) asm-offsets.s
$(MAKE) -f $(BASEDIR)/Rules.mk include/asm-$(TARGET_ARCH)/asm-offsets.h
$(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) $(TARGET)
@@ -158,9 +160,9 @@ _cscope:
$(all_sources) > cscope.files
cscope -k -b -q
-.PHONY: MAP
-MAP:
- $(NM) -n $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw]
\)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map
+.PHONY: _MAP
+_MAP:
+ $(NM) -n $(TARGET)-syms | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw]
\)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map
.PHONY: FORCE
FORCE:
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/Rules.mk
--- a/xen/Rules.mk Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/Rules.mk Wed Jan 17 09:56:40 2007 -0500
@@ -34,6 +34,7 @@ TARGET := $(BASEDIR)/xen
HDRS := $(wildcard $(BASEDIR)/include/xen/*.h)
HDRS += $(wildcard $(BASEDIR)/include/public/*.h)
+HDRS += $(wildcard $(BASEDIR)/include/compat/*.h)
HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h)
HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h)
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/ia64/linux-xen/unaligned.c
--- a/xen/arch/ia64/linux-xen/unaligned.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/ia64/linux-xen/unaligned.c Wed Jan 17 09:56:40 2007 -0500
@@ -24,7 +24,7 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
-extern void die_if_kernel(char *str, struct pt_regs *regs, long err)
__attribute__ ((noreturn));
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
#undef DEBUG_UNALIGNED_TRAP
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/ia64/xen/domain.c Wed Jan 17 09:56:40 2007 -0500
@@ -522,14 +522,14 @@ void arch_domain_destroy(struct domain *
deallocate_rid_range(d);
}
-void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
+void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
{
int i;
- struct vcpu_extra_regs *er = &c->extra_regs;
-
- c->user_regs = *vcpu_regs (v);
- c->privregs_pfn = get_gpfn_from_mfn(virt_to_maddr(v->arch.privregs) >>
- PAGE_SHIFT);
+ struct vcpu_extra_regs *er = &c.nat->extra_regs;
+
+ c.nat->user_regs = *vcpu_regs(v);
+ c.nat->privregs_pfn = get_gpfn_from_mfn(virt_to_maddr(v->arch.privregs)
>>
+ PAGE_SHIFT);
/* Fill extra regs. */
for (i = 0; i < 8; i++) {
@@ -549,12 +549,12 @@ void arch_getdomaininfo_ctxt(struct vcpu
er->iva = v->arch.iva;
}
-int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
+int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c)
{
struct pt_regs *regs = vcpu_regs (v);
struct domain *d = v->domain;
- *regs = c->user_regs;
+ *regs = c.nat->user_regs;
if (!d->arch.is_vti) {
/* domain runs at PL2/3 */
@@ -562,9 +562,9 @@ int arch_set_info_guest(struct vcpu *v,
regs->ar_rsc |= (2 << 2); /* force PL2/3 */
}
- if (c->flags & VGCF_EXTRA_REGS) {
+ if (c.nat->flags & VGCF_EXTRA_REGS) {
int i;
- struct vcpu_extra_regs *er = &c->extra_regs;
+ struct vcpu_extra_regs *er = &c.nat->extra_regs;
for (i = 0; i < 8; i++) {
vcpu_set_itr(v, i, er->itrs[i].pte,
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/ia64/xen/xenmisc.c Wed Jan 17 09:56:40 2007 -0500
@@ -57,26 +57,6 @@ struct pt_regs *guest_cpu_user_regs(void
struct pt_regs *guest_cpu_user_regs(void) { return vcpu_regs(current); }
///////////////////////////////
-// from arch/ia64/traps.c
-///////////////////////////////
-
-int is_kernel_text(unsigned long addr)
-{
- extern char _stext[], _etext[];
- if (addr >= (unsigned long) _stext &&
- addr <= (unsigned long) _etext)
- return 1;
-
- return 0;
-}
-
-unsigned long kernel_text_end(void)
-{
- extern char _etext[];
- return (unsigned long) _etext;
-}
-
-///////////////////////////////
// from common/keyhandler.c
///////////////////////////////
void dump_pageframe_info(struct domain *d)
@@ -97,7 +77,7 @@ void console_print(char *msg)
// called from unaligned.c
////////////////////////////////////
-void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__
((noreturn)) */
+void die_if_kernel(char *str, struct pt_regs *regs, long err)
{
if (user_mode(regs))
return;
@@ -108,7 +88,7 @@ void die_if_kernel(char *str, struct pt_
domain_crash_synchronous();
}
-void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err) /*
__attribute__ ((noreturn)) */
+void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err)
{
if (vmx_user_mode(regs))
return;
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/powerpc/domain.c Wed Jan 17 09:56:40 2007 -0500
@@ -150,11 +150,9 @@ void vcpu_destroy(struct vcpu *v)
{
}
-int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c)
+int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c)
{
- struct domain *d = v->domain;
-
- memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
+ memcpy(&v->arch.ctxt, &c.nat->user_regs, sizeof(c.nat->user_regs));
printk("Domain[%d].%d: initializing\n", d->domain_id, v->vcpu_id);
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/powerpc/domctl.c
--- a/xen/arch/powerpc/domctl.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/powerpc/domctl.c Wed Jan 17 09:56:40 2007 -0500
@@ -22,6 +22,7 @@
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/domain.h>
#include <xen/guest_access.h>
#include <xen/shadow.h>
#include <public/xen.h>
@@ -29,10 +30,9 @@
#include <public/sysctl.h>
#include <asm/processor.h>
-void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *);
-void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c)
+void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
{
- memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
+ memcpy(&c.nat->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
/* XXX fill in rest of vcpu_guest_context_t */
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/powerpc/setup.c Wed Jan 17 09:56:40 2007 -0500
@@ -91,19 +91,6 @@ static struct domain *idle_domain;
volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
-int is_kernel_text(unsigned long addr)
-{
- if (addr >= (unsigned long) &_start &&
- addr <= (unsigned long) &_etext)
- return 1;
- return 0;
-}
-
-unsigned long kernel_text_end(void)
-{
- return (unsigned long) &_etext;
-}
-
static void __init do_initcalls(void)
{
initcall_t *call;
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/powerpc/xen.lds.S
--- a/xen/arch/powerpc/xen.lds.S Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/powerpc/xen.lds.S Wed Jan 17 09:56:40 2007 -0500
@@ -114,10 +114,10 @@ SECTIONS
. = ALIGN(32);
__setup_start = .;
- .setup.init : { *(.setup.init) }
+ .init.setup : { *(.init.setup) }
__setup_end = .;
__initcall_start = .;
- .initcall.init : { *(.initcall.init) }
+ .initcall.init : { *(.initcall1.init) }
__initcall_end = .;
__inithcall_start = .;
.inithcall.text : { *(.inithcall.text) }
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/boot/x86_32.S Wed Jan 17 09:56:40 2007 -0500
@@ -11,8 +11,6 @@
.text
ENTRY(start)
-ENTRY(stext)
-ENTRY(_stext)
jmp __start
.align 4
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/boot/x86_64.S Wed Jan 17 09:56:40 2007 -0500
@@ -14,8 +14,6 @@
#define SYM_PHYS(sym) (sym - __PAGE_OFFSET)
ENTRY(start)
-ENTRY(stext)
-ENTRY(_stext)
jmp __start
.org 0x004
@@ -226,14 +224,33 @@ high_start:
.align PAGE_SIZE, 0
ENTRY(gdt_table)
.quad 0x0000000000000000 /* unused */
- .quad 0x00cf9a000000ffff /* 0xe008 ring 0 code, compatibility */
- .quad 0x00af9a000000ffff /* 0xe010 ring 0 code, 64-bit mode */
- .quad 0x00cf92000000ffff /* 0xe018 ring 0 data */
+ .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
+ .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
+ .quad 0x0000000000000000 /* reserved */
.quad 0x00cffa000000ffff /* 0xe023 ring 3 code, compatibility */
.quad 0x00cff2000000ffff /* 0xe02b ring 3 data */
.quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */
- .quad 0x0000000000000000 /* unused */
+ .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
.fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+
+#ifdef CONFIG_COMPAT
+ .align PAGE_SIZE, 0
+/* NB. Even rings != 0 get access to the full 4Gb, as only the */
+/* (compatibility) machine->physical mapping table lives there. */
+ENTRY(compat_gdt_table)
+ .quad 0x0000000000000000 /* unused */
+ .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
+ .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
+ .quad 0x00cfba000000ffff /* 0xe019 ring 1 code, compatibility */
+ .quad 0x00cfb2000000ffff /* 0xe021 ring 1 data */
+ .quad 0x00cffa000000ffff /* 0xe02b ring 3 code, compatibility */
+ .quad 0x00cff2000000ffff /* 0xe033 ring 3 data */
+ .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
+ .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+# undef LIMIT
+#endif
/* Initial PML4 -- level-4 page table. */
.align PAGE_SIZE, 0
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/compat.c
--- a/xen/arch/x86/compat.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/compat.c Wed Jan 17 09:56:40 2007 -0500
@@ -9,16 +9,22 @@
#include <xen/guest_access.h>
#include <xen/hypercall.h>
+#ifndef COMPAT
+typedef long ret_t;
+#endif
+
/* Legacy hypercall (as of 0x00030202). */
-long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop)
+ret_t do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop)
{
struct physdev_op op;
if ( unlikely(copy_from_guest(&op, uop, 1) != 0) )
return -EFAULT;
- return do_physdev_op(op.cmd, (XEN_GUEST_HANDLE(void)) { &uop.p->u });
+ return do_physdev_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void));
}
+
+#ifndef COMPAT
/* Legacy hypercall (as of 0x00030202). */
long do_event_channel_op_compat(XEN_GUEST_HANDLE(evtchn_op_t) uop)
@@ -28,5 +34,7 @@ long do_event_channel_op_compat(XEN_GUES
if ( unlikely(copy_from_guest(&op, uop, 1) != 0) )
return -EFAULT;
- return do_event_channel_op(op.cmd, (XEN_GUEST_HANDLE(void)) {&uop.p->u });
+ return do_event_channel_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void));
}
+
+#endif
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/crash.c
--- a/xen/arch/x86/crash.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/crash.c Wed Jan 17 09:56:40 2007 -0500
@@ -25,6 +25,7 @@
#include <xen/kexec.h>
#include <xen/sched.h>
#include <public/xen.h>
+#include <asm/shared.h>
#include <asm/hvm/hvm.h>
static atomic_t waiting_for_crash_ipi;
@@ -103,7 +104,7 @@ void machine_crash_shutdown(void)
info = kexec_crash_save_info();
info->dom0_pfn_to_mfn_frame_list_list =
- dom0->shared_info->arch.pfn_to_mfn_frame_list_list;
+ arch_get_pfn_to_mfn_frame_list_list(dom0);
}
/*
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/domain.c Wed Jan 17 09:56:40 2007 -0500
@@ -16,6 +16,7 @@
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
+#include <xen/domain.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
@@ -40,6 +41,9 @@
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/msr.h>
+#ifdef CONFIG_COMPAT
+#include <compat/vcpu.h>
+#endif
DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
@@ -127,6 +131,195 @@ void free_vcpu_struct(struct vcpu *v)
xfree(v);
}
+#ifdef CONFIG_COMPAT
+
+int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
+{
+ struct domain *d = v->domain;
+ unsigned i;
+ struct page_info *pg;
+
+ if ( !d->arch.mm_arg_xlat_l3 )
+ {
+ pg = alloc_domheap_page(NULL);
+ if ( !pg )
+ return -ENOMEM;
+ d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+ }
+
+ l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
+ l4e_from_paddr(__pa(d->arch.mm_arg_xlat_l3), __PAGE_HYPERVISOR);
+
+ for ( i = 0; i < COMPAT_ARG_XLAT_PAGES; ++i )
+ {
+ unsigned long va = COMPAT_ARG_XLAT_VIRT_START(v->vcpu_id) + i * PAGE_SIZE;
+ l2_pgentry_t *l2tab;
+ l1_pgentry_t *l1tab;
+
+ if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) )
+ {
+ pg = alloc_domheap_page(NULL);
+ if ( !pg )
+ return -ENOMEM;
+ clear_page(page_to_virt(pg));
+ d->arch.mm_arg_xlat_l3[l3_table_offset(va)] = l3e_from_page(pg, __PAGE_HYPERVISOR);
+ }
+ l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]);
+ if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) )
+ {
+ pg = alloc_domheap_page(NULL);
+ if ( !pg )
+ return -ENOMEM;
+ clear_page(page_to_virt(pg));
+ l2tab[l2_table_offset(va)] = l2e_from_page(pg, __PAGE_HYPERVISOR);
+ }
+ l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]);
+ BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)]));
+ pg = alloc_domheap_page(NULL);
+ if ( !pg )
+ return -ENOMEM;
+ l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR);
+ }
+
+ return 0;
+}
+
+static void release_arg_xlat_area(struct domain *d)
+{
+ if ( d->arch.mm_arg_xlat_l3 )
+ {
+ unsigned l3;
+
+ for ( l3 = 0; l3 < L3_PAGETABLE_ENTRIES; ++l3 )
+ {
+ if ( l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3]) )
+ {
+ l2_pgentry_t *l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3]);
+ unsigned l2;
+
+ for ( l2 = 0; l2 < L2_PAGETABLE_ENTRIES; ++l2 )
+ {
+ if ( l2e_get_intpte(l2tab[l2]) )
+ {
+ l1_pgentry_t *l1tab = l2e_to_l1e(l2tab[l2]);
+ unsigned l1;
+
+ for ( l1 = 0; l1 < L1_PAGETABLE_ENTRIES; ++l1 )
+ {
+ if ( l1e_get_intpte(l1tab[l1]) )
+ free_domheap_page(l1e_get_page(l1tab[l1]));
+ }
+ free_domheap_page(l2e_get_page(l2tab[l2]));
+ }
+ }
+ free_domheap_page(l3e_get_page(d->arch.mm_arg_xlat_l3[l3]));
+ }
+ }
+ free_domheap_page(virt_to_page(d->arch.mm_arg_xlat_l3));
+ }
+}
+
+static int setup_compat_l4(struct vcpu *v)
+{
+ struct page_info *pg = alloc_domheap_page(NULL);
+ l4_pgentry_t *l4tab;
+ int rc;
+
+ if ( !pg )
+ return -ENOMEM;
+ l4tab = copy_page(page_to_virt(pg), idle_pg_table);
+ l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
+ l4e_from_page(pg, __PAGE_HYPERVISOR);
+ l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
+ v->arch.guest_table = pagetable_from_page(pg);
+ v->arch.guest_table_user = v->arch.guest_table;
+
+ if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
+ {
+ free_domheap_page(pg);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void release_compat_l4(struct vcpu *v)
+{
+ free_domheap_page(pagetable_get_page(v->arch.guest_table));
+ v->arch.guest_table = pagetable_null();
+ v->arch.guest_table_user = pagetable_null();
+}
+
+static inline int may_switch_mode(struct domain *d)
+{
+ return 1; /* XXX */
+}
+
+int switch_native(struct domain *d)
+{
+ l1_pgentry_t gdt_l1e;
+ unsigned int vcpuid;
+
+ if ( !d )
+ return -EINVAL;
+ if ( !may_switch_mode(d) )
+ return -EACCES;
+ if ( !IS_COMPAT(d) )
+ return 0;
+
+ clear_bit(_DOMF_compat, &d->domain_flags);
+ release_arg_xlat_area(d);
+
+ /* switch gdt */
+ gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
+ for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
+ {
+ d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+ if (d->vcpu[vcpuid])
+ release_compat_l4(d->vcpu[vcpuid]);
+ }
+
+ return 0;
+}
+
+int switch_compat(struct domain *d)
+{
+ l1_pgentry_t gdt_l1e;
+ unsigned int vcpuid;
+
+ if ( !d )
+ return -EINVAL;
+ if ( compat_disabled )
+ return -ENOSYS;
+ if ( !may_switch_mode(d) )
+ return -EACCES;
+ if ( IS_COMPAT(d) )
+ return 0;
+
+ set_bit(_DOMF_compat, &d->domain_flags);
+
+ /* switch gdt */
+ gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
+ for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
+ {
+ d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+ if (d->vcpu[vcpuid]
+ && setup_compat_l4(d->vcpu[vcpuid]) != 0)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+#else
+#define release_arg_xlat_area(d) ((void)0)
+#define setup_compat_l4(v) 0
+#define release_compat_l4(v) ((void)0)
+#endif
+
int vcpu_initialise(struct vcpu *v)
{
struct domain *d = v->domain;
@@ -161,11 +354,16 @@ int vcpu_initialise(struct vcpu *v)
v->arch.perdomain_ptes =
d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
+ if ( IS_COMPAT(d) && (rc = setup_compat_l4(v)) != 0 )
+ return rc;
+
return 0;
}
void vcpu_destroy(struct vcpu *v)
{
+ if ( IS_COMPAT(v->domain) )
+ release_compat_l4(v);
}
int arch_domain_create(struct domain *d)
@@ -219,6 +417,10 @@ int arch_domain_create(struct domain *d)
#endif /* __x86_64__ */
+#ifdef CONFIG_COMPAT
+ HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START;
+#endif
+
shadow_domain_init(d);
if ( !is_idle_domain(d) )
@@ -270,47 +472,88 @@ void arch_domain_destroy(struct domain *
free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
#endif
+ if ( IS_COMPAT(d) )
+ release_arg_xlat_area(d);
+
free_xenheap_page(d->shared_info);
}
/* This is called by arch_final_setup_guest and do_boot_vcpu */
int arch_set_info_guest(
- struct vcpu *v, struct vcpu_guest_context *c)
+ struct vcpu *v, vcpu_guest_context_u c)
{
struct domain *d = v->domain;
+#ifdef CONFIG_COMPAT
+#define c(fld) (!IS_COMPAT(d) ? (c.nat->fld) : (c.cmp->fld))
+#else
+#define c(fld) (c.nat->fld)
+#endif
unsigned long cr3_pfn = INVALID_MFN;
+ unsigned long flags = c(flags);
int i, rc;
if ( !is_hvm_vcpu(v) )
{
- fixup_guest_stack_selector(c->user_regs.ss);
- fixup_guest_stack_selector(c->kernel_ss);
- fixup_guest_code_selector(c->user_regs.cs);
-
+ if ( !IS_COMPAT(d) )
+ {
+ fixup_guest_stack_selector(d, c.nat->user_regs.ss);
+ fixup_guest_stack_selector(d, c.nat->kernel_ss);
+ fixup_guest_code_selector(d, c.nat->user_regs.cs);
#ifdef __i386__
- fixup_guest_code_selector(c->event_callback_cs);
- fixup_guest_code_selector(c->failsafe_callback_cs);
-#endif
-
- for ( i = 0; i < 256; i++ )
- fixup_guest_code_selector(c->trap_ctxt[i].cs);
-
- /* LDT safety checks. */
- if ( ((c->ldt_base & (PAGE_SIZE-1)) != 0) ||
- (c->ldt_ents > 8192) ||
- !array_access_ok(c->ldt_base, c->ldt_ents, LDT_ENTRY_SIZE) )
- return -EINVAL;
+ fixup_guest_code_selector(d, c.nat->event_callback_cs);
+ fixup_guest_code_selector(d, c.nat->failsafe_callback_cs);
+#endif
+
+ for ( i = 0; i < 256; i++ )
+ fixup_guest_code_selector(d, c.nat->trap_ctxt[i].cs);
+
+ /* LDT safety checks. */
+ if ( ((c.nat->ldt_base & (PAGE_SIZE-1)) != 0) ||
+ (c.nat->ldt_ents > 8192) ||
+ !array_access_ok(c.nat->ldt_base,
+ c.nat->ldt_ents,
+ LDT_ENTRY_SIZE) )
+ return -EINVAL;
+ }
+#ifdef CONFIG_COMPAT
+ else
+ {
+ fixup_guest_stack_selector(d, c.cmp->user_regs.ss);
+ fixup_guest_stack_selector(d, c.cmp->kernel_ss);
+ fixup_guest_code_selector(d, c.cmp->user_regs.cs);
+ fixup_guest_code_selector(d, c.cmp->event_callback_cs);
+ fixup_guest_code_selector(d, c.cmp->failsafe_callback_cs);
+
+ for ( i = 0; i < 256; i++ )
+ fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs);
+
+ /* LDT safety checks. */
+ if ( ((c.cmp->ldt_base & (PAGE_SIZE-1)) != 0) ||
+ (c.cmp->ldt_ents > 8192) ||
+ !compat_array_access_ok(c.cmp->ldt_base,
+ c.cmp->ldt_ents,
+ LDT_ENTRY_SIZE) )
+ return -EINVAL;
+ }
+#endif
}
clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
- if ( c->flags & VGCF_i387_valid )
+ if ( flags & VGCF_I387_VALID )
set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
v->arch.flags &= ~TF_kernel_mode;
- if ( (c->flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ )
+ if ( (flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ )
v->arch.flags |= TF_kernel_mode;
- memcpy(&v->arch.guest_context, c, sizeof(*c));
+ if ( !IS_COMPAT(v->domain) )
+ memcpy(&v->arch.guest_context, c.nat, sizeof(*c.nat));
+#ifdef CONFIG_COMPAT
+ else
+ {
+ XLAT_vcpu_guest_context(&v->arch.guest_context, c.cmp);
+ }
+#endif
/* Only CR0.TS is modifiable by guest or admin. */
v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS;
@@ -338,28 +581,66 @@ int arch_set_info_guest(
memset(v->arch.guest_context.debugreg, 0,
sizeof(v->arch.guest_context.debugreg));
for ( i = 0; i < 8; i++ )
- (void)set_debugreg(v, i, c->debugreg[i]);
+ (void)set_debugreg(v, i, c(debugreg[i]));
if ( v->vcpu_id == 0 )
- d->vm_assist = c->vm_assist;
+ d->vm_assist = c(vm_assist);
if ( !is_hvm_vcpu(v) )
{
- if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
+ if ( !IS_COMPAT(d) )
+ rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents);
+#ifdef CONFIG_COMPAT
+ else
+ {
+ unsigned long gdt_frames[ARRAY_SIZE(c.cmp->gdt_frames)];
+ unsigned int i, n = (c.cmp->gdt_ents + 511) / 512;
+
+ if ( n > ARRAY_SIZE(c.cmp->gdt_frames) )
+ return -EINVAL;
+ for ( i = 0; i < n; ++i )
+ gdt_frames[i] = c.cmp->gdt_frames[i];
+ rc = (int)set_gdt(v, gdt_frames, c.cmp->gdt_ents);
+ }
+#endif
+ if ( rc != 0 )
return rc;
- cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3]));
-
- if ( shadow_mode_refcounts(d)
- ? !get_page(mfn_to_page(cr3_pfn), d)
- : !get_page_and_type(mfn_to_page(cr3_pfn), d,
- PGT_base_page_table) )
- {
- destroy_gdt(v);
- return -EINVAL;
- }
-
- v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
+ if ( !IS_COMPAT(d) )
+ {
+ cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3]));
+
+ if ( shadow_mode_refcounts(d)
+ ? !get_page(mfn_to_page(cr3_pfn), d)
+ : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+ PGT_base_page_table) )
+ {
+ destroy_gdt(v);
+ return -EINVAL;
+ }
+
+ v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
+ }
+#ifdef CONFIG_COMPAT
+ else
+ {
+ l4_pgentry_t *l4tab;
+
+ cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3]));
+
+ if ( shadow_mode_refcounts(d)
+ ? !get_page(mfn_to_page(cr3_pfn), d)
+ : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+ PGT_l3_page_table) )
+ {
+ destroy_gdt(v);
+ return -EINVAL;
+ }
+
+ l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
+ *l4tab = l4e_from_pfn(cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
+ }
+#endif
}
if ( v->vcpu_id == 0 )
@@ -374,6 +655,7 @@ int arch_set_info_guest(
update_cr3(v);
return 0;
+#undef c
}
long
@@ -397,16 +679,16 @@ arch_do_vcpu_op(
break;
rc = 0;
- v->runstate_guest = area.addr.h;
+ runstate_guest(v) = area.addr.h;
if ( v == current )
{
- __copy_to_guest(v->runstate_guest, &v->runstate, 1);
+ __copy_to_guest(runstate_guest(v), &v->runstate, 1);
}
else
{
vcpu_runstate_get(v, &runstate);
- __copy_to_guest(v->runstate_guest, &runstate, 1);
+ __copy_to_guest(runstate_guest(v), &runstate, 1);
}
break;
@@ -489,27 +771,30 @@ static void load_segments(struct vcpu *n
all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
}
- /* This can only be non-zero if selector is NULL. */
- if ( nctxt->fs_base )
- wrmsr(MSR_FS_BASE,
- nctxt->fs_base,
- nctxt->fs_base>>32);
-
- /* Most kernels have non-zero GS base, so don't bother testing. */
- /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
- wrmsr(MSR_SHADOW_GS_BASE,
- nctxt->gs_base_kernel,
- nctxt->gs_base_kernel>>32);
-
- /* This can only be non-zero if selector is NULL. */
- if ( nctxt->gs_base_user )
- wrmsr(MSR_GS_BASE,
- nctxt->gs_base_user,
- nctxt->gs_base_user>>32);
-
- /* If in kernel mode then switch the GS bases around. */
- if ( n->arch.flags & TF_kernel_mode )
- __asm__ __volatile__ ( "swapgs" );
+ if ( !IS_COMPAT(n->domain) )
+ {
+ /* This can only be non-zero if selector is NULL. */
+ if ( nctxt->fs_base )
+ wrmsr(MSR_FS_BASE,
+ nctxt->fs_base,
+ nctxt->fs_base>>32);
+
+ /* Most kernels have non-zero GS base, so don't bother testing. */
+ /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
+ wrmsr(MSR_SHADOW_GS_BASE,
+ nctxt->gs_base_kernel,
+ nctxt->gs_base_kernel>>32);
+
+ /* This can only be non-zero if selector is NULL. */
+ if ( nctxt->gs_base_user )
+ wrmsr(MSR_GS_BASE,
+ nctxt->gs_base_user,
+ nctxt->gs_base_user>>32);
+
+ /* If in kernel mode then switch the GS bases around. */
+ if ( (n->arch.flags & TF_kernel_mode) )
+ __asm__ __volatile__ ( "swapgs" );
+ }
if ( unlikely(!all_segs_okay) )
{
@@ -520,6 +805,55 @@ static void load_segments(struct vcpu *n
(unsigned long *)nctxt->kernel_sp;
unsigned long cs_and_mask, rflags;
+ if ( IS_COMPAT(n->domain) )
+ {
+ unsigned int *esp = ring_1(regs) ?
+ (unsigned int *)regs->rsp :
+ (unsigned int *)nctxt->kernel_sp;
+ unsigned int cs_and_mask, eflags;
+ int ret = 0;
+
+ /* CS longword also contains full evtchn_upcall_mask. */
+ cs_and_mask = (unsigned short)regs->cs |
+ ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
+ /* Fold upcall mask into RFLAGS.IF. */
+ eflags = regs->_eflags & ~X86_EFLAGS_IF;
+ eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+
+ if ( !ring_1(regs) )
+ {
+ ret = put_user(regs->ss, esp-1);
+ ret |= put_user(regs->_esp, esp-2);
+ esp -= 2;
+ }
+
+ if ( ret |
+ put_user(eflags, esp-1) |
+ put_user(cs_and_mask, esp-2) |
+ put_user(regs->_eip, esp-3) |
+ put_user(nctxt->user_regs.gs, esp-4) |
+ put_user(nctxt->user_regs.fs, esp-5) |
+ put_user(nctxt->user_regs.es, esp-6) |
+ put_user(nctxt->user_regs.ds, esp-7) )
+ {
+ gdprintk(XENLOG_ERR, "Error while creating compat "
+ "failsafe callback frame.\n");
+ domain_crash(n->domain);
+ }
+
+ if ( test_bit(_VGCF_failsafe_disables_events,
+ &n->arch.guest_context.flags) )
+ vcpu_info(n, evtchn_upcall_mask) = 1;
+
+ regs->entry_vector = TRAP_syscall;
+ regs->_eflags &= 0xFFFCBEFFUL;
+ regs->ss = FLAT_COMPAT_KERNEL_SS;
+ regs->_esp = (unsigned long)(esp-7);
+ regs->cs = FLAT_COMPAT_KERNEL_CS;
+ regs->_eip = nctxt->failsafe_callback_eip;
+ return;
+ }
+
if ( !(n->arch.flags & TF_kernel_mode) )
toggle_guest_mode(n);
else
@@ -527,11 +861,11 @@ static void load_segments(struct vcpu *n
/* CS longword also contains full evtchn_upcall_mask. */
cs_and_mask = (unsigned long)regs->cs |
- ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32);
+ ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
/* Fold upcall mask into RFLAGS.IF. */
rflags = regs->rflags & ~X86_EFLAGS_IF;
- rflags |= !n->vcpu_info->evtchn_upcall_mask << 9;
+ rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
if ( put_user(regs->ss, rsp- 1) |
put_user(regs->rsp, rsp- 2) |
@@ -552,7 +886,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events,
&n->arch.guest_context.flags) )
- n->vcpu_info->evtchn_upcall_mask = 1;
+ vcpu_info(n, evtchn_upcall_mask) = 1;
regs->entry_vector = TRAP_syscall;
regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
@@ -581,7 +915,7 @@ static void save_segments(struct vcpu *v
if ( regs->es )
dirty_segment_mask |= DIRTY_ES;
- if ( regs->fs )
+ if ( regs->fs || IS_COMPAT(v->domain) )
{
dirty_segment_mask |= DIRTY_FS;
ctxt->fs_base = 0; /* != 0 selector kills fs_base */
@@ -591,7 +925,7 @@ static void save_segments(struct vcpu *v
dirty_segment_mask |= DIRTY_FS_BASE;
}
- if ( regs->gs )
+ if ( regs->gs || IS_COMPAT(v->domain) )
{
dirty_segment_mask |= DIRTY_GS;
ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
@@ -713,7 +1047,7 @@ void context_switch(struct vcpu *prev, s
local_irq_disable();
- if ( is_hvm_vcpu(prev) )
+ if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) )
pt_freeze_time(prev);
set_current(next);
@@ -725,6 +1059,23 @@ void context_switch(struct vcpu *prev, s
else
{
__context_switch();
+
+#ifdef CONFIG_COMPAT
+ if ( is_idle_vcpu(prev)
+ || IS_COMPAT(prev->domain) != IS_COMPAT(next->domain) )
+ {
+ uint32_t efer_lo, efer_hi;
+
+ local_flush_tlb_one(GDT_VIRT_START(next) + FIRST_RESERVED_GDT_BYTE);
+
+ rdmsr(MSR_EFER, efer_lo, efer_hi);
+ if ( !IS_COMPAT(next->domain) == !(efer_lo & EFER_SCE) )
+ {
+ efer_lo ^= EFER_SCE;
+ wrmsr(MSR_EFER, efer_lo, efer_hi);
+ }
+ }
+#endif
/* Re-enable interrupts before restoring state which may fault. */
local_irq_enable();
@@ -739,8 +1090,20 @@ void context_switch(struct vcpu *prev, s
context_saved(prev);
/* Update per-VCPU guest runstate shared memory area (if registered). */
- if ( !guest_handle_is_null(next->runstate_guest) )
- __copy_to_guest(next->runstate_guest, &next->runstate, 1);
+ if ( !guest_handle_is_null(runstate_guest(next)) )
+ {
+ if ( !IS_COMPAT(next->domain) )
+ __copy_to_guest(runstate_guest(next), &next->runstate, 1);
+#ifdef CONFIG_COMPAT
+ else
+ {
+ struct compat_vcpu_runstate_info info;
+
+ XLAT_vcpu_runstate_info(&info, &next->runstate);
+ __copy_to_guest(next->runstate_guest.compat, &info, 1);
+ }
+#endif
+ }
schedule_tail(next);
BUG();
@@ -811,55 +1174,153 @@ unsigned long hypercall_create_continuat
for ( i = 0; *p != '\0'; i++ )
mcs->call.args[i] = next_arg(p, args);
+ if ( IS_COMPAT(current->domain) )
+ {
+ for ( ; i < 6; i++ )
+ mcs->call.args[i] = 0;
+ }
}
else
{
regs = guest_cpu_user_regs();
-#if defined(__i386__)
regs->eax = op;
-
- if ( supervisor_mode_kernel || is_hvm_vcpu(current) )
- regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+ regs->eip -= 2; /* re-execute 'syscall' / 'int 0x82' */
+
+#ifdef __x86_64__
+ if ( !IS_COMPAT(current->domain) )
+ {
+ for ( i = 0; *p != '\0'; i++ )
+ {
+ arg = next_arg(p, args);
+ switch ( i )
+ {
+ case 0: regs->rdi = arg; break;
+ case 1: regs->rsi = arg; break;
+ case 2: regs->rdx = arg; break;
+ case 3: regs->r10 = arg; break;
+ case 4: regs->r8 = arg; break;
+ case 5: regs->r9 = arg; break;
+ }
+ }
+ }
else
- regs->eip -= 2; /* re-execute 'int 0x82' */
-
- for ( i = 0; *p != '\0'; i++ )
- {
- arg = next_arg(p, args);
+#endif
+ {
+ if ( supervisor_mode_kernel || is_hvm_vcpu(current) )
+ regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+
+ for ( i = 0; *p != '\0'; i++ )
+ {
+ arg = next_arg(p, args);
+ switch ( i )
+ {
+ case 0: regs->ebx = arg; break;
+ case 1: regs->ecx = arg; break;
+ case 2: regs->edx = arg; break;
+ case 3: regs->esi = arg; break;
+ case 4: regs->edi = arg; break;
+ case 5: regs->ebp = arg; break;
+ }
+ }
+ }
+ }
+
+ va_end(args);
+
+ return op;
+}
+
+#ifdef CONFIG_COMPAT
+int hypercall_xlat_continuation(unsigned int *id, unsigned int mask, ...)
+{
+ int rc = 0;
+ struct mc_state *mcs = &this_cpu(mc_state);
+ struct cpu_user_regs *regs;
+ unsigned int i, cval = 0;
+ unsigned long nval = 0;
+ va_list args;
+
+ BUG_ON(*id > 5);
+ BUG_ON(mask & (1U << *id));
+
+ va_start(args, mask);
+
+ if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
+ {
+ if ( !test_bit(_MCSF_call_preempted, &mcs->flags) )
+ return 0;
+ for ( i = 0; i < 6; ++i, mask >>= 1 )
+ {
+ if ( mask & 1 )
+ {
+ nval = va_arg(args, unsigned long);
+ cval = va_arg(args, unsigned int);
+ if ( cval == nval )
+ mask &= ~1U;
+ else
+ BUG_ON(nval == (unsigned int)nval);
+ }
+ else if ( id && *id == i )
+ {
+ *id = mcs->call.args[i];
+ id = NULL;
+ }
+ if ( (mask & 1) && mcs->call.args[i] == nval )
+ ++rc;
+ else
+ {
+ cval = mcs->call.args[i];
+ BUG_ON(mcs->call.args[i] != cval);
+ }
+ mcs->compat_call.args[i] = cval;
+ }
+ }
+ else
+ {
+ regs = guest_cpu_user_regs();
+ for ( i = 0; i < 6; ++i, mask >>= 1 )
+ {
+ unsigned long *reg;
+
switch ( i )
{
- case 0: regs->ebx = arg; break;
- case 1: regs->ecx = arg; break;
- case 2: regs->edx = arg; break;
- case 3: regs->esi = arg; break;
- case 4: regs->edi = arg; break;
- case 5: regs->ebp = arg; break;
- }
- }
-#elif defined(__x86_64__)
- regs->rax = op;
- regs->rip -= 2; /* re-execute 'syscall' */
-
- for ( i = 0; *p != '\0'; i++ )
- {
- arg = next_arg(p, args);
- switch ( i )
- {
- case 0: regs->rdi = arg; break;
- case 1: regs->rsi = arg; break;
- case 2: regs->rdx = arg; break;
- case 3: regs->r10 = arg; break;
- case 4: regs->r8 = arg; break;
- case 5: regs->r9 = arg; break;
- }
- }
-#endif
+ case 0: reg = &regs->ebx; break;
+ case 1: reg = &regs->ecx; break;
+ case 2: reg = &regs->edx; break;
+ case 3: reg = &regs->esi; break;
+ case 4: reg = &regs->edi; break;
+ case 5: reg = &regs->ebp; break;
+ default: BUG(); reg = NULL; break;
+ }
+ if ( (mask & 1) )
+ {
+ nval = va_arg(args, unsigned long);
+ cval = va_arg(args, unsigned int);
+ if ( cval == nval )
+ mask &= ~1U;
+ else
+ BUG_ON(nval == (unsigned int)nval);
+ }
+ else if ( id && *id == i )
+ {
+ *id = *reg;
+ id = NULL;
+ }
+ if ( (mask & 1) && *reg == nval )
+ {
+ *reg = cval;
+ ++rc;
+ }
+ else
+ BUG_ON(*reg != (unsigned int)*reg);
+ }
}
va_end(args);
- return op;
-}
+ return rc;
+}
+#endif
static void relinquish_memory(struct domain *d, struct list_head *list)
{
@@ -931,6 +1392,24 @@ void domain_relinquish_resources(struct
{
/* Drop ref to guest_table (from new_guest_cr3(), svm/vmx cr3 handling,
* or sh_update_paging_modes()) */
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ {
+ if ( is_hvm_vcpu(v) )
+ pfn = pagetable_get_pfn(v->arch.guest_table);
+ else
+ pfn = l4e_get_pfn(*(l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)));
+
+ if ( pfn != 0 )
+ {
+ if ( shadow_mode_refcounts(d) )
+ put_page(mfn_to_page(pfn));
+ else
+ put_page_and_type(mfn_to_page(pfn));
+ }
+ continue;
+ }
+#endif
pfn = pagetable_get_pfn(v->arch.guest_table);
if ( pfn != 0 )
{
@@ -938,6 +1417,10 @@ void domain_relinquish_resources(struct
put_page(mfn_to_page(pfn));
else
put_page_and_type(mfn_to_page(pfn));
+#ifdef __x86_64__
+ if ( pfn == pagetable_get_pfn(v->arch.guest_table_user) )
+ v->arch.guest_table_user = pagetable_null();
+#endif
v->arch.guest_table = pagetable_null();
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/domain_build.c Wed Jan 17 09:56:40 2007 -0500
@@ -19,6 +19,7 @@
#include <xen/version.h>
#include <xen/iocap.h>
#include <xen/bitops.h>
+#include <xen/compat.h>
#include <asm/regs.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -90,9 +91,11 @@ string_param("dom0_ioports_disable", opt
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT)
#elif defined(__x86_64__)
-/* Allow ring-3 access in long mode as guest cannot use ring 1. */
+/* Allow ring-3 access in long mode as guest cannot use ring 1 ... */
#define BASE_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L1_PROT (BASE_PROT|_PAGE_GUEST_KERNEL)
+/* ... except for compatibility mode guests. */
+#define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (BASE_PROT|_PAGE_DIRTY)
#define L3_PROT (BASE_PROT|_PAGE_DIRTY)
#define L4_PROT (BASE_PROT|_PAGE_DIRTY)
@@ -261,8 +264,8 @@ int construct_dom0(struct domain *d,
start_info_t *si;
struct vcpu *v = d->vcpu[0];
const char *p;
- unsigned long hypercall_page;
- int hypercall_page_defined;
+ unsigned long long value;
+ int value_defined;
#if defined(__i386__)
char *image_start = (char *)_image_start; /* use lowmem mappings */
char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
@@ -319,10 +322,40 @@ int construct_dom0(struct domain *d,
nr_pages = compute_dom0_nr_pages();
- if ( (rc = parseelfimage(&dsi)) != 0 )
- return rc;
-
- xen_pae = (CONFIG_PAGING_LEVELS == 3);
+ rc = parseelfimage(&dsi);
+#ifdef CONFIG_COMPAT
+ if ( rc == -ENOSYS
+ && !compat_disabled
+ && (rc = parseelf32image(&dsi)) == 0 )
+ {
+ l1_pgentry_t gdt_l1e;
+
+ set_bit(_DOMF_compat, &d->domain_flags);
+ v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];
+
+ if ( nr_pages != (unsigned int)nr_pages )
+ nr_pages = UINT_MAX;
+
+ /*
+ * Map compatibility Xen segments into every VCPU's GDT. See
+ * arch_domain_create() for further comments.
+ */
+ gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
+ PAGE_HYPERVISOR);
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+ local_flush_tlb_one(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
+ }
+#endif
+ if ( rc != 0)
+ {
+ if ( rc == -ENOSYS )
+ printk("DOM0 image is not a Xen-compatible Elf image.\n");
+ return rc;
+ }
+
+ xen_pae = (CONFIG_PAGING_LEVELS == 3) || IS_COMPAT(d);
if (dsi.pae_kernel == PAEKERN_bimodal)
dom0_pae = xen_pae;
else
@@ -338,7 +371,40 @@ int construct_dom0(struct domain *d,
dsi.pae_kernel == PAEKERN_bimodal) )
set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
- if ( (p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES)) != NULL )
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ {
+ value = xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HV_START_LOW, &value_defined);
+ p = xen_elf32note_string(&dsi, XEN_ELFNOTE_FEATURES);
+ }
+ else
+#endif
+ {
+ value = xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HV_START_LOW, &value_defined);
+ p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES);
+ }
+ if ( value_defined )
+ {
+#if CONFIG_PAGING_LEVELS < 4
+ unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+#else
+ unsigned long mask = !IS_COMPAT(d)
+ ? (1UL << L4_PAGETABLE_SHIFT) - 1
+ : (1UL << L2_PAGETABLE_SHIFT) - 1;
+#endif
+
+ value = (value + mask) & ~mask;
+#ifdef CONFIG_COMPAT
+ HYPERVISOR_COMPAT_VIRT_START(d) = max_t(unsigned int, m2p_compat_vstart, value);
+ if ( value > (!IS_COMPAT(d) ?
+ HYPERVISOR_VIRT_START :
+ __HYPERVISOR_COMPAT_VIRT_START) )
+#else
+ if ( value > HYPERVISOR_VIRT_START )
+#endif
+ panic("Domain 0 expects too high a hypervisor start address.\n");
+ }
+ if ( p != NULL )
{
parse_features(p,
dom0_features_supported,
@@ -364,7 +430,9 @@ int construct_dom0(struct domain *d,
vinitrd_start = round_pgup(dsi.v_end);
vinitrd_end = vinitrd_start + initrd_len;
vphysmap_start = round_pgup(vinitrd_end);
- vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
+ vphysmap_end = vphysmap_start + (nr_pages * (!IS_COMPAT(d) ?
+ sizeof(unsigned long) :
+ sizeof(unsigned int)));
vstartinfo_start = round_pgup(vphysmap_end);
vstartinfo_end = (vstartinfo_start +
sizeof(struct start_info) +
@@ -393,7 +461,9 @@ int construct_dom0(struct domain *d,
((_l) & ~((1UL<<(_s))-1))) >> (_s))
if ( (1 + /* # L4 */
NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
- NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
+ (!IS_COMPAT(d) ?
+ NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) : /* # L2 */
+ 4) + /* # compat L2 */
NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
<= nr_pt_pages )
break;
@@ -583,22 +653,46 @@ int construct_dom0(struct domain *d,
#elif defined(__x86_64__)
/* Overlap with Xen protected area? */
- if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
- (v_end > HYPERVISOR_VIRT_START) )
+ if ( !IS_COMPAT(d) ?
+ ((dsi.v_start < HYPERVISOR_VIRT_END) &&
+ (v_end > HYPERVISOR_VIRT_START)) :
+ (v_end > HYPERVISOR_COMPAT_VIRT_START(d)) )
{
printk("DOM0 image overlaps with Xen private area.\n");
return -EINVAL;
}
+ if ( IS_COMPAT(d) )
+ {
+ v->arch.guest_context.failsafe_callback_cs = FLAT_COMPAT_KERNEL_CS;
+ v->arch.guest_context.event_callback_cs = FLAT_COMPAT_KERNEL_CS;
+ }
+
/* WARNING: The new domain must have its 'processor' field filled in! */
- maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
- l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ if ( !IS_COMPAT(d) )
+ {
+ maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
+ l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ }
+ else
+ {
+ page = alloc_domheap_page(NULL);
+ if ( !page )
+ panic("Not enough RAM for domain 0 PML4.\n");
+ l4start = l4tab = page_to_virt(page);
+ }
memcpy(l4tab, idle_pg_table, PAGE_SIZE);
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
+ if ( IS_COMPAT(d) )
+ {
+ v->arch.guest_table_user = v->arch.guest_table;
+ if ( setup_arg_xlat_area(v, l4start) < 0 )
+ panic("Not enough RAM for domain 0 hypercall argument translation.\n");
+ }
l4tab += l4_table_offset(dsi.v_start);
mfn = alloc_spfn;
@@ -635,7 +729,7 @@ int construct_dom0(struct domain *d,
*l2tab = l2e_from_paddr(__pa(l1start), L2_PROT);
l2tab++;
}
- *l1tab = l1e_from_pfn(mfn, L1_PROT);
+ *l1tab = l1e_from_pfn(mfn, !IS_COMPAT(d) ? L1_PROT : COMPAT_L1_PROT);
l1tab++;
page = mfn_to_page(mfn);
@@ -645,6 +739,30 @@ int construct_dom0(struct domain *d,
mfn++;
}
+
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ {
+ /* Ensure the first four L3 entries are all populated. */
+ for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab )
+ {
+ if ( !l3e_get_intpte(*l3tab) )
+ {
+ maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
+ l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ clear_page(l2tab);
+ *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT);
+ }
+ if ( i == 3 )
+ l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2;
+ }
+ /* Install read-only guest visible MPT mapping. */
+ l2tab = l3e_to_l2e(l3start[3]);
+ memcpy(&l2tab[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
+ &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
+ COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2tab));
+ }
+#endif
/* Pages that are part of page tables must be read only. */
l4tab = l4start + l4_table_offset(vpt_start);
@@ -664,7 +782,8 @@ int construct_dom0(struct domain *d,
page->u.inuse.type_info |= PGT_validated | 1;
/* Top-level p.t. is pinned. */
- if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
+ if ( (page->u.inuse.type_info & PGT_type_mask) ==
+ (!IS_COMPAT(d) ? PGT_l4_page_table : PGT_l3_page_table) )
{
page->count_info += 1;
page->u.inuse.type_info += 1 | PGT_pinned;
@@ -687,7 +806,7 @@ int construct_dom0(struct domain *d,
/* Mask all upcalls... */
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
+ shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1;
if ( opt_dom0_max_vcpus == 0 )
opt_dom0_max_vcpus = num_online_cpus();
@@ -695,6 +814,8 @@ int construct_dom0(struct domain *d,
opt_dom0_max_vcpus = num_online_cpus();
if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
opt_dom0_max_vcpus = MAX_VIRT_CPUS;
+ if ( opt_dom0_max_vcpus > BITS_PER_GUEST_LONG(d) )
+ opt_dom0_max_vcpus = BITS_PER_GUEST_LONG(d);
printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus);
for ( i = 1; i < opt_dom0_max_vcpus; i++ )
@@ -711,20 +832,30 @@ int construct_dom0(struct domain *d,
write_ptbase(v);
/* Copy the OS image and free temporary buffer. */
- (void)loadelfimage(&dsi);
-
- hypercall_page =
- xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &hypercall_page_defined);
- if ( hypercall_page_defined )
- {
- if ( (hypercall_page < dsi.v_start) || (hypercall_page >= v_end) )
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ {
+ (void)loadelf32image(&dsi);
+ value =
+ xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &value_defined);
+ }
+ else
+#endif
+ {
+ (void)loadelfimage(&dsi);
+ value =
+ xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &value_defined);
+ }
+ if ( value_defined )
+ {
+ if ( (value < dsi.v_start) || (value >= v_end) )
{
write_ptbase(current);
local_irq_enable();
printk("Invalid HYPERCALL_PAGE field in ELF notes.\n");
return -1;
}
- hypercall_page_initialise(d, (void *)hypercall_page);
+ hypercall_page_initialise(d, (void *)(unsigned long)value);
}
/* Copy the initial ramdisk. */
@@ -742,12 +873,12 @@ int construct_dom0(struct domain *d,
si->shared_info = virt_to_maddr(d->shared_info);
si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
- si->pt_base = vpt_start;
+ si->pt_base = vpt_start + 2 * PAGE_SIZE * !!IS_COMPAT(d);
si->nr_pt_frames = nr_pt_pages;
si->mfn_list = vphysmap_start;
sprintf(si->magic, "xen-%i.%i-x86_%d%s",
xen_major_version(), xen_minor_version(),
- BITS_PER_LONG, xen_pae ? "p" : "");
+ !IS_COMPAT(d) ? BITS_PER_LONG : 32, xen_pae ? "p" : "");
/* Write the phys->machine and machine->phys table entries. */
for ( pfn = 0; pfn < d->tot_pages; pfn++ )
@@ -758,7 +889,10 @@ int construct_dom0(struct domain *d,
if ( pfn > REVERSE_START )
mfn = alloc_epfn - (pfn - REVERSE_START);
#endif
- ((unsigned long *)vphysmap_start)[pfn] = mfn;
+ if ( !IS_COMPAT(d) )
+ ((unsigned long *)vphysmap_start)[pfn] = mfn;
+ else
+ ((unsigned int *)vphysmap_start)[pfn] = mfn;
set_gpfn_from_mfn(mfn, pfn);
}
while ( pfn < nr_pages )
@@ -771,7 +905,10 @@ int construct_dom0(struct domain *d,
#ifndef NDEBUG
#define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
#endif
- ((unsigned long *)vphysmap_start)[pfn] = mfn;
+ if ( !IS_COMPAT(d) )
+ ((unsigned long *)vphysmap_start)[pfn] = mfn;
+ else
+ ((unsigned int *)vphysmap_start)[pfn] = mfn;
set_gpfn_from_mfn(mfn, pfn);
#undef pfn
page++; pfn++;
@@ -795,6 +932,11 @@ int construct_dom0(struct domain *d,
si->console.dom0.info_off = sizeof(struct start_info);
si->console.dom0.info_size = sizeof(struct dom0_vga_console_info);
}
+
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ xlat_start_info(si, XLAT_start_info_console_dom0);
+#endif
/* Reinstate the caller's page tables. */
write_ptbase(current);
@@ -819,9 +961,11 @@ int construct_dom0(struct domain *d,
* [EAX,EBX,ECX,EDX,EDI,EBP are zero]
*/
regs = &v->arch.guest_context.user_regs;
- regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS;
- regs->ss = FLAT_KERNEL_SS;
- regs->cs = FLAT_KERNEL_CS;
+ regs->ds = regs->es = regs->fs = regs->gs = !IS_COMPAT(d)
+ ? FLAT_KERNEL_DS
+ : FLAT_COMPAT_KERNEL_DS;
+ regs->ss = !IS_COMPAT(d) ? FLAT_KERNEL_SS : FLAT_COMPAT_KERNEL_SS;
+ regs->cs = !IS_COMPAT(d) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
regs->eip = dsi.v_kernentry;
regs->esp = vstack_end;
regs->esi = vstartinfo_start;
@@ -906,12 +1050,27 @@ int elf_sanity_check(const Elf_Ehdr *ehd
(ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
(ehdr->e_type != ET_EXEC) )
{
- printk("DOM0 image is not a Xen-compatible Elf image.\n");
return 0;
}
return 1;
}
+
+#ifdef CONFIG_COMPAT
+int elf32_sanity_check(const Elf32_Ehdr *ehdr)
+{
+ if ( !IS_ELF(*ehdr) ||
+ (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
+ (ehdr->e_machine != EM_386) ||
+ (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
+ (ehdr->e_type != ET_EXEC) )
+ {
+ return 0;
+ }
+
+ return 1;
+}
+#endif
/*
* Local variables:
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/domctl.c Wed Jan 17 09:56:40 2007 -0500
@@ -11,6 +11,7 @@
#include <xen/guest_access.h>
#include <public/domctl.h>
#include <xen/sched.h>
+#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
#include <asm/msr.h>
@@ -23,12 +24,21 @@
#include <asm/hvm/support.h>
#include <asm/processor.h>
#include <public/hvm/e820.h>
-
-long arch_do_domctl(
+#ifdef CONFIG_COMPAT
+#include <compat/xen.h>
+#endif
+
+#ifndef COMPAT
+#define _long long
+#define copy_from_xxx_offset copy_from_guest_offset
+#define copy_to_xxx_offset copy_to_guest_offset
+#endif
+
+_long arch_do_domctl(
struct xen_domctl *domctl,
XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
{
- long ret = 0;
+ _long ret = 0;
switch ( domctl->cmd )
{
@@ -40,7 +50,9 @@ long arch_do_domctl(
d = find_domain_by_id(domctl->domain);
if ( d != NULL )
{
- ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl);
+ ret = shadow_domctl(d,
+ &domctl->u.shadow_op,
+ guest_handle_cast(u_domctl, void));
put_domain(d);
copy_to_guest(u_domctl, domctl, 1);
}
@@ -123,12 +135,12 @@ long arch_do_domctl(
case XEN_DOMCTL_getpageframeinfo2:
{
-#define GPF2_BATCH (PAGE_SIZE / sizeof(long))
+#define GPF2_BATCH (PAGE_SIZE / sizeof(_long))
int n,j;
int num = domctl->u.getpageframeinfo2.num;
domid_t dom = domctl->domain;
struct domain *d;
- unsigned long *l_arr;
+ unsigned _long *l_arr;
ret = -ESRCH;
if ( unlikely((d = find_domain_by_id(dom)) == NULL) )
@@ -148,9 +160,9 @@ long arch_do_domctl(
{
int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n);
- if ( copy_from_guest_offset(l_arr,
- domctl->u.getpageframeinfo2.array,
- n, k) )
+ if ( copy_from_xxx_offset(l_arr,
+ domctl->u.getpageframeinfo2.array,
+ n, k) )
{
ret = -EINVAL;
break;
@@ -159,13 +171,13 @@ long arch_do_domctl(
for ( j = 0; j < k; j++ )
{
struct page_info *page;
- unsigned long mfn = l_arr[j];
+ unsigned _long mfn = l_arr[j];
page = mfn_to_page(mfn);
if ( likely(mfn_valid(mfn) && get_page(page, d)) )
{
- unsigned long type = 0;
+ unsigned _long type = 0;
switch( page->u.inuse.type_info & PGT_type_mask )
{
@@ -193,8 +205,8 @@ long arch_do_domctl(
}
- if ( copy_to_guest_offset(domctl->u.getpageframeinfo2.array,
- n, l_arr, k) )
+ if ( copy_to_xxx_offset(domctl->u.getpageframeinfo2.array,
+ n, l_arr, k) )
{
ret = -EINVAL;
break;
@@ -214,7 +226,7 @@ long arch_do_domctl(
int i;
struct domain *d = find_domain_by_id(domctl->domain);
unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
- unsigned long mfn;
+ xen_pfn_t mfn;
struct list_head *list_ent;
ret = -EINVAL;
@@ -229,8 +241,8 @@ long arch_do_domctl(
{
mfn = page_to_mfn(list_entry(
list_ent, struct page_info, list));
- if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
- i, &mfn, 1) )
+ if ( copy_to_xxx_offset(domctl->u.getmemlist.buffer,
+ i, &mfn, 1) )
{
ret = -EFAULT;
break;
@@ -289,32 +301,71 @@ long arch_do_domctl(
return ret;
}
-void arch_getdomaininfo_ctxt(
- struct vcpu *v, struct vcpu_guest_context *c)
+#ifndef COMPAT
+void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
{
- memcpy(c, &v->arch.guest_context, sizeof(*c));
+#ifdef CONFIG_COMPAT
+#define c(fld) (!IS_COMPAT(v->domain) ? (c.nat->fld) : (c.cmp->fld))
+#else
+#define c(fld) (c.nat->fld)
+#endif
+ unsigned long flags;
+
+ if ( !IS_COMPAT(v->domain) )
+ memcpy(c.nat, &v->arch.guest_context, sizeof(*c.nat));
+#ifdef CONFIG_COMPAT
+ else
+ {
+ XLAT_vcpu_guest_context(c.cmp, &v->arch.guest_context);
+ }
+#endif
if ( is_hvm_vcpu(v) )
{
- hvm_store_cpu_guest_regs(v, &c->user_regs, c->ctrlreg);
+ if ( !IS_COMPAT(v->domain) )
+ hvm_store_cpu_guest_regs(v, &c.nat->user_regs, c.nat->ctrlreg);
+#ifdef CONFIG_COMPAT
+ else
+ {
+ struct cpu_user_regs user_regs;
+ typeof(c.nat->ctrlreg) ctrlreg;
+ unsigned i;
+
+ hvm_store_cpu_guest_regs(v, &user_regs, ctrlreg);
+ XLAT_cpu_user_regs(&c.cmp->user_regs, &user_regs);
+ for ( i = 0; i < ARRAY_SIZE(c.cmp->ctrlreg); ++i )
+ c.cmp->ctrlreg[i] = ctrlreg[i];
+ }
+#endif
}
else
{
/* IOPL privileges are virtualised: merge back into returned eflags. */
- BUG_ON((c->user_regs.eflags & EF_IOPL) != 0);
- c->user_regs.eflags |= v->arch.iopl << 12;
- }
-
- c->flags = 0;
+ BUG_ON((c(user_regs.eflags) & EF_IOPL) != 0);
+ c(user_regs.eflags |= v->arch.iopl << 12);
+ }
+
+ flags = 0;
if ( test_bit(_VCPUF_fpu_initialised, &v->vcpu_flags) )
- c->flags |= VGCF_i387_valid;
+ flags |= VGCF_i387_valid;
if ( guest_kernel_mode(v, &v->arch.guest_context.user_regs) )
- c->flags |= VGCF_in_kernel;
-
- c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
-
- c->vm_assist = v->domain->vm_assist;
+ flags |= VGCF_in_kernel;
+ c(flags = flags);
+
+ if ( !IS_COMPAT(v->domain) )
+ c.nat->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
+#ifdef CONFIG_COMPAT
+ else
+ {
+ l4_pgentry_t *l4e = __va(pagetable_get_paddr(v->arch.guest_table));
+ c.cmp->ctrlreg[3] = compat_pfn_to_cr3(l4e_get_pfn(*l4e));
+ }
+#endif
+
+ c(vm_assist = v->domain->vm_assist);
+#undef c
}
+#endif
/*
* Local variables:
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/e820.c
--- a/xen/arch/x86/e820.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/e820.c Wed Jan 17 09:56:40 2007 -0500
@@ -1,6 +1,7 @@
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
+#include <xen/compat.h>
#include <asm/e820.h>
#include <asm/page.h>
@@ -341,6 +342,39 @@ static void __init clip_4gb(void)
#define clip_4gb() ((void)0)
#endif
+#ifdef CONFIG_COMPAT
+static void __init clip_compat(void)
+{
+ unsigned long long limit;
+ unsigned int i;
+
+ if ( compat_disabled )
+ return;
+ /* 32-bit guests restricted to 166 GB (with current memory allocator). */
+ limit = (unsigned long long)(MACH2PHYS_COMPAT_VIRT_END -
+ __HYPERVISOR_COMPAT_VIRT_START) << 10;
+ for ( i = 0; i < e820.nr_map; i++ )
+ {
+ if ( (e820.map[i].addr + e820.map[i].size) <= limit )
+ continue;
+ printk("WARNING: Only the first %Lu GB of the physical memory map "
+ "can be accessed\n"
+ " by compatibility mode guests. "
+ "Truncating the memory map...\n",
+ limit >> 30);
+ if ( e820.map[i].addr >= limit )
+ e820.nr_map = i;
+ else
+ {
+ e820.map[i].size = limit - e820.map[i].addr;
+ e820.nr_map = i + 1;
+ }
+ }
+}
+#else
+#define clip_compat() ((void)0)
+#endif
+
static void __init clip_mem(void)
{
int i;
@@ -374,6 +408,7 @@ static void __init machine_specific_memo
*raw_nr = nr;
(void)copy_e820_map(raw, nr);
clip_4gb();
+ clip_compat();
clip_mem();
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/hpet.c Wed Jan 17 09:56:40 2007 -0500
@@ -356,8 +356,6 @@ static void hpet_timer_fn(void *opaque)
}
set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, h->period[tn]));
}
-
- vcpu_kick(h->vcpu);
}
void hpet_migrate_timers(struct vcpu *v)
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/hvm.c Wed Jan 17 09:56:40 2007 -0500
@@ -800,7 +800,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
d->arch.hvm_domain.buffered_io_va = (unsigned long)p;
break;
case HVM_PARAM_CALLBACK_IRQ:
- hvm_set_callback_gsi(d, a.value);
+ hvm_set_callback_via(d, a.value);
break;
}
d->arch.hvm_domain.params[a.index] = a.value;
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/instrlen.c
--- a/xen/arch/x86/hvm/instrlen.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/instrlen.c Wed Jan 17 09:56:40 2007 -0500
@@ -201,7 +201,7 @@ static uint8_t twobyte_table[256] = {
if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \
gdprintk(XENLOG_WARNING, \
"Cannot read from address %lx (eip %lx, mode %d)\n", \
- pc, org_pc, mode); \
+ pc, org_pc, address_bytes); \
return -1; \
} \
pc += 1; \
@@ -218,30 +218,20 @@ static uint8_t twobyte_table[256] = {
* EXTERNAL this routine calculates the length of the current instruction
* pointed to by org_pc. The guest state is _not_ changed by this routine.
*/
-int hvm_instruction_length(unsigned long org_pc, int mode)
+int hvm_instruction_length(unsigned long org_pc, int address_bytes)
{
uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0;
unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp;
int length = 0;
unsigned long pc = org_pc;
- switch ( mode )
- {
- case X86EMUL_MODE_REAL:
- case X86EMUL_MODE_PROT16:
- op_bytes = op_default = ad_bytes = ad_default = 2;
- break;
- case X86EMUL_MODE_PROT32:
- op_bytes = op_default = ad_bytes = ad_default = 4;
- break;
-#ifdef __x86_64__
- case X86EMUL_MODE_PROT64:
+ op_bytes = op_default = ad_bytes = ad_default = address_bytes;
+ if ( op_bytes == 8 )
+ {
op_bytes = op_default = 4;
- ad_bytes = ad_default = 8;
- break;
+#ifndef __x86_64__
+ return -1;
#endif
- default:
- return -1;
}
/* Legacy prefixes. */
@@ -253,7 +243,7 @@ int hvm_instruction_length(unsigned long
op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
break;
case 0x67: /* address-size override */
- if ( mode == X86EMUL_MODE_PROT64 )
+ if ( ad_default == 8 )
ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */
else
ad_bytes = ad_default ^ 6; /* switch between 2/4 bytes */
@@ -270,7 +260,7 @@ int hvm_instruction_length(unsigned long
break;
#ifdef __x86_64__
case 0x40 ... 0x4f:
- if ( mode == X86EMUL_MODE_PROT64 )
+ if ( ad_default == 8 )
{
rex_prefix = b;
continue;
@@ -434,7 +424,7 @@ done:
cannot_emulate:
gdprintk(XENLOG_WARNING,
- "Cannot emulate %02x at address %lx (%lx, mode %d)\n",
- b, pc - 1, org_pc, mode);
+ "Cannot emulate %02x at address %lx (%lx, addr_bytes %d)\n",
+ b, pc - 1, org_pc, address_bytes);
return -1;
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/intercept.c Wed Jan 17 09:56:40 2007 -0500
@@ -182,7 +182,7 @@ int hvm_buffered_io_intercept(ioreq_t *p
spin_lock(buffered_io_lock);
if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer ==
- (unsigned long)IOREQ_BUFFER_SLOT_NUM ) {
+ (unsigned int)IOREQ_BUFFER_SLOT_NUM ) {
/* the queue is full.
* send the iopacket through the normal path.
* NOTE: The arithimetic operation could handle the situation for
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/irq.c Wed Jan 17 09:56:40 2007 -0500
@@ -25,7 +25,7 @@
#include <xen/sched.h>
#include <asm/hvm/domain.h>
-void hvm_pci_intx_assert(
+static void __hvm_pci_intx_assert(
struct domain *d, unsigned int device, unsigned int intx)
{
struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
@@ -33,10 +33,8 @@ void hvm_pci_intx_assert(
ASSERT((device <= 31) && (intx <= 3));
- spin_lock(&hvm_irq->lock);
-
if ( __test_and_set_bit(device*4 + intx, &hvm_irq->pci_intx) )
- goto out;
+ return;
gsi = hvm_pci_intx_gsi(device, intx);
if ( hvm_irq->gsi_assert_count[gsi]++ == 0 )
@@ -50,12 +48,19 @@ void hvm_pci_intx_assert(
vioapic_irq_positive_edge(d, isa_irq);
vpic_irq_positive_edge(d, isa_irq);
}
-
- out:
- spin_unlock(&hvm_irq->lock);
-}
-
-void hvm_pci_intx_deassert(
+}
+
+void hvm_pci_intx_assert(
+ struct domain *d, unsigned int device, unsigned int intx)
+{
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+
+ spin_lock(&hvm_irq->lock);
+ __hvm_pci_intx_assert(d, device, intx);
+ spin_unlock(&hvm_irq->lock);
+}
+
+static void __hvm_pci_intx_deassert(
struct domain *d, unsigned int device, unsigned int intx)
{
struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
@@ -63,10 +68,8 @@ void hvm_pci_intx_deassert(
ASSERT((device <= 31) && (intx <= 3));
- spin_lock(&hvm_irq->lock);
-
if ( !__test_and_clear_bit(device*4 + intx, &hvm_irq->pci_intx) )
- goto out;
+ return;
gsi = hvm_pci_intx_gsi(device, intx);
--hvm_irq->gsi_assert_count[gsi];
@@ -76,8 +79,15 @@ void hvm_pci_intx_deassert(
if ( (--hvm_irq->pci_link_assert_count[link] == 0) && isa_irq &&
(--hvm_irq->gsi_assert_count[isa_irq] == 0) )
vpic_irq_negative_edge(d, isa_irq);
-
- out:
+}
+
+void hvm_pci_intx_deassert(
+ struct domain *d, unsigned int device, unsigned int intx)
+{
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+
+ spin_lock(&hvm_irq->lock);
+ __hvm_pci_intx_deassert(d, device, intx);
spin_unlock(&hvm_irq->lock);
}
@@ -123,36 +133,47 @@ void hvm_set_callback_irq_level(void)
struct vcpu *v = current;
struct domain *d = v->domain;
struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
- unsigned int gsi = hvm_irq->callback_gsi;
+ unsigned int gsi, pdev, pintx, asserted;
/* Fast lock-free tests. */
- if ( (v->vcpu_id != 0) || (gsi == 0) )
+ if ( (v->vcpu_id != 0) ||
+ (hvm_irq->callback_via_type == HVMIRQ_callback_none) )
return;
spin_lock(&hvm_irq->lock);
- gsi = hvm_irq->callback_gsi;
- if ( gsi == 0 )
+ /* NB. Do not check the evtchn_upcall_mask. It is not used in HVM mode. */
+ asserted = !!vcpu_info(v, evtchn_upcall_pending);
+ if ( hvm_irq->callback_via_asserted == asserted )
goto out;
-
- if ( local_events_need_delivery() )
- {
- if ( !__test_and_set_bit(0, &hvm_irq->callback_irq_wire) &&
- (hvm_irq->gsi_assert_count[gsi]++ == 0) )
+ hvm_irq->callback_via_asserted = asserted;
+
+ /* Callback status has changed. Update the callback via. */
+ switch ( hvm_irq->callback_via_type )
+ {
+ case HVMIRQ_callback_gsi:
+ gsi = hvm_irq->callback_via.gsi;
+ if ( asserted && (hvm_irq->gsi_assert_count[gsi]++ == 0) )
{
vioapic_irq_positive_edge(d, gsi);
if ( gsi <= 15 )
vpic_irq_positive_edge(d, gsi);
}
- }
- else
- {
- if ( __test_and_clear_bit(0, &hvm_irq->callback_irq_wire) &&
- (--hvm_irq->gsi_assert_count[gsi] == 0) )
+ else if ( !asserted && (--hvm_irq->gsi_assert_count[gsi] == 0) )
{
if ( gsi <= 15 )
vpic_irq_negative_edge(d, gsi);
}
+ break;
+ case HVMIRQ_callback_pci_intx:
+ pdev = hvm_irq->callback_via.pci.dev;
+ pintx = hvm_irq->callback_via.pci.intx;
+ if ( asserted )
+ __hvm_pci_intx_assert(d, pdev, pintx);
+ else
+ __hvm_pci_intx_deassert(d, pdev, pintx);
+ default:
+ break;
}
out:
@@ -192,40 +213,79 @@ void hvm_set_pci_link_route(struct domai
d->domain_id, link, old_isa_irq, isa_irq);
}
-void hvm_set_callback_gsi(struct domain *d, unsigned int gsi)
-{
- struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
- unsigned int old_gsi;
-
- if ( gsi >= ARRAY_SIZE(hvm_irq->gsi_assert_count) )
- gsi = 0;
-
- spin_lock(&hvm_irq->lock);
-
- old_gsi = hvm_irq->callback_gsi;
- if ( old_gsi == gsi )
- goto out;
- hvm_irq->callback_gsi = gsi;
-
- if ( !test_bit(0, &hvm_irq->callback_irq_wire) )
- goto out;
-
- if ( old_gsi && (--hvm_irq->gsi_assert_count[old_gsi] == 0) )
- if ( old_gsi <= 15 )
- vpic_irq_negative_edge(d, old_gsi);
-
- if ( gsi && (hvm_irq->gsi_assert_count[gsi]++ == 0) )
- {
- vioapic_irq_positive_edge(d, gsi);
- if ( gsi <= 15 )
- vpic_irq_positive_edge(d, gsi);
- }
-
- out:
- spin_unlock(&hvm_irq->lock);
-
- dprintk(XENLOG_G_INFO, "Dom%u callback GSI changed %u -> %u\n",
- d->domain_id, old_gsi, gsi);
+void hvm_set_callback_via(struct domain *d, uint64_t via)
+{
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+ unsigned int gsi=0, pdev=0, pintx=0;
+ uint8_t via_type;
+
+ via_type = (uint8_t)(via >> 56) + 1;
+ if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) ||
+ (via_type > HVMIRQ_callback_pci_intx) )
+ via_type = HVMIRQ_callback_none;
+
+ spin_lock(&hvm_irq->lock);
+
+ /* Tear down old callback via. */
+ if ( hvm_irq->callback_via_asserted )
+ {
+ switch ( hvm_irq->callback_via_type )
+ {
+ case HVMIRQ_callback_gsi:
+ gsi = hvm_irq->callback_via.gsi;
+ if ( (--hvm_irq->gsi_assert_count[gsi] == 0) && (gsi <= 15) )
+ vpic_irq_negative_edge(d, gsi);
+ break;
+ case HVMIRQ_callback_pci_intx:
+ pdev = hvm_irq->callback_via.pci.dev;
+ pintx = hvm_irq->callback_via.pci.intx;
+ __hvm_pci_intx_deassert(d, pdev, pintx);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Set up new callback via. */
+ switch ( hvm_irq->callback_via_type = via_type )
+ {
+ case HVMIRQ_callback_gsi:
+ gsi = hvm_irq->callback_via.gsi = (uint8_t)via;
+ if ( (gsi == 0) || (gsi >= ARRAY_SIZE(hvm_irq->gsi_assert_count)) )
+ hvm_irq->callback_via_type = HVMIRQ_callback_none;
+ else if ( hvm_irq->callback_via_asserted &&
+ (hvm_irq->gsi_assert_count[gsi]++ == 0) )
+ {
+ vioapic_irq_positive_edge(d, gsi);
+ if ( gsi <= 15 )
+ vpic_irq_positive_edge(d, gsi);
+ }
+ break;
+ case HVMIRQ_callback_pci_intx:
+ pdev = hvm_irq->callback_via.pci.dev = (uint8_t)(via >> 11) & 31;
+ pintx = hvm_irq->callback_via.pci.intx = (uint8_t)via & 3;
+ if ( hvm_irq->callback_via_asserted )
+ __hvm_pci_intx_assert(d, pdev, pintx);
+ break;
+ default:
+ break;
+ }
+
+ spin_unlock(&hvm_irq->lock);
+
+ dprintk(XENLOG_G_INFO, "Dom%u callback via changed to ", d->domain_id);
+ switch ( via_type )
+ {
+ case HVMIRQ_callback_gsi:
+ printk("GSI %u\n", gsi);
+ break;
+ case HVMIRQ_callback_pci_intx:
+ printk("PCI INTx Dev 0x%02x Int%c\n", pdev, 'A' + pintx);
+ break;
+ default:
+ printk("None\n");
+ break;
+ }
}
int cpu_has_pending_irq(struct vcpu *v)
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/platform.c Wed Jan 17 09:56:40 2007 -0500
@@ -352,7 +352,7 @@ static int reg_mem(unsigned char size, u
return DECODE_success;
}
-static int mmio_decode(int mode, unsigned char *opcode,
+static int mmio_decode(int address_bytes, unsigned char *opcode,
struct hvm_io_op *mmio_op,
unsigned char *ad_size, unsigned char *op_size,
unsigned char *seg_sel)
@@ -368,9 +368,9 @@ static int mmio_decode(int mode, unsigne
opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex);
- switch ( mode ) {
- case X86EMUL_MODE_REAL: /* meaning is reversed */
- case X86EMUL_MODE_PROT16:
+ switch ( address_bytes )
+ {
+ case 2:
if ( *op_size == WORD )
*op_size = LONG;
else if ( *op_size == LONG )
@@ -384,14 +384,14 @@ static int mmio_decode(int mode, unsigne
else if ( *ad_size == 0 )
*ad_size = WORD;
break;
- case X86EMUL_MODE_PROT32:
+ case 4:
if ( *op_size == 0 )
*op_size = LONG;
if ( *ad_size == 0 )
*ad_size = LONG;
break;
#ifdef __x86_64__
- case X86EMUL_MODE_PROT64:
+ case 8:
if ( *op_size == 0 )
*op_size = rex & 0x8 ? QUAD : LONG;
if ( *ad_size == 0 )
@@ -907,7 +907,7 @@ void handle_mmio(unsigned long gpa)
struct hvm_io_op *mmio_op;
struct cpu_user_regs *regs;
unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel;
- int i, mode, df, inst_len;
+ int i, address_bytes, df, inst_len;
struct vcpu *v = current;
mmio_op = &v->arch.hvm_vcpu.io_op;
@@ -919,9 +919,9 @@ void handle_mmio(unsigned long gpa)
df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
- mode = hvm_guest_x86_mode(v);
+ address_bytes = hvm_guest_x86_mode(v);
inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip;
- inst_len = hvm_instruction_length(inst_addr, mode);
+ inst_len = hvm_instruction_length(inst_addr, address_bytes);
if ( inst_len <= 0 )
{
printk("handle_mmio: failed to get instruction length\n");
@@ -934,8 +934,8 @@ void handle_mmio(unsigned long gpa)
domain_crash_synchronous();
}
- if ( mmio_decode(mode, inst, mmio_op, &ad_size, &op_size, &seg_sel)
- == DECODE_failure ) {
+ if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size,
+ &op_size, &seg_sel) == DECODE_failure ) {
printk("handle_mmio: failed to decode instruction\n");
printk("mmio opcode: gpa 0x%lx, len %d:", gpa, inst_len);
for ( i = 0; i < inst_len; i++ )
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c Wed Jan 17 09:56:40 2007 -0500
@@ -482,15 +482,13 @@ static int svm_guest_x86_mode(struct vcp
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- if ( vmcb->efer & EFER_LMA )
- return (vmcb->cs.attr.fields.l ?
- X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32);
+ if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l )
+ return 8;
if ( svm_realmode(v) )
- return X86EMUL_MODE_REAL;
-
- return (vmcb->cs.attr.fields.db ?
- X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16);
+ return 2;
+
+ return (vmcb->cs.attr.fields.db ? 4 : 2);
}
void svm_update_host_cr3(struct vcpu *v)
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/vioapic.c Wed Jan 17 09:56:40 2007 -0500
@@ -309,6 +309,13 @@ static uint32_t ioapic_get_delivery_bitm
return mask;
}
+static inline int pit_channel0_enabled(void)
+{
+ PITState *pit = &current->domain->arch.hvm_domain.pl_time.vpit;
+ struct periodic_time *pt = &pit->channels[0].pt;
+ return pt->enabled;
+}
+
static void vioapic_deliver(struct vioapic *vioapic, int irq)
{
uint16_t dest = vioapic->redirtbl[irq].fields.dest_id;
@@ -341,7 +348,7 @@ static void vioapic_deliver(struct vioap
{
#ifdef IRQ0_SPECIAL_ROUTING
/* Force round-robin to pick VCPU 0 */
- if ( irq == hvm_isa_irq_to_gsi(0) )
+ if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() )
{
v = vioapic_domain(vioapic)->vcpu[0];
target = v ? vcpu_vlapic(v) : NULL;
@@ -374,7 +381,7 @@ static void vioapic_deliver(struct vioap
deliver_bitmask &= ~(1 << bit);
#ifdef IRQ0_SPECIAL_ROUTING
/* Do not deliver timer interrupts to VCPU != 0 */
- if ( irq == hvm_isa_irq_to_gsi(0) )
+ if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() )
v = vioapic_domain(vioapic)->vcpu[0];
else
#endif
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Jan 17 09:56:40 2007 -0500
@@ -278,7 +278,14 @@ static void vmx_set_host_env(struct vcpu
host_env.tr_base = (unsigned long) &init_tss[cpu];
__vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
__vmwrite(HOST_TR_BASE, host_env.tr_base);
- __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
+
+ /*
+ * Skip end of cpu_user_regs when entering the hypervisor because the
+ * CPU does not save context onto the stack. SS,RSP,CS,RIP,RFLAGS,etc
+ * all get saved into the VMCS instead.
+ */
+ __vmwrite(HOST_RSP,
+ (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
}
static void construct_vmcs(struct vcpu *v)
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Jan 17 09:56:40 2007 -0500
@@ -410,10 +410,6 @@ static void vmx_store_cpu_guest_regs(
regs->eflags = __vmread(GUEST_RFLAGS);
regs->ss = __vmread(GUEST_SS_SELECTOR);
regs->cs = __vmread(GUEST_CS_SELECTOR);
- regs->ds = __vmread(GUEST_DS_SELECTOR);
- regs->es = __vmread(GUEST_ES_SELECTOR);
- regs->gs = __vmread(GUEST_GS_SELECTOR);
- regs->fs = __vmread(GUEST_FS_SELECTOR);
regs->eip = __vmread(GUEST_RIP);
regs->esp = __vmread(GUEST_RSP);
}
@@ -429,62 +425,39 @@ static void vmx_store_cpu_guest_regs(
vmx_vmcs_exit(v);
}
-/*
- * The VMX spec (section 4.3.1.2, Checks on Guest Segment
- * Registers) says that virtual-8086 mode guests' segment
- * base-address fields in the VMCS must be equal to their
- * corresponding segment selector field shifted right by
- * four bits upon vmentry.
- *
- * This function (called only for VM86-mode guests) fixes
- * the bases to be consistent with the selectors in regs
- * if they're not already. Without this, we can fail the
- * vmentry check mentioned above.
- */
-static void fixup_vm86_seg_bases(struct cpu_user_regs *regs)
+static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
{
unsigned long base;
- base = __vmread(GUEST_ES_BASE);
- if (regs->es << 4 != base)
- __vmwrite(GUEST_ES_BASE, regs->es << 4);
- base = __vmread(GUEST_CS_BASE);
- if (regs->cs << 4 != base)
- __vmwrite(GUEST_CS_BASE, regs->cs << 4);
- base = __vmread(GUEST_SS_BASE);
- if (regs->ss << 4 != base)
- __vmwrite(GUEST_SS_BASE, regs->ss << 4);
- base = __vmread(GUEST_DS_BASE);
- if (regs->ds << 4 != base)
- __vmwrite(GUEST_DS_BASE, regs->ds << 4);
- base = __vmread(GUEST_FS_BASE);
- if (regs->fs << 4 != base)
- __vmwrite(GUEST_FS_BASE, regs->fs << 4);
- base = __vmread(GUEST_GS_BASE);
- if (regs->gs << 4 != base)
- __vmwrite(GUEST_GS_BASE, regs->gs << 4);
-}
-
-static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
-{
vmx_vmcs_enter(v);
__vmwrite(GUEST_SS_SELECTOR, regs->ss);
- __vmwrite(GUEST_DS_SELECTOR, regs->ds);
- __vmwrite(GUEST_ES_SELECTOR, regs->es);
- __vmwrite(GUEST_GS_SELECTOR, regs->gs);
- __vmwrite(GUEST_FS_SELECTOR, regs->fs);
-
__vmwrite(GUEST_RSP, regs->esp);
/* NB. Bit 1 of RFLAGS must be set for VMENTRY to succeed. */
__vmwrite(GUEST_RFLAGS, regs->eflags | 2UL);
- if (regs->eflags & EF_TF)
+
+ if ( regs->eflags & EF_TF )
__vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
else
__vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
- if (regs->eflags & EF_VM)
- fixup_vm86_seg_bases(regs);
+
+ if ( regs->eflags & EF_VM )
+ {
+ /*
+ * The VMX spec (section 4.3.1.2, Checks on Guest Segment
+ * Registers) says that virtual-8086 mode guests' segment
+ * base-address fields in the VMCS must be equal to their
+ * corresponding segment selector field shifted left by
+ * four bits upon vmentry.
+ */
+ base = __vmread(GUEST_CS_BASE);
+ if ( (regs->cs << 4) != base )
+ __vmwrite(GUEST_CS_BASE, regs->cs << 4);
+ base = __vmread(GUEST_SS_BASE);
+ if ( (regs->ss << 4) != base )
+ __vmwrite(GUEST_SS_BASE, regs->ss << 4);
+ }
__vmwrite(GUEST_CS_SELECTOR, regs->cs);
__vmwrite(GUEST_RIP, regs->eip);
@@ -518,8 +491,7 @@ static unsigned long vmx_get_segment_bas
ASSERT(v == current);
#ifdef __x86_64__
- if ( vmx_long_mode_enabled(v) &&
- (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) )
+ if ( vmx_long_mode_enabled(v) && (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) )
long_mode = 1;
#endif
@@ -694,15 +666,13 @@ static int vmx_guest_x86_mode(struct vcp
cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES);
- if ( vmx_long_mode_enabled(v) )
- return ((cs_ar_bytes & (1u<<13)) ?
- X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32);
+ if ( vmx_long_mode_enabled(v) && (cs_ar_bytes & (1u<<13)) )
+ return 8;
if ( vmx_realmode(v) )
- return X86EMUL_MODE_REAL;
-
- return ((cs_ar_bytes & (1u<<14)) ?
- X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16);
+ return 2;
+
+ return ((cs_ar_bytes & (1u<<14)) ? 4 : 2);
}
static int vmx_pae_enabled(struct vcpu *v)
@@ -2253,47 +2223,54 @@ static void vmx_reflect_exception(struct
}
}
+/* Report a failed VM entry: log the cause, dump the VMCS, and crash the
+ * current domain.  exit_reason is the raw exit-reason word. */
+static void vmx_failed_vmentry(unsigned int exit_reason)
+{
+    unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
+
+    printk("Failed vm entry (exit reason 0x%x) ", exit_reason);
+    switch ( (uint16_t)exit_reason ) /* low 16 bits select the cause */
+    {
+    case EXIT_REASON_INVALID_GUEST_STATE:
+        printk("caused by invalid guest state (%lu).\n", exit_qualification);
+        break;
+    case EXIT_REASON_MSR_LOADING:
+        printk("caused by MSR entry %lu loading.\n", exit_qualification);
+        break;
+    case EXIT_REASON_MACHINE_CHECK:
+        printk("caused by machine check.\n");
+        break;
+    default:
+        printk("reason not known yet!\n"); /* end the line before the dump */
+        break;
+    }
+
+    printk("************* VMCS Area **************\n");
+    vmcs_dump_vcpu();
+    printk("**************************************\n");
+
+    domain_crash(current->domain);
+}
+
asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
unsigned int exit_reason;
unsigned long exit_qualification, inst_len = 0;
struct vcpu *v = current;
+ TRACE_3D(TRC_VMX_VMEXIT + v->vcpu_id, 0, 0, 0);
+
exit_reason = __vmread(VM_EXIT_REASON);
perfc_incra(vmexits, exit_reason);
+ TRACE_VMEXIT(0, exit_reason);
if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
local_irq_enable();
if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
- {
- unsigned int failed_vmentry_reason = exit_reason & 0xFFFF;
-
- exit_qualification = __vmread(EXIT_QUALIFICATION);
- printk("Failed vm entry (exit reason 0x%x) ", exit_reason);
- switch ( failed_vmentry_reason ) {
- case EXIT_REASON_INVALID_GUEST_STATE:
- printk("caused by invalid guest state (%ld).\n",
exit_qualification);
- break;
- case EXIT_REASON_MSR_LOADING:
- printk("caused by MSR entry %ld loading.\n", exit_qualification);
- break;
- case EXIT_REASON_MACHINE_CHECK:
- printk("caused by machine check.\n");
- break;
- default:
- printk("reason not known yet!");
- break;
- }
-
- printk("************* VMCS Area **************\n");
- vmcs_dump_vcpu();
- printk("**************************************\n");
- goto exit_and_crash;
- }
-
- TRACE_VMEXIT(0, exit_reason);
+ return vmx_failed_vmentry(exit_reason);
switch ( exit_reason )
{
@@ -2521,11 +2498,6 @@ asmlinkage void vmx_trace_vmentry(void)
TRACE_VMEXIT(4, 0);
}
-asmlinkage void vmx_trace_vmexit (void)
-{
- TRACE_3D(TRC_VMX_VMEXIT + current->vcpu_id, 0, 0, 0);
-}
-
/*
* Local variables:
* mode: C
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vmx/x86_32/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Wed Jan 17 09:56:40 2007 -0500
@@ -29,35 +29,7 @@
andl $~3,reg; \
movl (reg),reg;
-/*
- * At VMExit time the processor saves the guest selectors, esp, eip,
- * and eflags. Therefore we don't save them, but simply decrement
- * the kernel stack pointer to make it consistent with the stack frame
- * at usual interruption time. The eflags of the host is not saved by VMX,
- * and we set it to the fixed value.
- *
- * We also need the room, especially because orig_eax field is used
- * by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following:
- * (10) u32 gs;
- * (9) u32 fs;
- * (8) u32 ds;
- * (7) u32 es;
- * <- get_stack_bottom() (= HOST_ESP)
- * (6) u32 ss;
- * (5) u32 esp;
- * (4) u32 eflags;
- * (3) u32 cs;
- * (2) u32 eip;
- * (2/1) u16 entry_vector;
- * (1/1) u16 error_code;
- * However, get_stack_bottom() actually returns 20 bytes before the real
- * bottom of the stack to allow space for:
- * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
- */
-
-#define NR_SKIPPED_REGS 6 /* See the above explanation */
#define HVM_SAVE_ALL_NOSEGREGS \
- subl $(NR_SKIPPED_REGS*4), %esp; \
movl $0, 0xc(%esp); /* XXX why do we need to force eflags==0 ?? */ \
pushl %eax; \
pushl %ebp; \
@@ -74,14 +46,11 @@
popl %esi; \
popl %edi; \
popl %ebp; \
- popl %eax; \
- addl $(NR_SKIPPED_REGS*4), %esp
+ popl %eax
ALIGN
ENTRY(vmx_asm_vmexit_handler)
- /* selectors are restored/saved by VMX */
HVM_SAVE_ALL_NOSEGREGS
- call vmx_trace_vmexit
movl %esp,%eax
push %eax
call vmx_vmexit_handler
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vmx/x86_64/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Wed Jan 17 09:56:40 2007 -0500
@@ -29,31 +29,7 @@
andq $~7,reg; \
movq (reg),reg;
-/*
- * At VMExit time the processor saves the guest selectors, rsp, rip,
- * and rflags. Therefore we don't save them, but simply decrement
- * the kernel stack pointer to make it consistent with the stack frame
- * at usual interruption time. The rflags of the host is not saved by VMX,
- * and we set it to the fixed value.
- *
- * We also need the room, especially because orig_eax field is used
- * by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following:
- * (10) u64 gs;
- * (9) u64 fs;
- * (8) u64 ds;
- * (7) u64 es;
- * <- get_stack_bottom() (= HOST_ESP)
- * (6) u64 ss;
- * (5) u64 rsp;
- * (4) u64 rflags;
- * (3) u64 cs;
- * (2) u64 rip;
- * (2/1) u32 entry_vector;
- * (1/1) u32 error_code;
- */
-#define NR_SKIPPED_REGS 6 /* See the above explanation */
#define HVM_SAVE_ALL_NOSEGREGS \
- subq $(NR_SKIPPED_REGS*8), %rsp; \
pushq %rdi; \
pushq %rsi; \
pushq %rdx; \
@@ -85,14 +61,11 @@
popq %rcx; \
popq %rdx; \
popq %rsi; \
- popq %rdi; \
- addq $(NR_SKIPPED_REGS*8), %rsp;
+ popq %rdi
ALIGN
ENTRY(vmx_asm_vmexit_handler)
- /* selectors are restored/saved by VMX */
HVM_SAVE_ALL_NOSEGREGS
- call vmx_trace_vmexit
movq %rsp,%rdi
call vmx_vmexit_handler
jmp vmx_asm_do_vmentry
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/irq.c Wed Jan 17 09:56:40 2007 -0500
@@ -13,6 +13,7 @@
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/keyhandler.h>
+#include <xen/compat.h>
#include <asm/current.h>
#include <asm/smpboot.h>
@@ -332,7 +333,7 @@ int pirq_guest_unmask(struct domain *d)
irq < NR_IRQS;
irq = find_next_bit(d->pirq_mask, NR_IRQS, irq+1) )
{
- if ( !test_bit(d->pirq_to_evtchn[irq], s->evtchn_mask) )
+ if ( !test_bit(d->pirq_to_evtchn[irq], __shared_info_addr(d, s,
evtchn_mask)) )
__pirq_guest_eoi(d, irq);
}
@@ -624,14 +625,13 @@ static void dump_irqs(unsigned char key)
printk("%u(%c%c%c%c)",
d->domain_id,
(test_bit(d->pirq_to_evtchn[irq],
- d->shared_info->evtchn_pending) ?
+ shared_info_addr(d, evtchn_pending)) ?
'P' : '-'),
- (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_LONG,
- &d->shared_info->vcpu_info[0].
- evtchn_pending_sel) ?
+ (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_GUEST_LONG(d),
+ vcpu_info_addr(d->vcpu[0],
evtchn_pending_sel)) ?
'S' : '-'),
(test_bit(d->pirq_to_evtchn[irq],
- d->shared_info->evtchn_mask) ?
+ shared_info_addr(d, evtchn_mask)) ?
'M' : '-'),
(test_bit(irq, d->pirq_mask) ?
'M' : '-'));
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/mm.c Wed Jan 17 09:56:40 2007 -0500
@@ -106,6 +106,7 @@
#include <asm/ldt.h>
#include <asm/x86_emulate.h>
#include <asm/e820.h>
+#include <asm/hypercall.h>
#include <public/memory.h>
#define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
@@ -118,20 +119,6 @@
#if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
#define PTE_UPDATE_WITH_CMPXCHG
#endif
-
-/*
- * Both do_mmuext_op() and do_mmu_update():
- * We steal the m.s.b. of the @count parameter to indicate whether this
- * invocation of do_mmu_update() is resuming a previously preempted call.
- */
-#define MMU_UPDATE_PREEMPTED (~(~0U>>1))
-
-static void free_l2_table(struct page_info *page);
-static void free_l1_table(struct page_info *page);
-
-static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
- unsigned long type);
-static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t, unsigned long gl1mfn);
/* Used to defer flushing of memory structures. */
struct percpu_mm_info {
@@ -157,6 +144,15 @@ struct page_info *frame_table;
struct page_info *frame_table;
unsigned long max_page;
unsigned long total_pages;
+
+#ifdef CONFIG_COMPAT
+l2_pgentry_t *compat_idle_pg_table_l2 = NULL; /* Xen L2 slots are copied from here */
+#define l3_disallow_mask(d) (!IS_COMPAT(d) ? \
+ L3_DISALLOW_MASK : \
+ COMPAT_L3_DISALLOW_MASK) /* compat domains are checked against their own mask */
+#else /* !CONFIG_COMPAT */
+#define l3_disallow_mask(d) L3_DISALLOW_MASK /* d is unused: a single mask applies */
+#endif /* CONFIG_COMPAT */
void __init init_frametable(void)
{
@@ -433,7 +429,7 @@ static int alloc_segdesc_page(struct pag
descs = map_domain_page(page_to_mfn(page));
for ( i = 0; i < 512; i++ )
- if ( unlikely(!check_descriptor(&descs[i])) )
+ if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
goto fail;
unmap_domain_page(descs);
@@ -661,9 +657,9 @@ get_page_from_l3e(
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
return 1;
- if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
- {
- MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
+ if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
+ {
+ MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
return 0;
}
@@ -700,9 +696,10 @@ get_page_from_l4e(
#ifdef __x86_64__
#ifdef USER_MAPPINGS_ARE_GLOBAL
-#define adjust_guest_l1e(pl1e) \
+#define adjust_guest_l1e(pl1e, d) \
do { \
- if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \
+ if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
+ likely(!IS_COMPAT(d)) ) \
{ \
/* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \
if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
@@ -716,37 +713,53 @@ get_page_from_l4e(
} \
} while ( 0 )
#else
-#define adjust_guest_l1e(pl1e) \
+#define adjust_guest_l1e(pl1e, d) \
do { \
- if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \
+ if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
+ likely(!IS_COMPAT(d)) ) \
l1e_add_flags((pl1e), _PAGE_USER); \
} while ( 0 )
#endif
-#define adjust_guest_l2e(pl2e) \
+#define adjust_guest_l2e(pl2e, d) \
do { \
- if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) ) \
+ if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \
+ likely(!IS_COMPAT(d)) ) \
l2e_add_flags((pl2e), _PAGE_USER); \
} while ( 0 )
-#define adjust_guest_l3e(pl3e) \
+#define adjust_guest_l3e(pl3e, d) \
do { \
if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
- l3e_add_flags((pl3e), _PAGE_USER); \
+ l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ? \
+ _PAGE_USER : \
+ _PAGE_USER|_PAGE_RW); \
} while ( 0 )
-#define adjust_guest_l4e(pl4e) \
+#define adjust_guest_l4e(pl4e, d) \
do { \
- if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) ) \
+ if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \
+ likely(!IS_COMPAT(d)) ) \
l4e_add_flags((pl4e), _PAGE_USER); \
} while ( 0 )
#else /* !defined(__x86_64__) */
-#define adjust_guest_l1e(_p) ((void)0)
-#define adjust_guest_l2e(_p) ((void)0)
-#define adjust_guest_l3e(_p) ((void)0)
-
+#define adjust_guest_l1e(_p, _d) ((void)(_d))
+#define adjust_guest_l2e(_p, _d) ((void)(_d))
+#define adjust_guest_l3e(_p, _d) ((void)(_d))
+
+#endif
+
+#ifdef CONFIG_COMPAT
+#define unadjust_guest_l3e(pl3e, d) \
+ do { \
+ if ( unlikely(IS_COMPAT(d)) && \
+ likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
+ l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
+ } while ( 0 )
+#else
+#define unadjust_guest_l3e(_p, _d) ((void)(_d))
#endif
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
@@ -813,7 +826,7 @@ static void put_page_from_l2e(l2_pgentry
{
if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
(l2e_get_pfn(l2e) != pfn) )
- put_page_and_type(mfn_to_page(l2e_get_pfn(l2e)));
+ put_page_and_type(l2e_get_page(l2e));
}
@@ -822,7 +835,7 @@ static void put_page_from_l3e(l3_pgentry
{
if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
(l3e_get_pfn(l3e) != pfn) )
- put_page_and_type(mfn_to_page(l3e_get_pfn(l3e)));
+ put_page_and_type(l3e_get_page(l3e));
}
#endif
@@ -831,7 +844,7 @@ static void put_page_from_l4e(l4_pgentry
{
if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
(l4e_get_pfn(l4e) != pfn) )
- put_page_and_type(mfn_to_page(l4e_get_pfn(l4e)));
+ put_page_and_type(l4e_get_page(l4e));
}
#endif
@@ -850,7 +863,7 @@ static int alloc_l1_table(struct page_in
unlikely(!get_page_from_l1e(pl1e[i], d)) )
goto fail;
- adjust_guest_l1e(pl1e[i]);
+ adjust_guest_l1e(pl1e[i], d);
}
unmap_domain_page(pl1e);
@@ -866,13 +879,20 @@ static int alloc_l1_table(struct page_in
return 0;
}
-#ifdef CONFIG_X86_PAE
-static int create_pae_xen_mappings(l3_pgentry_t *pl3e)
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
+static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
{
struct page_info *page;
- l2_pgentry_t *pl2e, l2e;
+ l2_pgentry_t *pl2e;
l3_pgentry_t l3e3;
+#ifndef CONFIG_COMPAT
+ l2_pgentry_t l2e;
int i;
+#else
+
+ if ( !IS_COMPAT(d) )
+ return 1;
+#endif
pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
@@ -905,6 +925,7 @@ static int create_pae_xen_mappings(l3_pg
/* Xen private mappings. */
pl2e = map_domain_page(l3e_get_pfn(l3e3));
+#ifndef CONFIG_COMPAT
memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
&idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
@@ -922,11 +943,20 @@ static int create_pae_xen_mappings(l3_pg
l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR);
l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e);
}
+#else
+ memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
+
&compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
+ COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
+#endif
unmap_domain_page(pl2e);
return 1;
}
-
+#else
+# define create_pae_xen_mappings(d, pl3e) (1)
+#endif
+
+#ifdef CONFIG_X86_PAE
/* Flush a pgdir update into low-memory caches. */
static void pae_flush_pgd(
unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
@@ -961,12 +991,8 @@ static void pae_flush_pgd(
flush_tlb_mask(d->domain_dirty_cpumask);
}
-
-#elif CONFIG_X86_64
-# define create_pae_xen_mappings(pl3e) (1)
+#else
# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
-#else
-# define create_pae_xen_mappings(pl3e) (1)
#endif
static int alloc_l2_table(struct page_info *page, unsigned long type)
@@ -980,11 +1006,11 @@ static int alloc_l2_table(struct page_in
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
- if ( is_guest_l2_slot(type, i) &&
+ if ( is_guest_l2_slot(d, type, i) &&
unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
goto fail;
- adjust_guest_l2e(pl2e[i]);
+ adjust_guest_l2e(pl2e[i], d);
}
#if CONFIG_PAGING_LEVELS == 2
@@ -1007,7 +1033,7 @@ static int alloc_l2_table(struct page_in
fail:
MEM_LOG("Failure in alloc_l2_table: entry %d", i);
while ( i-- > 0 )
- if ( is_guest_l2_slot(type, i) )
+ if ( is_guest_l2_slot(d, type, i) )
put_page_from_l2e(pl2e[i], pfn);
unmap_domain_page(pl2e);
@@ -1039,13 +1065,24 @@ static int alloc_l3_table(struct page_in
#endif
pl3e = map_domain_page(pfn);
+
+ /*
+ * PAE guests allocate full pages, but aren't required to initialize
+ * more than the first four entries; when running in compatibility
+ * mode, however, the full page is visible to the MMU, and hence all
+ * 512 entries must be valid/verified, which is most easily achieved
+ * by clearing them out.
+ */
+ if ( IS_COMPAT(d) )
+ memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
+
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
{
-#ifdef CONFIG_X86_PAE
- if ( i == 3 )
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
+ if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 )
{
if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
- (l3e_get_flags(pl3e[i]) & L3_DISALLOW_MASK) ||
+ (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
!get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
PGT_l2_page_table |
PGT_pae_xen_l2,
@@ -1058,10 +1095,10 @@ static int alloc_l3_table(struct page_in
unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
goto fail;
- adjust_guest_l3e(pl3e[i]);
- }
-
- if ( !create_pae_xen_mappings(pl3e) )
+ adjust_guest_l3e(pl3e[i], d);
+ }
+
+ if ( !create_pae_xen_mappings(d, pl3e) )
goto fail;
unmap_domain_page(pl3e);
@@ -1094,7 +1131,7 @@ static int alloc_l4_table(struct page_in
unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
goto fail;
- adjust_guest_l4e(pl4e[i]);
+ adjust_guest_l4e(pl4e[i], d);
}
/* Xen private mappings. */
@@ -1104,9 +1141,12 @@ static int alloc_l4_table(struct page_in
pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
- l4e_from_page(
- virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3),
- __PAGE_HYPERVISOR);
+ l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
+ __PAGE_HYPERVISOR);
+ if ( IS_COMPAT(d) )
+ pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
+ l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
+ __PAGE_HYPERVISOR);
return 1;
@@ -1142,6 +1182,9 @@ static void free_l1_table(struct page_in
static void free_l2_table(struct page_info *page)
{
+#ifdef CONFIG_COMPAT
+ struct domain *d = page_get_owner(page);
+#endif
unsigned long pfn = page_to_mfn(page);
l2_pgentry_t *pl2e;
int i;
@@ -1149,7 +1192,7 @@ static void free_l2_table(struct page_in
pl2e = map_domain_page(pfn);
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( is_guest_l2_slot(page->u.inuse.type_info, i) )
+ if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
put_page_from_l2e(pl2e[i], pfn);
unmap_domain_page(pl2e);
@@ -1162,6 +1205,7 @@ static void free_l2_table(struct page_in
static void free_l3_table(struct page_info *page)
{
+ struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
int i;
@@ -1170,7 +1214,10 @@ static void free_l3_table(struct page_in
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
if ( is_guest_l3_slot(i) )
+ {
put_page_from_l3e(pl3e[i], pfn);
+ unadjust_guest_l3e(pl3e[i], d);
+ }
unmap_domain_page(pl3e);
}
@@ -1270,7 +1317,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
return 0;
}
- adjust_guest_l1e(nl1e);
+ adjust_guest_l1e(nl1e, d);
/* Fast path for identical mapping, r/w and presence. */
if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
@@ -1303,8 +1350,9 @@ static int mod_l2_entry(l2_pgentry_t *pl
unsigned long type)
{
l2_pgentry_t ol2e;
-
- if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
+ struct domain *d = current->domain;
+
+ if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
{
MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
return 0;
@@ -1322,13 +1370,13 @@ static int mod_l2_entry(l2_pgentry_t *pl
return 0;
}
- adjust_guest_l2e(nl2e);
+ adjust_guest_l2e(nl2e, d);
/* Fast path for identical mapping and presence. */
if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
- if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) )
+ if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
return 0;
if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
@@ -1354,6 +1402,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
unsigned long pfn)
{
l3_pgentry_t ol3e;
+ struct domain *d = current->domain;
int okay;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
@@ -1362,12 +1411,13 @@ static int mod_l3_entry(l3_pgentry_t *pl
return 0;
}
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
/*
* Disallow updates to final L3 slot. It contains Xen mappings, and it
* would be a pain to ensure they remain continuously valid throughout.
*/
- if ( pgentry_ptr_to_slot(pl3e) >= 3 )
+ if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) &&
+ pgentry_ptr_to_slot(pl3e) >= 3 )
return 0;
#endif
@@ -1376,20 +1426,20 @@ static int mod_l3_entry(l3_pgentry_t *pl
if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
{
- if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
+ if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
{
MEM_LOG("Bad L3 flags %x",
- l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
+ l3e_get_flags(nl3e) & l3_disallow_mask(d));
return 0;
}
- adjust_guest_l3e(nl3e);
+ adjust_guest_l3e(nl3e, d);
/* Fast path for identical mapping and presence. */
if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
- if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
+ if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
return 0;
if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
@@ -1403,7 +1453,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
return 0;
}
- okay = create_pae_xen_mappings(pl3e);
+ okay = create_pae_xen_mappings(d, pl3e);
BUG_ON(!okay);
pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
@@ -1441,7 +1491,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
return 0;
}
- adjust_guest_l4e(nl4e);
+ adjust_guest_l4e(nl4e, current->domain);
/* Fast path for identical mapping and presence. */
if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
@@ -1712,6 +1762,33 @@ int new_guest_cr3(unsigned long mfn)
if ( is_hvm_domain(d) && !hvm_paging_enabled(v) )
return 0;
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ {
+ l4_pgentry_t l4e = l4e_from_pfn(mfn,
_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
+
+ if ( shadow_mode_refcounts(d) )
+ {
+ okay = get_page_from_pagenr(mfn, d);
+ old_base_mfn = l4e_get_pfn(l4e);
+ if ( okay && old_base_mfn )
+ put_page(mfn_to_page(old_base_mfn));
+ }
+ else
+ okay = mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)),
+ l4e, 0);
+ if ( unlikely(!okay) )
+ {
+ MEM_LOG("Error while installing new compat baseptr %lx", mfn);
+ return 0;
+ }
+
+ invalidate_shadow_ldt(v);
+ write_ptbase(v);
+
+ return 1;
+ }
+#endif
if ( shadow_mode_refcounts(d) )
{
okay = get_page_from_pagenr(mfn, d);
@@ -1950,6 +2027,8 @@ int do_mmuext_op(
goto pin_page;
case MMUEXT_PIN_L4_TABLE:
+ if ( IS_COMPAT(FOREIGNDOM) )
+ break;
type = PGT_l4_page_table;
pin_page:
@@ -2013,7 +2092,11 @@ int do_mmuext_op(
#ifdef __x86_64__
case MMUEXT_NEW_USER_BASEPTR:
- okay = 1;
+ if ( IS_COMPAT(FOREIGNDOM) )
+ {
+ okay = 0;
+ break;
+ }
if (likely(mfn != 0))
{
if ( shadow_mode_refcounts(d) )
@@ -2265,8 +2348,7 @@ int do_mmu_update(
case PGT_l2_page_table:
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
- okay = mod_l2_entry(
- (l2_pgentry_t *)va, l2e, mfn, type_info);
+ okay = mod_l2_entry(va, l2e, mfn, type_info);
}
break;
#if CONFIG_PAGING_LEVELS >= 3
@@ -2279,11 +2361,12 @@ int do_mmu_update(
#endif
#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
- {
- l4_pgentry_t l4e = l4e_from_intpte(req.val);
- okay = mod_l4_entry(va, l4e, mfn);
- }
- break;
+ if ( !IS_COMPAT(FOREIGNDOM) )
+ {
+ l4_pgentry_t l4e = l4e_from_intpte(req.val);
+ okay = mod_l4_entry(va, l4e, mfn);
+ }
+ break;
#endif
}
@@ -2387,7 +2470,7 @@ static int create_grant_pte_mapping(
ASSERT(spin_is_locked(&d->big_lock));
- adjust_guest_l1e(nl1e);
+ adjust_guest_l1e(nl1e, d);
gmfn = pte_addr >> PAGE_SHIFT;
mfn = gmfn_to_mfn(d, gmfn);
@@ -2508,7 +2591,7 @@ static int create_grant_va_mapping(
ASSERT(spin_is_locked(&d->big_lock));
- adjust_guest_l1e(nl1e);
+ adjust_guest_l1e(nl1e, d);
pl1e = guest_map_l1e(v, va, &gl1mfn);
if ( !pl1e )
@@ -2676,7 +2759,9 @@ int do_update_va_mapping(unsigned long v
flush_tlb_mask(d->domain_dirty_cpumask);
break;
default:
- if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
+ if ( unlikely(!IS_COMPAT(d) ?
+ get_user(vmask, (unsigned long *)bmap_ptr) :
+ get_user(vmask, (unsigned int *)bmap_ptr)) )
rc = -EFAULT;
pmask = vcpumask_to_pcpumask(d, vmask);
flush_tlb_mask(pmask);
@@ -2835,7 +2920,7 @@ long do_update_descriptor(u64 pa, u64 de
mfn = gmfn_to_mfn(dom, gmfn);
if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
!mfn_valid(mfn) ||
- !check_descriptor(&d) )
+ !check_descriptor(dom, &d) )
{
UNLOCK_BIGLOCK(dom);
return -EINVAL;
@@ -3097,7 +3182,7 @@ static int ptwr_emulated_update(
unsigned int do_cmpxchg,
struct ptwr_emulate_ctxt *ptwr_ctxt)
{
- unsigned long gmfn, mfn;
+ unsigned long mfn;
struct page_info *page;
l1_pgentry_t pte, ol1e, nl1e, *pl1e;
struct vcpu *v = current;
@@ -3137,8 +3222,7 @@ static int ptwr_emulated_update(
}
pte = ptwr_ctxt->pte;
- gmfn = l1e_get_pfn(pte);
- mfn = gmfn_to_mfn(d, gmfn);
+ mfn = l1e_get_pfn(pte);
page = mfn_to_page(mfn);
/* We are looking only for read-only mappings of p.t. pages. */
@@ -3151,7 +3235,7 @@ static int ptwr_emulated_update(
nl1e = l1e_from_intpte(val);
if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) )
{
- if ( (CONFIG_PAGING_LEVELS == 3) &&
+ if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) &&
(bytes == 4) &&
!do_cmpxchg &&
(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
@@ -3173,7 +3257,7 @@ static int ptwr_emulated_update(
}
}
- adjust_guest_l1e(nl1e);
+ adjust_guest_l1e(nl1e, d);
/* Checked successfully: do the update (write or cmpxchg). */
pl1e = map_domain_page(page_to_mfn(page));
@@ -3269,7 +3353,6 @@ int ptwr_do_page_fault(struct vcpu *v, u
struct cpu_user_regs *regs)
{
struct domain *d = v->domain;
- unsigned long pfn;
struct page_info *page;
l1_pgentry_t pte;
struct ptwr_emulate_ctxt ptwr_ctxt;
@@ -3283,8 +3366,7 @@ int ptwr_do_page_fault(struct vcpu *v, u
guest_get_eff_l1e(v, addr, &pte);
if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) )
goto bail;
- pfn = l1e_get_pfn(pte);
- page = mfn_to_page(pfn);
+ page = l1e_get_page(pte);
/* We are looking only for read-only mappings of p.t. pages. */
if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
@@ -3294,10 +3376,11 @@ int ptwr_do_page_fault(struct vcpu *v, u
goto bail;
ptwr_ctxt.ctxt.regs = guest_cpu_user_regs();
- ptwr_ctxt.ctxt.mode = X86EMUL_MODE_HOST;
- ptwr_ctxt.cr2 = addr;
- ptwr_ctxt.pte = pte;
- if ( x86_emulate_memop(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) )
+ ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size =
+ IS_COMPAT(d) ? 32 : BITS_PER_LONG;
+ ptwr_ctxt.cr2 = addr;
+ ptwr_ctxt.pte = pte;
+ if ( x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) )
goto bail;
UNLOCK_BIGLOCK(d);
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/mm/shadow/common.c Wed Jan 17 09:56:40 2007 -0500
@@ -36,6 +36,7 @@
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/shadow.h>
+#include <asm/shared.h>
#include "private.h"
@@ -109,7 +110,7 @@ static int hvm_translate_linear_addr(
unsigned long limit, addr = offset;
uint32_t last_byte;
- if ( sh_ctxt->ctxt.mode != X86EMUL_MODE_PROT64 )
+ if ( sh_ctxt->ctxt.addr_size != 64 )
{
/*
* COMPATIBILITY MODE: Apply segment checks and add base.
@@ -398,7 +399,7 @@ struct x86_emulate_ops *shadow_init_emul
struct x86_emulate_ops *shadow_init_emulation(
struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs)
{
- struct segment_register *creg;
+ struct segment_register *creg, *sreg;
struct vcpu *v = current;
unsigned long addr;
@@ -406,7 +407,7 @@ struct x86_emulate_ops *shadow_init_emul
if ( !is_hvm_vcpu(v) )
{
- sh_ctxt->ctxt.mode = X86EMUL_MODE_HOST;
+ sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = BITS_PER_LONG;
return &pv_shadow_emulator_ops;
}
@@ -415,14 +416,20 @@ struct x86_emulate_ops *shadow_init_emul
creg = hvm_get_seg_reg(x86_seg_cs, sh_ctxt);
/* Work out the emulation mode. */
- if ( hvm_long_mode_enabled(v) )
- sh_ctxt->ctxt.mode = creg->attr.fields.l ?
- X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32;
+ if ( hvm_long_mode_enabled(v) && creg->attr.fields.l )
+ {
+ sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 64;
+ }
else if ( regs->eflags & X86_EFLAGS_VM )
- sh_ctxt->ctxt.mode = X86EMUL_MODE_REAL;
+ {
+ sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 16;
+ }
else
- sh_ctxt->ctxt.mode = creg->attr.fields.db ?
- X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+ {
+ sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt);
+ sh_ctxt->ctxt.addr_size = creg->attr.fields.db ? 32 : 16;
+ sh_ctxt->ctxt.sp_size = sreg->attr.fields.db ? 32 : 16;
+ }
/* Attempt to prefetch whole instruction. */
sh_ctxt->insn_buf_bytes =
@@ -1304,6 +1311,9 @@ shadow_alloc_p2m_table(struct domain *d)
if ( !shadow_set_p2m_entry(d, gfn, mfn) )
goto error;
+ /* Build a p2m map that matches the m2p entries for this domain's
+ * allocated pages. Skip any pages that have an explicitly invalid
+ * or obviously bogus m2p entry. */
for ( entry = d->page_list.next;
entry != &d->page_list;
entry = entry->next )
@@ -1319,6 +1329,8 @@ shadow_alloc_p2m_table(struct domain *d)
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
+ && (gfn <
+ (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t))
&& !shadow_set_p2m_entry(d, gfn, mfn) )
goto error;
}
@@ -2442,9 +2454,10 @@ static void sh_update_paging_modes(struc
/// PV guest
///
#if CONFIG_PAGING_LEVELS == 4
- /* When 32-on-64 PV guests are supported, they must choose
- * a different mode here */
- v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
+ if ( pv_32bit_guest(v) )
+ v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+ else
+ v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
#elif CONFIG_PAGING_LEVELS == 3
v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
#elif CONFIG_PAGING_LEVELS == 2
@@ -2917,7 +2930,7 @@ sh_alloc_log_dirty_bitmap(struct domain
{
ASSERT(d->arch.shadow.dirty_bitmap == NULL);
d->arch.shadow.dirty_bitmap_size =
- (d->shared_info->arch.max_pfn + (BITS_PER_LONG - 1)) &
+ (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) &
~(BITS_PER_LONG - 1);
d->arch.shadow.dirty_bitmap =
xmalloc_array(unsigned long,
@@ -3259,7 +3272,7 @@ void shadow_mark_dirty(struct domain *d,
int shadow_domctl(struct domain *d,
xen_domctl_shadow_op_t *sc,
- XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+ XEN_GUEST_HANDLE(void) u_domctl)
{
int rc, preempted = 0;
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/mm/shadow/multi.c Wed Jan 17 09:56:40 2007 -0500
@@ -851,9 +851,7 @@ static inline void safe_write_entry(void
* then writing the high word before the low word. */
BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long));
d[0] = 0;
- wmb();
d[1] = s[1];
- wmb();
d[0] = s[0];
#else
/* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
@@ -1422,7 +1420,7 @@ void sh_install_xen_entries_in_l4(struct
}
#endif
-#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
+#if (CONFIG_PAGING_LEVELS == 3 || defined(CONFIG_COMPAT)) && GUEST_PAGING_LEVELS == 3
// For 3-on-3 PV guests, we need to make sure the xen mappings are in
// place, which means that we need to populate the l2h entry in the l3
// table.
@@ -1432,12 +1430,20 @@ void sh_install_xen_entries_in_l2h(struc
{
struct domain *d = v->domain;
shadow_l2e_t *sl2e;
+#if CONFIG_PAGING_LEVELS == 3
int i;
+#else
+
+ if ( !pv_32bit_guest(v) )
+ return;
+#endif
sl2e = sh_map_domain_page(sl2hmfn);
ASSERT(sl2e != NULL);
ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
+#if CONFIG_PAGING_LEVELS == 3
+
/* Copy the common Xen mappings from the idle domain */
memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
&idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
@@ -1478,6 +1484,15 @@ void sh_install_xen_entries_in_l2h(struc
}
sh_unmap_domain_page(p2m);
}
+
+#else
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
+ &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
+ COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*sl2e));
+
+#endif
sh_unmap_domain_page(sl2e);
}
@@ -1638,12 +1653,15 @@ mfn_t
mfn_t
sh_make_monitor_table(struct vcpu *v)
{
+ struct domain *d = v->domain;
ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
+ /* Guarantee we can get the memory we need */
+ shadow_prealloc(d, SHADOW_MAX_ORDER);
+
#if CONFIG_PAGING_LEVELS == 4
{
- struct domain *d = v->domain;
mfn_t m4mfn;
m4mfn = shadow_alloc(d, SH_type_monitor_table, 0);
sh_install_xen_entries_in_l4(v, m4mfn, m4mfn);
@@ -1660,6 +1678,19 @@ sh_make_monitor_table(struct vcpu *v)
l4e = sh_map_domain_page(m4mfn);
l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
sh_unmap_domain_page(l4e);
+ if ( pv_32bit_guest(v) )
+ {
+ // Install a monitor l2 table in slot 3 of the l3 table.
+ // This is used for all Xen entries.
+ mfn_t m2mfn;
+ l3_pgentry_t *l3e;
+ m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
+ mfn_to_page(m2mfn)->shadow_flags = 2;
+ l3e = sh_map_domain_page(m3mfn);
+ l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
+ sh_install_xen_entries_in_l2h(v, m2mfn);
+ sh_unmap_domain_page(l3e);
+ }
}
#endif /* SHADOW_PAGING_LEVELS < 4 */
return m4mfn;
@@ -1668,7 +1699,6 @@ sh_make_monitor_table(struct vcpu *v)
#elif CONFIG_PAGING_LEVELS == 3
{
- struct domain *d = v->domain;
mfn_t m3mfn, m2mfn;
l3_pgentry_t *l3e;
l2_pgentry_t *l2e;
@@ -1702,7 +1732,6 @@ sh_make_monitor_table(struct vcpu *v)
#elif CONFIG_PAGING_LEVELS == 2
{
- struct domain *d = v->domain;
mfn_t m2mfn;
m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
sh_install_xen_entries_in_l2(v, m2mfn, m2mfn);
@@ -2065,9 +2094,19 @@ void sh_destroy_monitor_table(struct vcp
#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
/* Need to destroy the l3 monitor page in slot 0 too */
{
+ mfn_t m3mfn;
l4_pgentry_t *l4e = sh_map_domain_page(mmfn);
ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
- shadow_free(d, _mfn(l4e_get_pfn(l4e[0])));
+ m3mfn = _mfn(l4e_get_pfn(l4e[0]));
+ if ( pv_32bit_guest(v) )
+ {
+ /* Need to destroy the l2 monitor page in slot 3 too */
+ l3_pgentry_t *l3e = sh_map_domain_page(m3mfn);
+ ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+ shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
+ sh_unmap_domain_page(l3e);
+ }
+ shadow_free(d, m3mfn);
sh_unmap_domain_page(l4e);
}
#elif CONFIG_PAGING_LEVELS == 3
@@ -2836,7 +2875,7 @@ static int sh_page_fault(struct vcpu *v,
* it seems very unlikely that any OS grants user access to page tables.
*/
if ( (regs->error_code & PFEC_user_mode) ||
- x86_emulate_memop(&emul_ctxt.ctxt, emul_ops) )
+ x86_emulate(&emul_ctxt.ctxt, emul_ops) )
{
SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n",
mfn_x(gmfn));
@@ -3044,12 +3083,15 @@ sh_update_linear_entries(struct vcpu *v)
#elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3)
- /* This case only exists in HVM. To give ourselves a linear map of the
- * shadows, we need to extend a PAE shadow to 4 levels. We do this by
- * having a monitor l3 in slot 0 of the monitor l4 table, and
- * copying the PAE l3 entries into it. Then, by having the monitor l4e
- * for shadow pagetables also point to the monitor l4, we can use it
- * to access the shadows. */
+ /* PV: XXX
+ *
+ * HVM: To give ourselves a linear map of the shadows, we need to
+ * extend a PAE shadow to 4 levels. We do this by having a monitor
+ * l3 in slot 0 of the monitor l4 table, and copying the PAE l3
+ * entries into it. Then, by having the monitor l4e for shadow
+ * pagetables also point to the monitor l4, we can use it to access
+ * the shadows.
+ */
if ( shadow_mode_external(d) )
{
@@ -3092,6 +3134,8 @@ sh_update_linear_entries(struct vcpu *v)
if ( v != current )
sh_unmap_domain_page(ml3e);
}
+ else
+ domain_crash(d); /* XXX */
#elif CONFIG_PAGING_LEVELS == 3
@@ -3404,7 +3448,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
(unsigned long)pagetable_get_pfn(v->arch.guest_table));
#if GUEST_PAGING_LEVELS == 4
- if ( !(v->arch.flags & TF_kernel_mode) )
+ if ( !(v->arch.flags & TF_kernel_mode) && !IS_COMPAT(v->domain) )
gmfn = pagetable_get_mfn(v->arch.guest_table_user);
else
#endif
@@ -3900,7 +3944,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
/* If we are writing zeros to this page, might want to unshadow */
- if ( likely(bytes >= 4) && (*(u32 *)addr == 0) )
+ if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) )
check_for_early_unshadow(v, mfn);
sh_unmap_domain_page(addr);
@@ -3952,7 +3996,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
vaddr, prev, old, new, *(unsigned long *)addr, bytes);
/* If we are writing zeros to this page, might want to unshadow */
- if ( likely(bytes >= 4) && (*(u32 *)addr == 0) )
+ if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) )
check_for_early_unshadow(v, mfn);
sh_unmap_domain_page(addr);
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/mm/shadow/private.h Wed Jan 17 09:56:40 2007 -0500
@@ -427,6 +427,11 @@ extern int sh_remove_write_access(struct
#undef mfn_valid
#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#if GUEST_PAGING_LEVELS >= 3
+# define is_lo_pte(_vaddr) (((_vaddr)&0x4)==0)
+#else
+# define is_lo_pte(_vaddr) (1)
+#endif
static inline int
sh_mfn_is_a_page_table(mfn_t gmfn)
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/oprofile/nmi_int.c Wed Jan 17 09:56:40 2007 -0500
@@ -42,7 +42,7 @@ extern size_t strlcpy(char *dest, const
extern size_t strlcpy(char *dest, const char *src, size_t size);
-int nmi_callback(struct cpu_user_regs *regs, int cpu)
+static int nmi_callback(struct cpu_user_regs *regs, int cpu)
{
int xen_mode, ovf;
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/physdev.c Wed Jan 17 09:56:40 2007 -0500
@@ -9,8 +9,13 @@
#include <xen/guest_access.h>
#include <asm/current.h>
#include <asm/smpboot.h>
+#include <asm/hypercall.h>
#include <public/xen.h>
#include <public/physdev.h>
+
+#ifndef COMPAT
+typedef long ret_t;
+#endif
int
ioapic_guest_read(
@@ -19,10 +24,10 @@ ioapic_guest_write(
ioapic_guest_write(
unsigned long physbase, unsigned int reg, u32 pval);
-long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
+ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
int irq;
- long ret;
+ ret_t ret;
switch ( cmd )
{
@@ -129,7 +134,11 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
(set_iobitmap.nr_ports > 65536) )
break;
ret = 0;
+#ifndef COMPAT
current->arch.iobmp = set_iobitmap.bitmap;
+#else
+ guest_from_compat_handle(current->arch.iobmp, set_iobitmap.bitmap);
+#endif
current->arch.iobmp_limit = set_iobitmap.nr_ports;
break;
}
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/platform_hypercall.c Wed Jan 17 09:56:40 2007 -0500
@@ -23,11 +23,17 @@
#include <asm/mtrr.h>
#include "cpu/mtrr/mtrr.h"
-long do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
+#ifndef COMPAT
+typedef long ret_t;
+DEFINE_SPINLOCK(xenpf_lock);
+#else
+extern spinlock_t xenpf_lock;
+#endif
+
+ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
{
- long ret = 0;
+ ret_t ret = 0;
struct xen_platform_op curop, *op = &curop;
- static DEFINE_SPINLOCK(xenpf_lock);
if ( !IS_PRIV(current->domain) )
return -EPERM;
@@ -105,8 +111,15 @@ long do_platform_op(XEN_GUEST_HANDLE(xen
case XENPF_microcode_update:
{
extern int microcode_update(XEN_GUEST_HANDLE(void), unsigned long len);
+#ifndef COMPAT
ret = microcode_update(op->u.microcode.data,
op->u.microcode.length);
+#else
+ XEN_GUEST_HANDLE(void) data;
+
+ guest_from_compat_handle(data, op->u.microcode.data);
+ ret = microcode_update(data, op->u.microcode.length);
+#endif
}
break;
diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Mon Jan 15 13:27:20 2007 -0500
+++ b/xen/arch/x86/setup.c Wed Jan 17 09:56:40 2007 -0500
@@ -18,6 +18,10 @@
#include <xen/keyhandler.h>
#include <xen/numa.h>
#include <public/version.h>
+#ifdef CONFIG_COMPAT
+#include <compat/platform.h>
+#include <compat/xen.h>
+#endif
#include <asm/bitops.h>
#include <asm/smp.h>
#include <asm/processor.h>
@@ -407,6 +411,23 @@ void __init __start_xen(multiboot_info_t
printk("WARNING: Buggy e820 map detected and fixed "
"(truncated length fields).\n");
+ /* Ensure that all E820 RAM regions are page-aligned and -sized. */
+ for ( i = 0; i < e820_raw_nr; i++ )
+ {
+ uint64_t s, e;
+ if ( e820_raw[i].type != E820_RAM )
+ continue;
+ s = PFN_UP(e820_raw[i].addr);
+ e = PFN_DOWN(e820_raw[i].addr + e820_raw[i].size);
+ e820_raw[i].size = 0; /* discarded later */
+ if ( s < e )
+ {
+ e820_raw[i].addr = s << PAGE_SHIFT;
+ e820_raw[i].size = (e - s) << PAGE_SHIFT;
+ }
+ }
+
+ /* Sanitise the raw E820 map to produce a final clean version. */
max_page = init_e820(e820_raw, &e820_raw_nr);
modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
@@ -419,7 +440,7 @@ void __init __start_xen(multiboot_info_t
printk("Not enough memory to stash the DOM0 kernel image.\n");
for ( ; ; ) ;
}
-
+
if ( (e820.map[i].type == E820_RAM) &&
(e820.map[i].size >= modules_length) &&
((e820.map[i].addr + e820.map[i].size) >=
@@ -470,10 +491,10 @@ void __init __start_xen(multiboot_info_t
start = PFN_UP(e820.map[i].addr);
end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
/* Clip the range to exclude what the bootstrapper initialised. */
- if ( end < init_mapped )
- continue;
if ( start < init_mapped )
start = init_mapped;
+ if ( end <= start )
+ continue;
/* Request the mapping. */
map_pages_to_xen(
PAGE_OFFSET + (start << PAGE_SHIFT),
@@ -482,7 +503,7 @@ void __init __start_xen(multiboot_info_t
#endif
}
- if ( kexec_crash_area.size > 0 )
+ if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0)
_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel
|