235 files changed, 7718 insertions(+), 4608 deletions(-)
.hgignore | 2
buildconfigs/linux-defconfig_xen0_ia64 | 38
buildconfigs/linux-defconfig_xenU_ia64 | 30
buildconfigs/linux-defconfig_xen_ia64 | 38
docs/src/interface.tex | 29
docs/src/user.tex | 32
extras/mini-os/Makefile | 15
extras/mini-os/include/mm.h | 79
extras/mini-os/include/types.h | 7
extras/mini-os/kernel.c | 11
extras/mini-os/lib/printf.c | 4
extras/mini-os/lib/string.c | 4
extras/mini-os/mm.c | 71
extras/mini-os/traps.c | 21
extras/mini-os/x86_32.S | 8
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 266 +-
linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c | 1
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 13
linux-2.6-xen-sparse/arch/ia64/Kconfig | 54
linux-2.6-xen-sparse/arch/ia64/kernel/setup.c | 3
linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre | 14
linux-2.6-xen-sparse/arch/ia64/xen/Makefile | 6
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile | 24
linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile | 12
linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c | 17
linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c | 9
linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S | 4
linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 550 +++-
linux-2.6-xen-sparse/arch/ia64/xen/util.c | 115
linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c | 12
linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S | 198 -
linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S | 586 ++--
linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h | 2
linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S | 21
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 257 --
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 73
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 26
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c | 2
linux-2.6-xen-sparse/drivers/xen/Makefile | 1
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 12
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 4
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 2
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 6
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c | 2
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 2
linux-2.6-xen-sparse/drivers/xen/console/console.c | 32
linux-2.6-xen-sparse/drivers/xen/core/Makefile | 11
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c | 185 +
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c | 31
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 5
linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c | 3
linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 9
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c | 215 -
linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c | 3
linux-2.6-xen-sparse/drivers/xen/net_driver_util.c | 58
linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 7
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 31
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 91
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c | 2
linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c | 4
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 26
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c | 6
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 8
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 12
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h | 2
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 28
linux-2.6-xen-sparse/include/asm-ia64/hypercall.h | 84
linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 34
linux-2.6-xen-sparse/include/asm-ia64/page.h | 50
linux-2.6-xen-sparse/include/asm-ia64/privop.h | 11
linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h | 58
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h | 63
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h | 42
linux-2.6-xen-sparse/include/xen/net_driver_util.h | 48
linux-2.6-xen-sparse/include/xen/xenbus.h | 8
patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch | 18
patches/linux-2.6.16.13/xen-hotplug.patch | 11
tools/examples/network-bridge | 2
tools/examples/xmexample.vti | 2
tools/libxc/Makefile | 1
tools/libxc/xc_acm.c | 33
tools/libxc/xc_csched.c | 50
tools/libxc/xc_ia64_stubs.c | 6
tools/libxc/xc_linux_build.c | 57
tools/libxc/xc_linux_restore.c | 122
tools/libxc/xc_load_elf.c | 54
tools/libxc/xc_private.c | 22
tools/libxc/xc_ptrace.c | 173 -
tools/libxc/xc_ptrace.h | 3
tools/libxc/xc_ptrace_core.c | 7
tools/libxc/xc_tbuf.c | 56
tools/libxc/xenctrl.h | 13
tools/libxc/xg_private.h | 10
tools/python/xen/lowlevel/acm/acm.c | 54
tools/python/xen/lowlevel/xc/xc.c | 68
tools/python/xen/lowlevel/xs/xs.c | 11
tools/python/xen/xend/XendDomain.py | 22
tools/python/xen/xend/XendDomainInfo.py | 24
tools/python/xen/xend/balloon.py | 11
tools/python/xen/xend/image.py | 27
tools/python/xen/xend/server/SrvDomain.py | 14
tools/python/xen/xend/xenstore/xstransact.py | 28
tools/python/xen/xm/main.py | 45
tools/security/secpol_tool.c | 32
tools/tests/test_x86_emulator.c | 67
tools/xenstore/Makefile | 8
tools/xenstore/xenstored_core.c | 7
tools/xenstore/xenstored_core.h | 8
tools/xenstore/xenstored_domain.c | 37
tools/xenstore/xenstored_linux.c | 69
tools/xenstore/xenstored_proc.h | 27
tools/xentrace/xentrace_format | 6
tools/xm-test/configure.ac | 1
tools/xm-test/ramdisk/bin/create_disk_image | 7
tools/xm-test/tests/Makefile.am | 7
tools/xm-test/tests/block-integrity/01_block_device_read_verify.py | 62
tools/xm-test/tests/block-integrity/Makefile.am | 21
tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py | 32
tools/xm-test/tests/network/03_network_local_tcp_pos.py | 4
tools/xm-test/tests/network/04_network_local_udp_pos.py | 4
tools/xm-test/tests/network/06_network_dom0_tcp_pos.py | 2
tools/xm-test/tests/network/07_network_dom0_udp_pos.py | 2
tools/xm-test/tests/network/12_network_domU_tcp_pos.py | 2
tools/xm-test/tests/network/13_network_domU_udp_pos.py | 2
xen/arch/ia64/Rules.mk | 28
xen/arch/ia64/asm-offsets.c | 2
xen/arch/ia64/linux-xen/setup.c | 10
xen/arch/ia64/linux-xen/smp.c | 32
xen/arch/ia64/linux-xen/unaligned.c | 2
xen/arch/ia64/tools/sparse-merge | 144 +
xen/arch/ia64/vmx/pal_emul.c | 6
xen/arch/ia64/vmx/vlsapic.c | 7
xen/arch/ia64/vmx/vmmu.c | 22
xen/arch/ia64/vmx/vmx_entry.S | 69
xen/arch/ia64/vmx/vmx_init.c | 24
xen/arch/ia64/vmx/vmx_interrupt.c | 6
xen/arch/ia64/vmx/vmx_ivt.S | 127 -
xen/arch/ia64/vmx/vmx_phy_mode.c | 9
xen/arch/ia64/vmx/vmx_process.c | 2
xen/arch/ia64/vmx/vmx_support.c | 8
xen/arch/ia64/vmx/vmx_vcpu.c | 4
xen/arch/ia64/vmx/vmx_virt.c | 23
xen/arch/ia64/vmx/vtlb.c | 86
xen/arch/ia64/xen/Makefile | 1
xen/arch/ia64/xen/dom0_ops.c | 12
xen/arch/ia64/xen/dom_fw.c | 51
xen/arch/ia64/xen/domain.c | 687 ++++-
xen/arch/ia64/xen/efi_emul.c | 180 +
xen/arch/ia64/xen/hypercall.c | 98
xen/arch/ia64/xen/hyperprivop.S | 138 -
xen/arch/ia64/xen/ivt.S | 49
xen/arch/ia64/xen/privop.c | 27
xen/arch/ia64/xen/process.c | 301 --
xen/arch/ia64/xen/regionreg.c | 10
xen/arch/ia64/xen/vcpu.c | 68
xen/arch/ia64/xen/vhpt.c | 214 +
xen/arch/ia64/xen/xenasm.S | 349 --
xen/arch/ia64/xen/xenmisc.c | 3
xen/arch/ia64/xen/xensetup.c | 3
xen/arch/x86/dom0_ops.c | 21
xen/arch/x86/domain.c | 2
xen/arch/x86/domain_build.c | 5
xen/arch/x86/hvm/hvm.c | 22
xen/arch/x86/hvm/i8254.c | 405 +--
xen/arch/x86/hvm/intercept.c | 82
xen/arch/x86/hvm/svm/intr.c | 47
xen/arch/x86/hvm/svm/svm.c | 48
xen/arch/x86/hvm/svm/vmcb.c | 31
xen/arch/x86/hvm/vmx/io.c | 62
xen/arch/x86/hvm/vmx/vmx.c | 37
xen/arch/x86/mm.c | 204 +
xen/arch/x86/shadow.c | 6
xen/arch/x86/shadow32.c | 8
xen/arch/x86/shadow_public.c | 5
xen/arch/x86/traps.c | 4
xen/arch/x86/x86_emulate.c | 81
xen/common/Makefile | 1
xen/common/acm_ops.c | 282 +-
xen/common/elf.c | 49
xen/common/grant_table.c | 15
xen/common/kernel.c | 5
xen/common/sched_credit.c | 1233 ++++++++++
xen/common/schedule.c | 5
xen/common/trace.c | 6
xen/include/asm-ia64/config.h | 11
xen/include/asm-ia64/dom_fw.h | 14
xen/include/asm-ia64/domain.h | 26
xen/include/asm-ia64/event.h | 2
xen/include/asm-ia64/flushtlb.h | 9
xen/include/asm-ia64/grant_table.h | 33
xen/include/asm-ia64/linux-xen/asm/pgalloc.h | 2
xen/include/asm-ia64/linux-xen/asm/pgtable.h | 14
xen/include/asm-ia64/linux-xen/asm/tlbflush.h | 119
xen/include/asm-ia64/mm.h | 10
xen/include/asm-ia64/shadow.h | 57
xen/include/asm-ia64/tlbflush.h | 37
xen/include/asm-ia64/vcpu.h | 8
xen/include/asm-ia64/vhpt.h | 18
xen/include/asm-ia64/vmx_vcpu.h | 2
xen/include/asm-x86/domain.h | 21
xen/include/asm-x86/fixmap.h | 10
xen/include/asm-x86/hvm/domain.h | 6
xen/include/asm-x86/hvm/svm/intr.h | 1
xen/include/asm-x86/hvm/svm/svm.h | 1
xen/include/asm-x86/hvm/vcpu.h | 3
xen/include/asm-x86/hvm/vmx/vmx.h | 1
xen/include/asm-x86/hvm/vpit.h | 67
xen/include/asm-x86/string.h | 162 -
xen/include/asm-x86/x86_emulate.h | 66
xen/include/public/acm_ops.h | 54
xen/include/public/arch-ia64.h | 119
xen/include/public/arch-x86_32.h | 36
xen/include/public/arch-x86_64.h | 29
xen/include/public/callback.h | 10
xen/include/public/dom0_ops.h | 205 -
xen/include/public/event_channel.h | 99
xen/include/public/grant_table.h | 30
xen/include/public/hvm/ioreq.h | 26
xen/include/public/hvm/vmx_assist.h | 5
xen/include/public/io/blkif.h | 12
xen/include/public/io/netif.h | 32
xen/include/public/io/tpmif.h | 19
xen/include/public/io/xenbus.h | 59
xen/include/public/memory.h | 48
xen/include/public/nmi.h | 5
xen/include/public/physdev.h | 45
xen/include/public/sched.h | 15
xen/include/public/sched_ctl.h | 5
xen/include/public/vcpu.h | 10
xen/include/public/version.h | 15
xen/include/public/xen.h | 47
xen/include/public/xenoprof.h | 15
xen/include/xen/hypercall.h | 2
xen/include/xen/sched-if.h | 2
xen/include/xen/softirq.h | 13
# HG changeset patch
# User Hollis Blanchard <hollisb@xxxxxxxxxx>
# Node ID f54d38cea8acaa870e6b73990fbff61fe4c3e2ac
# Parent e7424645152709dfbacd30df4b996db736403408
# Parent d5f98d23427a0d256b896fc63ccfd2c1f79e55ba
[ppc] merge with upstream
Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx>
diff -r e74246451527 -r f54d38cea8ac .hgignore
--- a/.hgignore Tue May 30 12:52:02 2006 -0500
+++ b/.hgignore Tue May 30 14:30:34 2006 -0500
@@ -14,7 +14,7 @@
.*\.orig$
.*\.rej$
.*/a\.out$
-.*/cscope\.*$
+.*/cscope\..*$
^[^/]*\.bz2$
^TAGS$
^dist/.*$
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xen0_ia64
--- a/buildconfigs/linux-defconfig_xen0_ia64 Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xen0_ia64 Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xen0
-# Mon Mar 27 14:46:03 2006
+# Linux kernel version: 2.6.16.13-xen0
+# Mon May 22 14:46:31 2006
#
#
@@ -92,12 +92,7 @@ CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_IOMAP=y
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
CONFIG_DMA_IS_DMA32=y
# CONFIG_IA64_GENERIC is not set
@@ -1522,3 +1517,30 @@ CONFIG_CRYPTO_DES=y
#
# Hardware crypto devices
#
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_BACKEND=y
+# CONFIG_XEN_PCIDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
+CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
+CONFIG_XEN_NETDEV_LOOPBACK=y
+# CONFIG_XEN_TPMDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xenU_ia64
--- a/buildconfigs/linux-defconfig_xenU_ia64 Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xenU_ia64 Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xenU
-# Mon Mar 27 14:01:13 2006
+# Linux kernel version: 2.6.16.13-xenU
+# Mon May 22 15:05:32 2006
#
#
@@ -89,12 +89,7 @@ CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_IOMAP=y
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
CONFIG_DMA_IS_DMA32=y
# CONFIG_IA64_GENERIC is not set
@@ -1386,3 +1381,22 @@ CONFIG_CRYPTO_DES=y
#
# Hardware crypto devices
#
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+# CONFIG_XEN_PRIVILEGED_GUEST is not set
+CONFIG_XEN_UNPRIVILEGED_GUEST=y
+# CONFIG_XEN_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac buildconfigs/linux-defconfig_xen_ia64
--- a/buildconfigs/linux-defconfig_xen_ia64 Tue May 30 12:52:02 2006 -0500
+++ b/buildconfigs/linux-defconfig_xen_ia64 Tue May 30 14:30:34 2006 -0500
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-xen
-# Mon Mar 27 14:36:21 2006
+# Linux kernel version: 2.6.16.13-xen
+# Mon May 22 14:15:20 2006
#
#
@@ -92,12 +92,7 @@ CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_IOMAP=y
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_BLKDEV_GRANT=y
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_XEN_BACKEND=y
-CONFIG_XEN_BLKDEV_BACKEND=y
-CONFIG_XEN_SYSFS=y
+CONFIG_XEN_IA64_DOM0_VP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
CONFIG_DMA_IS_DMA32=y
# CONFIG_IA64_GENERIC is not set
@@ -1528,3 +1523,30 @@ CONFIG_CRYPTO_DES=y
#
# Hardware crypto devices
#
+CONFIG_HAVE_ARCH_ALLOC_SKB=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_XEN_INTERFACE_VERSION=0x00030202
+
+#
+# XEN
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+# CONFIG_XEN_UNPRIVILEGED_GUEST is not set
+CONFIG_XEN_BACKEND=y
+# CONFIG_XEN_PCIDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
+CONFIG_XEN_NETDEV_BACKEND=y
+# CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
+CONFIG_XEN_NETDEV_LOOPBACK=y
+# CONFIG_XEN_TPMDEV_BACKEND is not set
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SCRUB_PAGES is not set
+# CONFIG_XEN_DISABLE_SERIAL is not set
+CONFIG_XEN_SYSFS=y
+CONFIG_XEN_COMPAT_030002_AND_LATER=y
+# CONFIG_XEN_COMPAT_LATEST_ONLY is not set
+CONFIG_XEN_COMPAT_030002=y
+CONFIG_NO_IDLE_HZ=y
diff -r e74246451527 -r f54d38cea8ac docs/src/interface.tex
--- a/docs/src/interface.tex Tue May 30 12:52:02 2006 -0500
+++ b/docs/src/interface.tex Tue May 30 14:30:34 2006 -0500
@@ -205,30 +205,23 @@ implement timeout values when they block
implement timeout values when they block.
-
-%% % akw: demoting this to a section -- not sure if there is any point
-%% % though, maybe just remove it.
-
-% KAF: Remove these random sections!
-\begin{comment}
\section{Xen CPU Scheduling}
Xen offers a uniform API for CPU schedulers. It is possible to choose
from a number of schedulers at boot and it should be easy to add more.
-The BVT, Atropos and Round Robin schedulers are part of the normal Xen
-distribution. BVT provides proportional fair shares of the CPU to the
-running domains. Atropos can be used to reserve absolute shares of
-the CPU for each domain. Round-robin is provided as an example of
-Xen's internal scheduler API.
+The SEDF, BVT, and Credit schedulers are part of the normal Xen
+distribution. BVT and SEDF will be going away and their use should be
+avoided once the credit scheduler has stabilized and become the default.
+The Credit scheduler provides proportional fair shares of the
+host's CPUs to the running domains. It does this while transparently
+load balancing runnable VCPUs across the whole system.
\paragraph*{Note: SMP host support}
-Xen has always supported SMP host systems. Domains are statically
-assigned to CPUs, either at creation time or when manually pinning to
-a particular CPU. The current schedulers then run locally on each CPU
-to decide which of the assigned domains should be run there. The
-user-level control software can be used to perform coarse-grain
-load-balancing between CPUs.
-\end{comment}
+Xen has always supported SMP host systems. When using the credit scheduler,
+a domain's VCPUs will be dynamically moved across physical CPUs to maximise
+domain and system throughput. VCPUs can also be manually restricted to be
+mapped only on a subset of the host's physical CPUs, using the pinning
+mechanism.
%% More information on the characteristics and use of these schedulers
diff -r e74246451527 -r f54d38cea8ac docs/src/user.tex
--- a/docs/src/user.tex Tue May 30 12:52:02 2006 -0500
+++ b/docs/src/user.tex Tue May 30 14:30:34 2006 -0500
@@ -1093,6 +1093,36 @@ running domains in \xend's SXP configura
You can get access to the console of a particular domain using
the \verb_# xm console_ command (e.g.\ \verb_# xm console myVM_).
+
+\subsection{Domain Scheduling Management Commands}
+
+The credit CPU scheduler automatically load balances guest VCPUs
+across all available physical CPUs on an SMP host. The user need
+not manually pin VCPUs to load balance the system. However, she
+can restrict which CPUs a particular VCPU may run on using
+the \path{xm vcpu-pin} command.
+
+Each guest domain is assigned a \path{weight} and a \path{cap}.
+
+A domain with a weight of 512 will get twice as much CPU as a
+domain with a weight of 256 on a contended host. Legal weights
+range from 1 to 65535 and the default is 256.
+
+The cap optionally fixes the maximum amount of CPU a guest will
+be able to consume, even if the host system has idle CPU cycles.
+The cap is expressed in percentage of one physical CPU: 100 is
+1 physical CPU, 50 is half a CPU, 400 is 4 CPUs, etc... The
+default, 0, means there is no upper cap.
+
+When you are running with the credit scheduler, you can check and
+modify your domains' weights and caps using the \path{xm sched-credit}
+command:
+
+\begin{tabular}{ll}
+\verb!xm sched-credit -d <domain>! & lists weight and cap \\
+\verb!xm sched-credit -d <domain> -w <weight>! & sets the weight \\
+\verb!xm sched-credit -d <domain> -c <cap>! & sets the cap
+\end{tabular}
@@ -1985,7 +2015,7 @@ editing \path{grub.conf}.
\item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in
pages (default 0).
\item [ sched=xxx ] Select the CPU scheduler Xen should use. The
- current possibilities are `sedf' (default) and `bvt'.
+ current possibilities are `sedf' (default), `credit', and `bvt'.
\item [ apic\_verbosity=debug,verbose ] Print more detailed
information about local APIC and IOAPIC configuration.
\item [ lapic ] Force use of local APIC even when left disabled by
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/Makefile
--- a/extras/mini-os/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/Makefile Tue May 30 14:30:34 2006 -0500
@@ -1,4 +1,5 @@ debug ?= y
debug ?= y
+pae ?= n
include $(CURDIR)/../../Config.mk
@@ -12,11 +13,17 @@ override CPPFLAGS := -Iinclude $(CPPFLAG
override CPPFLAGS := -Iinclude $(CPPFLAGS)
ASFLAGS = -D__ASSEMBLY__
+LDLIBS = -L. -lminios
LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
ifeq ($(TARGET_ARCH),x86_32)
CFLAGS += -m32 -march=i686
LDFLAGS += -m elf_i386
+endif
+
+ifeq ($(TARGET_ARCH)$(pae),x86_32y)
+CFLAGS += -DCONFIG_X86_PAE=1
+ASFLAGS += -DCONFIG_X86_PAE=1
endif
ifeq ($(TARGET_ARCH),x86_64)
@@ -49,11 +56,11 @@ links:
links:
[ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
-libminios.a: $(OBJS) $(HEAD)
- ar r libminios.a $(HEAD) $(OBJS)
+libminios.a: links $(OBJS) $(HEAD)
+ $(AR) r libminios.a $(HEAD) $(OBJS)
-$(TARGET): links libminios.a $(HEAD)
- $(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf
+$(TARGET): libminios.a $(HEAD)
+ $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
gzip -f -9 -c $@.elf >$@.gz
.PHONY: clean
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/include/mm.h Tue May 30 14:30:34 2006 -0500
@@ -43,6 +43,8 @@
#if defined(__i386__)
+#if !defined(CONFIG_X86_PAE)
+
#define L2_PAGETABLE_SHIFT 22
#define L1_PAGETABLE_ENTRIES 1024
@@ -50,6 +52,30 @@
#define PADDR_BITS 32
#define PADDR_MASK (~0UL)
+
+#define UNMAPPED_PT_FRAMES 1
+#define PRIpte "08lx"
+typedef unsigned long pgentry_t;
+
+#else /* defined(CONFIG_X86_PAE) */
+
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+
+#define L1_PAGETABLE_ENTRIES 512
+#define L2_PAGETABLE_ENTRIES 512
+#define L3_PAGETABLE_ENTRIES 4
+
+#define PADDR_BITS 44
+#define PADDR_MASK ((1ULL << PADDR_BITS)-1)
+
+#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1)
+
+#define UNMAPPED_PT_FRAMES 2
+#define PRIpte "016llx"
+typedef uint64_t pgentry_t;
+
+#endif /* !defined(CONFIG_X86_PAE) */
#elif defined(__x86_64__)
@@ -81,6 +107,10 @@
#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1)
#define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1)
+#define UNMAPPED_PT_FRAMES 3
+#define PRIpte "016lx"
+typedef unsigned long pgentry_t;
+
#endif
#define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1)
@@ -90,9 +120,11 @@
(((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
#define l2_table_offset(_a) \
(((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
#define l3_table_offset(_a) \
(((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#endif
+#if defined(__x86_64__)
#define l4_table_offset(_a) \
(((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
#endif
@@ -111,14 +143,21 @@
#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER)
+#if defined(CONFIG_X86_PAE)
+#define L3_PROT (_PAGE_PRESENT)
+#endif /* CONFIG_X86_PAE */
#elif defined(__x86_64__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#endif
-
+#endif /* __i386__ || __x86_64__ */
+
+#ifndef CONFIG_X86_PAE
#define PAGE_SIZE (1UL << L1_PAGETABLE_SHIFT)
+#else
+#define PAGE_SIZE (1ULL << L1_PAGETABLE_SHIFT)
+#endif
#define PAGE_SHIFT L1_PAGETABLE_SHIFT
#define PAGE_MASK (~(PAGE_SIZE-1))
@@ -129,23 +168,31 @@
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+/* Definitions for machine and pseudophysical addresses. */
+#ifdef CONFIG_X86_PAE
+typedef unsigned long long paddr_t;
+typedef unsigned long long maddr_t;
+#else
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+#endif
+
extern unsigned long *phys_to_machine_mapping;
extern char _text, _etext, _edata, _end;
#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
-static __inline__ unsigned long phys_to_machine(unsigned long phys)
-{
- unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT);
- machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK);
- return machine;
-}
-
+static __inline__ maddr_t phys_to_machine(paddr_t phys)
+{
+ maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+ machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+ return machine;
+}
#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
-static __inline__ unsigned long machine_to_phys(unsigned long machine)
-{
- unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT);
- phys = (phys << L1_PAGETABLE_SHIFT) | (machine & ~PAGE_MASK);
- return phys;
+static __inline__ paddr_t machine_to_phys(maddr_t machine)
+{
+ paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+ phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+ return phys;
}
#define VIRT_START ((unsigned long)&_text)
@@ -155,7 +202,7 @@ static __inline__ unsigned long machine_
#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt)))
#define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach)))
-#define mfn_to_virt(_mfn) (mach_to_virt(_mfn << PAGE_SHIFT))
+#define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT))
#define pfn_to_virt(_pfn) (to_virt(_pfn << PAGE_SHIFT))
/* Pagetable walking. */
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/include/types.h
--- a/extras/mini-os/include/types.h Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/include/types.h Tue May 30 14:30:34 2006 -0500
@@ -43,14 +43,19 @@ typedef unsigned long long u_quad_t;
typedef unsigned long long u_quad_t;
typedef unsigned int uintptr_t;
+#if !defined(CONFIG_X86_PAE)
typedef struct { unsigned long pte_low; } pte_t;
+#else
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
+#endif /* CONFIG_X86_PAE */
+
#elif defined(__x86_64__)
typedef long quad_t;
typedef unsigned long u_quad_t;
typedef unsigned long uintptr_t;
typedef struct { unsigned long pte; } pte_t;
-#endif
+#endif /* __i386__ || __x86_64__ */
typedef u8 uint8_t;
typedef s8 int8_t;
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/kernel.c Tue May 30 14:30:34 2006 -0500
@@ -63,7 +63,12 @@ void failsafe_callback(void);
extern char shared_info[PAGE_SIZE];
+#if !defined(CONFIG_X86_PAE)
#define __pte(x) ((pte_t) { (x) } )
+#else
+#define __pte(x) ({ unsigned long long _x = (x); \
+ ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
+#endif
static shared_info_t *map_shared_info(unsigned long pa)
{
@@ -71,7 +76,7 @@ static shared_info_t *map_shared_info(un
(unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG) )
{
printk("Failed to map shared_info!!\n");
- *(int*)0=0;
+ do_exit();
}
return (shared_info_t *)shared_info;
}
@@ -126,6 +131,10 @@ void start_kernel(start_info_t *si)
/* WARN: don't do printk before here, it uses information from
shared_info. Use xprintk instead. */
memcpy(&start_info, si, sizeof(*si));
+
+ /* set up minimal memory infos */
+ phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
+
/* Grab the shared_info pointer and put it in a safe place. */
HYPERVISOR_shared_info = map_shared_info(start_info.shared_info);
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/lib/printf.c
--- a/extras/mini-os/lib/printf.c Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/lib/printf.c Tue May 30 14:30:34 2006 -0500
@@ -53,6 +53,8 @@
*
* $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
*/
+
+#if !defined HAVE_LIBC
#include <os.h>
#include <types.h>
@@ -789,4 +791,4 @@ int sscanf(const char * buf, const char
return i;
}
-
+#endif
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/lib/string.c
--- a/extras/mini-os/lib/string.c Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/lib/string.c Tue May 30 14:30:34 2006 -0500
@@ -17,6 +17,8 @@
* $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
****************************************************************************
*/
+
+#if !defined HAVE_LIBC
#include <os.h>
#include <types.h>
@@ -153,3 +155,5 @@ char * strstr(const char * s1,const char
}
return NULL;
}
+
+#endif
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/mm.c
--- a/extras/mini-os/mm.c Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/mm.c Tue May 30 14:30:34 2006 -0500
@@ -368,7 +368,7 @@ void new_pt_frame(unsigned long *pt_pfn,
void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
unsigned long offset, unsigned long level)
{
- unsigned long *tab = (unsigned long *)start_info.pt_base;
+ pgentry_t *tab = (pgentry_t *)start_info.pt_base;
unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn);
unsigned long prot_e, prot_t, pincmd;
mmu_update_t mmu_updates[1];
@@ -382,40 +382,45 @@ void new_pt_frame(unsigned long *pt_pfn,
as a page table page */
memset((unsigned long*)pfn_to_virt(*pt_pfn), 0, PAGE_SIZE);
- if (level == L1_FRAME)
- {
+ switch ( level )
+ {
+ case L1_FRAME:
prot_e = L1_PROT;
prot_t = L2_PROT;
pincmd = MMUEXT_PIN_L1_TABLE;
- }
-#if (defined __x86_64__)
- else if (level == L2_FRAME)
- {
+ break;
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
+ case L2_FRAME:
prot_e = L2_PROT;
prot_t = L3_PROT;
pincmd = MMUEXT_PIN_L2_TABLE;
- }
- else if (level == L3_FRAME)
- {
+ break;
+#endif
+#if defined(__x86_64__)
+ case L3_FRAME:
prot_e = L3_PROT;
prot_t = L4_PROT;
pincmd = MMUEXT_PIN_L3_TABLE;
- }
-#endif
- else
- {
+ break;
+#endif
+ default:
printk("new_pt_frame() called with invalid level number %d\n", level);
do_exit();
- }
+ break;
+ }
/* Update the entry */
-#if (defined __x86_64__)
+#if defined(__x86_64__)
tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
tab = pte_to_virt(tab[l3_table_offset(pt_page)]);
#endif
- mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) +
- sizeof(void *)* l1_table_offset(pt_page);
- mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT |
+#if defined(CONFIG_X86_PAE)
+ tab = pte_to_virt(tab[l3_table_offset(pt_page)]);
+#endif
+
+ mmu_updates[0].ptr = ((pgentry_t)tab[l2_table_offset(pt_page)] & PAGE_MASK) +
+ sizeof(pgentry_t) * l1_table_offset(pt_page);
+ mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT |
(prot_e & ~_PAGE_RW);
if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
{
@@ -434,8 +439,8 @@ void new_pt_frame(unsigned long *pt_pfn,
/* Now fill the new page table page with entries.
Update the page directory as well. */
- mmu_updates[0].ptr = (prev_l_mfn << PAGE_SHIFT) + sizeof(void *) * offset;
- mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
+ mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
+ mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
{
printk("ERROR: mmu_update failed\n");
@@ -450,16 +455,13 @@ void build_pagetable(unsigned long *star
unsigned long start_address, end_address;
unsigned long pfn_to_map, pt_pfn = *start_pfn;
static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
- unsigned long *tab = (unsigned long *)start_info.pt_base;
+ pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
- unsigned long page, offset;
+ unsigned long offset;
int count = 0;
-#if defined(__x86_64__)
- pfn_to_map = (start_info.nr_pt_frames - 3) * L1_PAGETABLE_ENTRIES;
-#else
- pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
-#endif
+ pfn_to_map = (start_info.nr_pt_frames - UNMAPPED_PT_FRAMES) * L1_PAGETABLE_ENTRIES;
+
start_address = (unsigned long)pfn_to_virt(pfn_to_map);
end_address = (unsigned long)pfn_to_virt(*max_pfn);
@@ -468,7 +470,7 @@ void build_pagetable(unsigned long *star
while(start_address < end_address)
{
- tab = (unsigned long *)start_info.pt_base;
+ tab = (pgentry_t *)start_info.pt_base;
mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
#if defined(__x86_64__)
@@ -480,6 +482,8 @@ void build_pagetable(unsigned long *star
page = tab[offset];
mfn = pte_to_mfn(page);
tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
offset = l3_table_offset(start_address);
/* Need new L2 pt frame */
if(!(start_address & L2_MASK))
@@ -498,9 +502,9 @@ void build_pagetable(unsigned long *star
mfn = pte_to_mfn(page);
offset = l1_table_offset(start_address);
- mmu_updates[count].ptr = (mfn << PAGE_SHIFT) + sizeof(void *) * offset;
+ mmu_updates[count].ptr = ((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
mmu_updates[count].val =
- pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
+ (pgentry_t)pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
count++;
if (count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn)
{
@@ -557,9 +561,6 @@ void init_mm(void)
printk(" stack start: %p\n", &stack);
printk(" _end: %p\n", &_end);
- /* set up minimal memory infos */
- phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
-
/* First page follows page table pages and 3 more pages (store page etc) */
start_pfn = PFN_UP(to_phys(start_info.pt_base)) +
start_info.nr_pt_frames + 3;
@@ -569,7 +570,7 @@ void init_mm(void)
printk(" max_pfn: %lx\n", max_pfn);
build_pagetable(&start_pfn, &max_pfn);
-
+
/*
* now we can initialise the page allocator
*/
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/traps.c
--- a/extras/mini-os/traps.c Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/traps.c Tue May 30 14:30:34 2006 -0500
@@ -95,25 +95,26 @@ DO_ERROR(18, "machine check", machine_ch
void page_walk(unsigned long virt_address)
{
- unsigned long *tab = (unsigned long *)start_info.pt_base;
- unsigned long addr = virt_address, page;
+ pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
+ unsigned long addr = virt_address;
printk("Pagetable walk from virt %lx, base %lx:\n", virt_address,
start_info.pt_base);
#if defined(__x86_64__)
page = tab[l4_table_offset(addr)];
- tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
- printk(" L4 = %p (%p) [offset = %lx]\n", page, tab,
l4_table_offset(addr));
-
+ tab = pte_to_virt(page);
+ printk(" L4 = %"PRIpte" (%p) [offset = %lx]\n", page, tab,
l4_table_offset(addr));
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
page = tab[l3_table_offset(addr)];
- tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
- printk(" L3 = %p (%p) [offset = %lx]\n", page, tab,
l3_table_offset(addr));
+ tab = pte_to_virt(page);
+ printk(" L3 = %"PRIpte" (%p) [offset = %lx]\n", page, tab,
l3_table_offset(addr));
#endif
page = tab[l2_table_offset(addr)];
- tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
- printk(" L2 = %p (%p) [offset = %lx]\n", page, tab,
l2_table_offset(addr));
+ tab = pte_to_virt(page);
+ printk(" L2 = %"PRIpte" (%p) [offset = %lx]\n", page, tab,
l2_table_offset(addr));
page = tab[l1_table_offset(addr)];
- printk(" L1 = %p (%p) [offset = %lx]\n", page, tab,
l1_table_offset(addr));
+ printk(" L1 = %"PRIpte" (%p) [offset = %lx]\n", page, tab,
l1_table_offset(addr));
}
diff -r e74246451527 -r f54d38cea8ac extras/mini-os/x86_32.S
--- a/extras/mini-os/x86_32.S Tue May 30 12:52:02 2006 -0500
+++ b/extras/mini-os/x86_32.S Tue May 30 14:30:34 2006 -0500
@@ -4,9 +4,15 @@
.section __xen_guest
.ascii "GUEST_OS=Mini-OS"
.ascii ",XEN_VER=xen-3.0"
+ .ascii ",VIRT_BASE=0xc0000000" /* &_text from minios_x86_32.lds */
+ .ascii ",ELF_PADDR_OFFSET=0xc0000000"
.ascii ",HYPERCALL_PAGE=0x2"
+#ifdef CONFIG_X86_PAE
+ .ascii ",PAE=yes"
+#else
+ .ascii ",PAE=no"
+#endif
.ascii ",LOADER=generic"
- .ascii ",PT_MODE_WRITABLE"
.byte 0
.text
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Tue May 30 14:30:34 2006 -0500
@@ -70,9 +70,6 @@
/* Forward Declaration. */
void __init find_max_pfn(void);
-
-/* Allows setting of maximum possible memory size */
-static unsigned long xen_override_max_pfn;
static int xen_panic_event(struct notifier_block *, unsigned long, void *);
static struct notifier_block xen_panic_block = {
@@ -399,6 +396,26 @@ start_info_t *xen_start_info;
start_info_t *xen_start_info;
EXPORT_SYMBOL(xen_start_info);
+static void __init add_memory_region(unsigned long long start,
+ unsigned long long size, int type)
+{
+ int x;
+
+ if (!efi_enabled) {
+ x = e820.nr_map;
+
+ if (x == E820MAX) {
+ printk(KERN_ERR "Ooops! Too many entries in the memory
map!\n");
+ return;
+ }
+
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
+ }
+} /* add_memory_region */
+
static void __init limit_regions(unsigned long long size)
{
unsigned long long current_addr = 0;
@@ -442,27 +459,20 @@ static void __init limit_regions(unsigne
}
return;
}
-}
-
-static void __init add_memory_region(unsigned long long start,
- unsigned long long size, int type)
-{
- int x;
-
- if (!efi_enabled) {
- x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory
map!\n");
- return;
- }
-
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
- }
-} /* add_memory_region */
+#ifdef CONFIG_XEN
+ if (i==e820.nr_map && current_addr < size) {
+ /*
+ * The e820 map finished before our requested size so
+ * extend the final entry to the requested address.
+ */
+ --i;
+ if (e820.map[i].type == E820_RAM)
+ e820.map[i].size -= current_addr - size;
+ else
+ add_memory_region(current_addr, size - current_addr, E820_RAM);
+ }
+#endif
+}
#define E820_DEBUG 1
@@ -492,7 +502,6 @@ static void __init print_memory_map(char
}
}
-#if 0
/*
* Sanitize the BIOS e820 map.
*
@@ -680,9 +689,13 @@ static int __init sanitize_e820_map(stru
*/
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
+#ifndef CONFIG_XEN
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
return -1;
+#else
+ BUG_ON(nr_map < 1);
+#endif
do {
unsigned long long start = biosmap->addr;
@@ -694,6 +707,7 @@ static int __init copy_e820_map(struct e
if (start > end)
return -1;
+#ifndef CONFIG_XEN
/*
* Some BIOSes claim RAM in the 640k - 1M region.
* Not right. Fix it up.
@@ -708,11 +722,11 @@ static int __init copy_e820_map(struct e
size = end - start;
}
}
+#endif
add_memory_region(start, size, type);
} while (biosmap++,--nr_map);
return 0;
}
-#endif
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
@@ -785,13 +799,8 @@ static void __init parse_cmdline_early (
unsigned long long mem_size;
mem_size = memparse(from+4, &from);
-#if 0
limit_regions(mem_size);
userdef=1;
-#else
- xen_override_max_pfn =
- (unsigned long)(mem_size>>PAGE_SHIFT);
-#endif
}
}
@@ -984,7 +993,6 @@ static void __init parse_cmdline_early (
}
}
-#if 0 /* !XEN */
/*
* Callback for efi_memory_walk.
*/
@@ -1036,21 +1044,6 @@ void __init find_max_pfn(void)
memory_present(0, start, end);
}
}
-#else
-/* We don't use the fake e820 because we need to respond to user override. */
-void __init find_max_pfn(void)
-{
- if (xen_override_max_pfn == 0) {
- max_pfn = xen_start_info->nr_pages;
- /* Default 8MB slack (to balance backend allocations). */
- max_pfn += 8 << (20 - PAGE_SHIFT);
- } else if (xen_override_max_pfn > xen_start_info->nr_pages) {
- max_pfn = xen_override_max_pfn;
- } else {
- max_pfn = xen_start_info->nr_pages;
- }
-}
-#endif /* XEN */
/*
* Determine low and high memory ranges:
@@ -1158,6 +1151,15 @@ static void __init register_bootmem_low_
*/
last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+#ifdef CONFIG_XEN
+ /*
+ * Truncate to the number of actual pages currently
+ * present.
+ */
+ if (last_pfn > xen_start_info->nr_pages)
+ last_pfn = xen_start_info->nr_pages;
+#endif
+
if (last_pfn > max_low_pfn)
last_pfn = max_low_pfn;
@@ -1351,83 +1353,33 @@ void __init remapped_pgdat_init(void)
* and also for regions reported as reserved by the e820.
*/
static void __init
-legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
+legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
+ struct resource *code_resource,
+ struct resource *data_resource)
{
int i;
-#ifdef CONFIG_XEN
- dom0_op_t op;
- struct dom0_memory_map_entry *map;
- unsigned long gapstart, gapsize;
- unsigned long long last;
-#endif
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
probe_roms();
#endif
-#ifdef CONFIG_XEN
- map = alloc_bootmem_low_pages(PAGE_SIZE);
- op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
- set_xen_guest_handle(op.u.physical_memory_map.memory_map, map);
- op.u.physical_memory_map.max_map_entries =
- PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
- BUG_ON(HYPERVISOR_dom0_op(&op));
-
- last = 0x100000000ULL;
- gapstart = 0x10000000;
- gapsize = 0x400000;
-
- for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
+ for (i = 0; i < nr_map; i++) {
struct resource *res;
-
- if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
- gapsize = last - map[i].end;
- gapstart = map[i].end;
- }
- if (map[i].start < last)
- last = map[i].start;
-
- if (map[i].end > 0x100000000ULL)
+ if (e820[i].addr + e820[i].size > 0x100000000ULL)
continue;
res = alloc_bootmem_low(sizeof(struct resource));
- res->name = map[i].is_ram ? "System RAM" : "reserved";
- res->start = map[i].start;
- res->end = map[i].end - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- request_resource(&iomem_resource, res);
- }
-
- free_bootmem(__pa(map), PAGE_SIZE);
-
- /*
- * Start allocating dynamic PCI memory a bit into the gap,
- * aligned up to the nearest megabyte.
- *
- * Question: should we try to pad it up a bit (do something
- * like " + (gapsize >> 3)" in there too?). We now have the
- * technology.
- */
- pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
-
- printk("Allocating PCI resources starting at %08lx (gap:
%08lx:%08lx)\n",
- pci_mem_start, gapstart, gapsize);
-#else
- for (i = 0; i < e820.nr_map; i++) {
- struct resource *res;
- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
- continue;
- res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820.map[i].type) {
+ switch (e820[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
default: res->name = "reserved";
}
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
+ res->start = e820[i].addr;
+ res->end = res->start + e820[i].size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
- if (e820.map[i].type == E820_RAM) {
+#ifndef CONFIG_XEN
+ if (e820[i].type == E820_RAM) {
/*
* We don't know which RAM region contains kernel data,
* so we try it repeatedly and let the resource manager
@@ -1439,38 +1391,21 @@ legacy_init_iomem_resources(struct resou
request_resource(res, &crashk_res);
#endif
}
- }
-#endif
-}
-
-/*
- * Request address space for all standard resources
- */
-static void __init register_memory(void)
-{
-#ifndef CONFIG_XEN
+#endif
+ }
+}
+
+/*
+ * Locate a unused range of the physical address space below 4G which
+ * can be used for PCI mappings.
+ */
+static void __init
+e820_setup_gap(struct e820entry *e820, int nr_map)
+{
unsigned long gapstart, gapsize, round;
unsigned long long last;
-#endif
- int i;
-
- /* Nothing to do if not running in dom0. */
- if (!(xen_start_info->flags & SIF_INITDOMAIN))
- return;
-
- if (efi_enabled)
- efi_initialize_iomem_resources(&code_resource, &data_resource);
- else
- legacy_init_iomem_resources(&code_resource, &data_resource);
-
- /* EFI systems may still have VGA */
- request_resource(&iomem_resource, &video_ram_resource);
-
- /* request I/O space for devices used on all i[345]86 PCs */
- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
- request_resource(&ioport_resource, &standard_io_resources[i]);
-
-#ifndef CONFIG_XEN
+ int i;
+
/*
* Search for the bigest gap in the low 32 bits of the e820
* memory space.
@@ -1478,10 +1413,10 @@ static void __init register_memory(void)
last = 0x100000000ull;
gapstart = 0x10000000;
gapsize = 0x400000;
- i = e820.nr_map;
+ i = nr_map;
while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
+ unsigned long long start = e820[i].addr;
+ unsigned long long end = start + e820[i].size;
/*
* Since "last" is at most 4GB, we know we'll
@@ -1511,6 +1446,53 @@ static void __init register_memory(void)
printk("Allocating PCI resources starting at %08lx (gap:
%08lx:%08lx)\n",
pci_mem_start, gapstart, gapsize);
+}
+
+/*
+ * Request address space for all standard resources
+ */
+static void __init register_memory(void)
+{
+#ifdef CONFIG_XEN
+ struct e820entry *machine_e820;
+ struct xen_memory_map memmap;
+#endif
+ int i;
+
+ /* Nothing to do if not running in dom0. */
+ if (!(xen_start_info->flags & SIF_INITDOMAIN))
+ return;
+
+#ifdef CONFIG_XEN
+ machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
+
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, machine_e820);
+
+ BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
+
+ legacy_init_iomem_resources(machine_e820, memmap.nr_entries,
+ &code_resource, &data_resource);
+#else
+ if (efi_enabled)
+ efi_initialize_iomem_resources(&code_resource, &data_resource);
+ else
+ legacy_init_iomem_resources(e820.map, e820.nr_map,
+ &code_resource, &data_resource);
+#endif
+
+ /* EFI systems may still have VGA */
+ request_resource(&iomem_resource, &video_ram_resource);
+
+ /* request I/O space for devices used on all i[345]86 PCs */
+ for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+ request_resource(&ioport_resource, &standard_io_resources[i]);
+
+#ifdef CONFIG_XEN
+ e820_setup_gap(machine_e820, memmap.nr_entries);
+ free_bootmem(__pa(machine_e820), PAGE_SIZE);
+#else
+ e820_setup_gap(e820.map, e820.nr_map);
#endif
}
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Tue May 30 14:30:34 2006 -0500
@@ -191,6 +191,7 @@ swiotlb_init(void)
if (swiotlb_force == 1) {
swiotlb = 1;
} else if ((swiotlb_force != -1) &&
+ is_running_on_xen() &&
(xen_start_info->flags & SIF_INITDOMAIN)) {
/* Domain 0 always has a swiotlb. */
ram_end = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Tue May 30 14:30:34 2006 -0500
@@ -228,6 +228,12 @@ static inline int page_kills_ppro(unsign
return 0;
}
+#else
+
+#define page_kills_ppro(p) 0
+
+#endif
+
extern int is_available_memory(efi_memory_desc_t *);
int page_is_ram(unsigned long pagenr)
@@ -268,13 +274,6 @@ int page_is_ram(unsigned long pagenr)
}
return 0;
}
-
-#else /* CONFIG_XEN */
-
-#define page_kills_ppro(p) 0
-#define page_is_ram(p) 1
-
-#endif
#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Tue May 30 14:30:34 2006 -0500
@@ -51,7 +51,7 @@ config GENERIC_IOMAP
default y
config XEN
- bool
+ bool "Xen hypervisor support"
default y
help
Enable Xen hypervisor support. Resulting kernel runs
@@ -60,34 +60,9 @@ config ARCH_XEN
config ARCH_XEN
bool
default y
+ depends on XEN
help
TEMP ONLY. Needs to be on for drivers/xen to build.
-
-config XEN_PRIVILEGED_GUEST
- bool "Privileged Guest"
- default n
- help
- Used in drivers/xen/privcmd.c. Should go away?
-
-config XEN_BLKDEV_GRANT
- depends on XEN
- bool
- default y
-
-config XEN_BLKDEV_FRONTEND
- depends on XEN
- bool
- default y
-
-config XEN_BACKEND
- depends on XEN
- bool
- default y
-
-config XEN_BLKDEV_BACKEND
- depends on XEN && XEN_BACKEND
- bool
- default y
config XEN_IA64_DOM0_VP
bool "dom0 vp model"
@@ -102,18 +77,6 @@ config XEN_IA64_DOM0_NON_VP
default y
help
dom0 P=M model
-
-config XEN_SYSFS
- bool "Export Xen attributes in sysfs"
- depends on XEN && SYSFS
- default y
- help
- Xen hypervisor attributes will show up under /sys/hypervisor/.
-
-config XEN_INTERFACE_VERSION
- hex
- depends on XEN
- default 0x00030202
config SCHED_NO_NO_OMIT_FRAME_POINTER
bool
@@ -532,3 +495,16 @@ source "security/Kconfig"
source "security/Kconfig"
source "crypto/Kconfig"
+
+# override default values of drivers/xen/Kconfig
+if !XEN_IA64_DOM0_VP
+config HAVE_ARCH_ALLOC_SKB
+ bool
+ default n
+
+config HAVE_ARCH_DEV_ALLOC_SKB
+ bool
+ default n
+endif
+
+source "drivers/xen/Kconfig"
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Tue May 30 14:30:34 2006 -0500
@@ -514,6 +514,9 @@ setup_arch (char **cmdline_p)
#ifdef CONFIG_XEN
if (running_on_xen) {
extern shared_info_t *HYPERVISOR_shared_info;
+ extern int xen_init (void);
+
+ xen_init ();
/* xen_start_info isn't setup yet, get the flags manually */
if (HYPERVISOR_shared_info->arch.flags & SIF_INITDOMAIN) {
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre
--- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre Tue May 30 14:30:34 2006 -0500
@@ -6,20 +6,6 @@
# for building (as all files in mv'd directories are thought by hg
# to have been deleted). I don't know how to avoid this right now,
# but if someone has a better way, I'm all ears
-
-function try_to_mv() {
- if [ ! -e $2 ]
- then
- mv $1 $2
- fi
-}
-
-try_to_mv mm/Kconfig mm/Kconfig.xen-x86
-
-# need to grab a couple of xen-modified files for generic_page_range and
-# typedef pte_fn_t which are used by driver/xen blkif
-#ln -sf ../mm.xen-x86/memory.c mm/
-#ln -sf ../linux.xen-x86/mm.h include/linux/
#eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h
ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Tue May 30 14:30:34 2006 -0500
@@ -2,7 +2,7 @@
# Makefile for Xen components
#
-obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o xenconsole.o xen_ksyms.o
+obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o xenconsole.o
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o
-pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o
\ No newline at end of file
+obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o util.o
+pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile Tue May 30 14:30:34 2006 -0500
@@ -1,20 +1,22 @@
+ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
obj-y += util.o
+endif
obj-y += core/
+#obj-y += char/
obj-y += console/
obj-y += evtchn/
-#obj-y += balloon/
+obj-$(CONFIG_XEN_IA64_DOM0_VP) += balloon/
obj-y += privcmd/
-obj-y += blkback/
-#obj-y += netback/
-obj-y += blkfront/
obj-y += xenbus/
-#obj-y += netfront/
-#obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += privcmd/
-#obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
-#obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
-#obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/
-#obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/
-#obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
+obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
+obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/
+obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/
+obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
+obj-$(CONFIG_XEN_TPMDEV_FRONTEND) += tpmfront/
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront/
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile Tue May 30 14:30:34 2006 -0500
@@ -1,14 +1,6 @@
#
# Makefile for the linux kernel.
#
-
-XENARCH := $(subst ",,$(CONFIG_XENARCH))
-
-CPPFLAGS_vmlinux.lds += -U$(XENARCH)
-
-$(obj)/vmlinux.lds.S:
- @ln -fsn $(srctree)/arch/$(XENARCH)/kernel/vmlinux.lds.S $@
-
obj-y := gnttab.o features.o
obj-$(CONFIG_PROC_FS) += xen_proc.o
@@ -16,8 +8,10 @@ ifeq ($(ARCH),ia64)
ifeq ($(ARCH),ia64)
obj-y += evtchn_ia64.o
obj-y += xenia64_init.o
+ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y)
+obj-$(CONFIG_NET) += skbuff.o
+endif
else
-extra-y += vmlinux.lds
obj-y += reboot.o evtchn.o fixup.o
obj-$(CONFIG_SMP) += smp.o # setup_profiling_timer def'd in ia64
obj-$(CONFIG_NET) += skbuff.o # until networking is up on ia64
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c Tue May 30 14:30:34 2006 -0500
@@ -246,25 +246,14 @@ static struct irqaction evtchn_irqaction
.name = "xen-event-channel"
};
-int evtchn_irq = 0xe9;
+static int evtchn_irq = 0xe9;
void __init evtchn_init(void)
{
shared_info_t *s = HYPERVISOR_shared_info;
- vcpu_info_t *vcpu_info = &s->vcpu_info[smp_processor_id()];
-
-#if 0
- int ret;
- irq = assign_irq_vector(AUTO_ASSIGN);
- ret = request_irq(irq, evtchn_interrupt, 0, "xen-event-channel", NULL);
- if (ret < 0)
- {
- printk("xen-event-channel unable to get irq %d (%d)\n", irq, ret);
- return;
- }
-#endif
+
register_percpu_irq(evtchn_irq, &evtchn_irqaction);
- vcpu_info->arch.evtchn_vector = evtchn_irq;
+ s->arch.evtchn_vector = evtchn_irq;
printk("xen-event-channel using irq %d\n", evtchn_irq);
spin_lock_init(&irq_mapping_update_lock);
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Tue May 30 14:30:34 2006 -0500
@@ -11,17 +11,20 @@ shared_info_t *HYPERVISOR_shared_info =
shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
EXPORT_SYMBOL(HYPERVISOR_shared_info);
-static int initialized;
start_info_t *xen_start_info;
+
+int running_on_xen;
+EXPORT_SYMBOL(running_on_xen);
int xen_init(void)
{
+ static int initialized;
shared_info_t *s = HYPERVISOR_shared_info;
if (initialized)
return running_on_xen ? 0 : -1;
- if (!running_on_xen)
+ if (!is_running_on_xen())
return -1;
xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
@@ -35,6 +38,7 @@ int xen_init(void)
return 0;
}
+#ifndef CONFIG_XEN_IA64_DOM0_VP
/* We just need a range of legal va here, though finally identity
* mapped one is instead used for gnttab mapping.
*/
@@ -47,6 +51,7 @@ unsigned long alloc_empty_foreign_map_pa
return (unsigned long)vma->addr;
}
+#endif
#if 0
/* These should be define'd but some drivers use them without
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Tue May 30 14:30:34 2006 -0500
@@ -247,7 +247,7 @@ 1: mov r8=r32
XEN_RESTORE_PSR_IC
;;
br.ret.sptk.many rp
-END(xen_set_rr)
+END(xen_set_kr)
GLOBAL_ENTRY(xen_fc)
movl r8=running_on_xen;;
@@ -345,7 +345,7 @@ GLOBAL_ENTRY(xen_send_ipi)
GLOBAL_ENTRY(xen_send_ipi)
mov r14=r32
mov r15=r33
- mov r2=0x380
+ mov r2=0x400
break 0x1000
;;
br.ret.sptk.many rp
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Tue May 30 14:30:34 2006 -0500
@@ -23,18 +23,56 @@
//#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/hypervisor.h>
#include <asm/hypercall.h>
-
-#define XEN_IA64_BALLOON_IS_NOT_YET
-#ifndef XEN_IA64_BALLOON_IS_NOT_YET
+#include <xen/interface/memory.h>
#include <xen/balloon.h>
-#else
-#define balloon_lock(flags) ((void)flags)
-#define balloon_unlock(flags) ((void)flags)
-#endif
-
+
+//XXX xen/ia64 copy_from_guest() is broken.
+// This is a temporal work around until it is fixed.
+// used by balloon.c netfront.c
+
+// get_xen_guest_handle is defined only when __XEN_TOOLS__ is defined
+// if the definition in arch-ia64.h is changed, this must be updated.
+#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
+
+int
+ia64_xenmem_reservation_op(unsigned long op,
+ struct xen_memory_reservation* reservation__)
+{
+ struct xen_memory_reservation reservation = *reservation__;
+ unsigned long* frame_list;
+ unsigned long nr_extents = reservation__->nr_extents;
+ int ret = 0;
+ get_xen_guest_handle(frame_list, reservation__->extent_start);
+
+ BUG_ON(op != XENMEM_increase_reservation &&
+ op != XENMEM_decrease_reservation &&
+ op != XENMEM_populate_physmap);
+
+ while (nr_extents > 0) {
+ int tmp_ret;
+ volatile unsigned long dummy;
+
+ set_xen_guest_handle(reservation.extent_start, frame_list);
+ reservation.nr_extents = nr_extents;
+
+ dummy = frame_list[0];// re-install tlb entry before hypercall
+ tmp_ret = ____HYPERVISOR_memory_op(op, &reservation);
+ if (tmp_ret < 0) {
+ if (ret == 0) {
+ ret = tmp_ret;
+ }
+ break;
+ }
+ frame_list += tmp_ret;
+ nr_extents -= tmp_ret;
+ ret += tmp_ret;
+ }
+ return ret;
+}
//XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
// move those to lib/contiguous_bitmap?
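
ia64_xenmem_reservation_op() exists because a single XENMEM_* hypercall may complete only part of the frame list; the loop advances the guest handle past the extents already processed and retries, returning the total count (or the first error if nothing succeeded). A minimal caller, with frames[] and count as hypothetical caller-side names, could look like:

	/* Sketch only, not part of the patch: release `count' frames
	 * listed in `frames[]' back to Xen via the retrying wrapper. */
	static int release_frames(unsigned long *frames, unsigned long count)
	{
		struct xen_memory_reservation reservation = {
			.address_bits = 0,
			.extent_order = 0,
			.domid        = DOMID_SELF,
			.nr_extents   = count,
		};
		set_xen_guest_handle(reservation.extent_start, frames);
		return ia64_xenmem_reservation_op(XENMEM_decrease_reservation,
						  &reservation);
	}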
@@ -105,6 +143,39 @@ static void contiguous_bitmap_clear(
}
}
+static unsigned long
+HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
+ unsigned int address_bits)
+{
+ unsigned long ret;
+ struct xen_memory_reservation reservation = {
+ .nr_extents = 1,
+ .address_bits = address_bits,
+ .extent_order = extent_order,
+ .domid = DOMID_SELF
+ };
+ set_xen_guest_handle(reservation.extent_start, &gpfn);
+ ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+ BUG_ON(ret != 1);
+ return 0;
+}
+
+static unsigned long
+HYPERVISOR_remove_physmap(unsigned long gpfn, unsigned int extent_order)
+{
+ unsigned long ret;
+ struct xen_memory_reservation reservation = {
+ .nr_extents = 1,
+ .address_bits = 0,
+ .extent_order = extent_order,
+ .domid = DOMID_SELF
+ };
+ set_xen_guest_handle(reservation.extent_start, &gpfn);
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+ BUG_ON(ret != 1);
+ return 0;
+}
+
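The two helpers above collapse the general reservation interface to a single extent and turn partial success into BUG_ON(), since their callers have no sensible recovery path. A round trip through them, assuming an order-0 page p obtained elsewhere (hypothetical), is simply:

	/* Sketch: hand one pseudo-physical frame back to Xen, then
	 * repopulate it.  `p' is a hypothetical struct page. */
	unsigned long gpfn = page_to_pfn(p);
	HYPERVISOR_remove_physmap(gpfn, 0);       /* order-0 extent */
	HYPERVISOR_populate_physmap(gpfn, 0, 0);  /* no address-bits limit */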
/* Ensure multi-page extents are contiguous in machine memory. */
int
__xen_create_contiguous_region(unsigned long vstart,
@@ -113,29 +184,29 @@ __xen_create_contiguous_region(unsigned
unsigned long error = 0;
unsigned long gphys = __pa(vstart);
unsigned long start_gpfn = gphys >> PAGE_SHIFT;
- unsigned long num_pfn = 1 << order;
+ unsigned long num_gpfn = 1 << order;
unsigned long i;
unsigned long flags;
- scrub_pages(vstart, 1 << order);
+ scrub_pages(vstart, num_gpfn);
balloon_lock(flags);
- //XXX order
- for (i = 0; i < num_pfn; i++) {
- error = HYPERVISOR_zap_physmap(start_gpfn + i, 0);
- if (error) {
- goto out;
- }
+ error = HYPERVISOR_remove_physmap(start_gpfn, order);
+ if (error) {
+ goto fail;
}
error = HYPERVISOR_populate_physmap(start_gpfn, order, address_bits);
- contiguous_bitmap_set(start_gpfn, 1UL << order);
+ if (error) {
+ goto fail;
+ }
+ contiguous_bitmap_set(start_gpfn, num_gpfn);
#if 0
{
unsigned long mfn;
unsigned long mfn_prev = ~0UL;
- for (i = 0; i < 1 << order; i++) {
+ for (i = 0; i < num_gpfn; i++) {
mfn = pfn_to_mfn_for_dma(start_gpfn + i);
if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
xprintk("\n");
@@ -145,7 +216,7 @@ __xen_create_contiguous_region(unsigned
vstart, virt_to_bus((void*)vstart),
phys_to_machine_for_dma(gphys));
xprintk("mfn: ");
- for (i = 0; i < 1 << order; i++) {
+ for (i = 0; i < num_gpfn; i++) {
mfn = pfn_to_mfn_for_dma(start_gpfn + i);
xprintk("0x%lx ", mfn);
}
@@ -159,76 +230,405 @@ out:
out:
balloon_unlock(flags);
return error;
+
+fail:
+ for (i = 0; i < num_gpfn; i++) {
+ error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
+ if (error) {
+ BUG();//XXX
+ }
+ }
+ goto out;
}
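
Note the fail path: a partially zapped range is rebuilt page by page (order 0) before the error is propagated, so the caller never sees holes in its pseudo-physical space. The expected consumer of this routine is DMA allocation, along these lines (an illustrative caller, not code from this patch):

	/* Sketch: make an order-2 allocation machine-contiguous and
	 * reachable with 32 address bits for a DMA-limited device. */
	unsigned long va = __get_free_pages(GFP_KERNEL, 2);
	if (va != 0 && __xen_create_contiguous_region(va, 2, 32) != 0) {
		free_pages(va, 2);
		va = 0;
	}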
void
__xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
+ unsigned long flags;
unsigned long error = 0;
- unsigned long gphys = __pa(vstart);
- unsigned long start_gpfn = gphys >> PAGE_SHIFT;
- unsigned long num_pfn = 1 << order;
+ unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
+ unsigned long num_gpfn = 1UL << order;
+ unsigned long* gpfns;
+ struct xen_memory_reservation reservation;
unsigned long i;
- unsigned long flags;
-
- scrub_pages(vstart, 1 << order);
+
+ gpfns = kmalloc(sizeof(gpfns[0]) * num_gpfn,
+ GFP_KERNEL | __GFP_NOFAIL);
+ for (i = 0; i < num_gpfn; i++) {
+ gpfns[i] = start_gpfn + i;
+ }
+
+ scrub_pages(vstart, num_gpfn);
balloon_lock(flags);
- contiguous_bitmap_clear(start_gpfn, 1UL << order);
-
- //XXX order
- for (i = 0; i < num_pfn; i++) {
- error = HYPERVISOR_zap_physmap(start_gpfn + i, 0);
- if (error) {
- goto out;
- }
- }
-
- for (i = 0; i < num_pfn; i++) {
- error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
- if (error) {
- goto out;
- }
- }
-
+ contiguous_bitmap_clear(start_gpfn, num_gpfn);
+ error = HYPERVISOR_remove_physmap(start_gpfn, order);
+ if (error) {
+ goto fail;
+ }
+
+ set_xen_guest_handle(reservation.extent_start, gpfns);
+ reservation.nr_extents = num_gpfn;
+ reservation.address_bits = 0;
+ reservation.extent_order = 0;
+ reservation.domid = DOMID_SELF;
+ error = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+ if (error != num_gpfn) {
+ error = -EFAULT;//XXX
+ goto fail;
+ }
+ error = 0;
out:
balloon_unlock(flags);
+ kfree(gpfns);
if (error) {
- //XXX
- }
+ // error can't be returned.
+ BUG();//XXX
+ }
+ return;
+
+fail:
+ for (i = 0; i < num_gpfn; i++) {
+ int tmp_error;// don't overwrite error.
+ tmp_error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0);
+ if (tmp_error) {
+ BUG();//XXX
+ }
+ }
+ goto out;
}
///////////////////////////////////////////////////////////////////////////
-//XXX taken from balloon.c
-// temporal hack until balloon driver support.
-#include <linux/module.h>
-
-struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
-{
- unsigned long vstart;
- unsigned int order = get_order(nr_pages * PAGE_SIZE);
-
- vstart = __get_free_pages(GFP_KERNEL, order);
- if (vstart == 0)
- return NULL;
-
- return virt_to_page(vstart);
-}
-
-void balloon_dealloc_empty_page_range(
- struct page *page, unsigned long nr_pages)
-{
- __free_pages(page, get_order(nr_pages * PAGE_SIZE));
-}
-
-void balloon_update_driver_allowance(long delta)
-{
-}
-
-EXPORT_SYMBOL(balloon_alloc_empty_page_range);
-EXPORT_SYMBOL(balloon_dealloc_empty_page_range);
-EXPORT_SYMBOL(balloon_update_driver_allowance);
-
-
+// grant table hack
+// cmd: GNTTABOP_xxx
+
+#include <linux/mm.h>
+#include <xen/interface/xen.h>
+#include <xen/gnttab.h>
+
+static void
+gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
+{
+ uint32_t flags;
+
+ flags = uop->flags;
+ if (flags & GNTMAP_readonly) {
+#if 0
+ xprintd("GNTMAP_readonly is not supported yet\n");
+#endif
+ flags &= ~GNTMAP_readonly;
+ }
+
+ if (flags & GNTMAP_host_map) {
+ if (flags & GNTMAP_application_map) {
+ xprintd("GNTMAP_application_map is not supported yet:
flags 0x%x\n", flags);
+ BUG();
+ }
+ if (flags & GNTMAP_contains_pte) {
+ xprintd("GNTMAP_contains_pte is not supported yet flags
0x%x\n", flags);
+ BUG();
+ }
+ } else if (flags & GNTMAP_device_map) {
+ xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
+ BUG();//XXX not yet. actually this flag is not used.
+ } else {
+ BUG();
+ }
+}
+
+int
+HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
+{
+ if (cmd == GNTTABOP_map_grant_ref) {
+ unsigned int i;
+ for (i = 0; i < count; i++) {
+ gnttab_map_grant_ref_pre(
+ (struct gnttab_map_grant_ref*)uop + i);
+ }
+ }
+
+ return ____HYPERVISOR_grant_table_op(cmd, uop, count);
+}
+
+
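Every grant-table batch now funnels through gnttab_map_grant_ref_pre() before the raw hypercall, silently dropping GNTMAP_readonly and refusing the flag combinations the ia64 port cannot honor yet. A backend mapping a single granted page would pass through it as follows (gref, remote_domid and vaddr are hypothetical values):

	/* Sketch: map one foreign grant into the kernel address space. */
	struct gnttab_map_grant_ref op = {
		.host_addr = (unsigned long)vaddr,
		.flags     = GNTMAP_host_map,
		.ref       = gref,
		.dom       = remote_domid,
	};
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
		BUG();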
+///////////////////////////////////////////////////////////////////////////
+// PageForeign(), SetPageForeign(), ClearPageForeign()
+
+struct address_space xen_ia64_foreign_dummy_mapping;
+
+///////////////////////////////////////////////////////////////////////////
+// foreign mapping
+
+struct xen_ia64_privcmd_entry {
+ atomic_t map_count;
+ struct page* page;
+};
+
+static void
+xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
+{
+ atomic_set(&entry->map_count, 0);
+ entry->page = NULL;
+}
+
+//TODO Using alloc_page() to allocate pseudo-physical address space
+// is a waste of memory.
+// When a VTI domain is created, qemu maps all of the domain's pages,
+// which amounts to several hundred megabytes at least.
+// Remove alloc_page().
+static int
+xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
+ unsigned long addr,
+ struct xen_ia64_privcmd_entry* entry,
+ unsigned long mfn,
+ pgprot_t prot,
+ domid_t domid)
+{
+ int error = 0;
+ struct page* page;
+ unsigned long gpfn;
+
+ BUG_ON((addr & ~PAGE_MASK) != 0);
+ BUG_ON(mfn == INVALID_MFN);
+
+ if (entry->page != NULL) {
+ error = -EBUSY;
+ goto out;
+ }
+ page = alloc_page(GFP_KERNEL);
+ if (page == NULL) {
+ error = -ENOMEM;
+ goto out;
+ }
+ gpfn = page_to_pfn(page);
+
+ error = HYPERVISOR_add_physmap(gpfn, mfn, 0/* prot:XXX */,
+ domid);
+ if (error != 0) {
+ goto out;
+ }
+
+ prot = vma->vm_page_prot;
+ error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
+ if (error != 0) {
+ (void)HYPERVISOR_zap_physmap(gpfn, 0);
+ error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
+ if (error) {
+ BUG();//XXX
+ }
+ __free_page(page);
+ } else {
+ atomic_inc(&entry->map_count);
+ entry->page = page;
+ }
+
+out:
+ return error;
+}
+
+static void
+xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_entry* entry)
+{
+ struct page* page = entry->page;
+ unsigned long gpfn = page_to_pfn(page);
+ int error;
+
+ error = HYPERVISOR_zap_physmap(gpfn, 0);
+ if (error) {
+ BUG();//XXX
+ }
+
+ error = HYPERVISOR_populate_physmap(gpfn, 0, 0);
+ if (error) {
+ BUG();//XXX
+ }
+
+ entry->page = NULL;
+ __free_page(page);
+}
+
+static void
+xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_entry* entry)
+{
+ if (entry->page != NULL) {
+ atomic_inc(&entry->map_count);
+ } else {
+ BUG_ON(atomic_read(&entry->map_count) != 0);
+ }
+}
+
+static void
+xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_entry* entry)
+{
+ if (entry->page != NULL && atomic_dec_and_test(&entry->map_count)) {
+ xen_ia64_privcmd_entry_munmap(entry);
+ }
+}
+
+struct xen_ia64_privcmd_range {
+ atomic_t ref_count;
+ unsigned long pgoff; // in PAGE_SIZE
+
+ unsigned long num_entries;
+ struct xen_ia64_privcmd_entry entries[0];
+};
+
+struct xen_ia64_privcmd_vma {
+ struct xen_ia64_privcmd_range* range;
+
+ unsigned long num_entries;
+ struct xen_ia64_privcmd_entry* entries;
+};
+
+static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
+static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
+
+struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
+ .open = &xen_ia64_privcmd_vma_open,
+ .close = &xen_ia64_privcmd_vma_close,
+};
+
+static void
+__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
+ struct xen_ia64_privcmd_vma* privcmd_vma,
+ struct xen_ia64_privcmd_range* privcmd_range)
+{
+ unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
+ unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long i;
+
+ BUG_ON(entry_offset < 0);
+ BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
+
+ privcmd_vma->range = privcmd_range;
+ privcmd_vma->num_entries = num_entries;
+ privcmd_vma->entries = &privcmd_range->entries[entry_offset];
+ vma->vm_private_data = privcmd_vma;
+ for (i = 0; i < privcmd_vma->num_entries; i++) {
+ xen_ia64_privcmd_entry_open(&privcmd_vma->entries[i]);
+ }
+
+ vma->vm_private_data = privcmd_vma;
+ vma->vm_ops = &xen_ia64_privcmd_vm_ops;
+}
+
+static void
+xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
+{
+	struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+ struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
+
+ atomic_inc(&privcmd_range->ref_count);
+ // vm_op->open() can't fail.
+ privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
+
+ __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
+}
+
+static void
+xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
+{
+ struct xen_ia64_privcmd_vma* privcmd_vma =
+ (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+ struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
+ unsigned long i;
+
+ for (i = 0; i < privcmd_vma->num_entries; i++) {
+ xen_ia64_privcmd_entry_close(&privcmd_vma->entries[i]);
+ }
+ vma->vm_private_data = NULL;
+ kfree(privcmd_vma);
+
+ if (atomic_dec_and_test(&privcmd_range->ref_count)) {
+#if 1
+ for (i = 0; i < privcmd_range->num_entries; i++) {
+ struct xen_ia64_privcmd_entry* entry =
+ &privcmd_range->entries[i];
+ BUG_ON(atomic_read(&entry->map_count) != 0);
+ BUG_ON(entry->page != NULL);
+ }
+#endif
+ vfree(privcmd_range);
+ }
+}
+
+int
+privcmd_mmap(struct file * file, struct vm_area_struct * vma)
+{
+ unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ struct xen_ia64_privcmd_range* privcmd_range;
+ struct xen_ia64_privcmd_vma* privcmd_vma;
+ unsigned long i;
+ BUG_ON(!running_on_xen);
+
+ BUG_ON(file->private_data != NULL);
+ privcmd_range =
+ vmalloc(sizeof(*privcmd_range) +
+ sizeof(privcmd_range->entries[0]) * num_entries);
+ if (privcmd_range == NULL) {
+ goto out_enomem0;
+ }
+ privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
+ if (privcmd_vma == NULL) {
+ goto out_enomem1;
+ }
+
+ /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
+
+ atomic_set(&privcmd_range->ref_count, 1);
+ privcmd_range->pgoff = vma->vm_pgoff;
+ privcmd_range->num_entries = num_entries;
+ for (i = 0; i < privcmd_range->num_entries; i++) {
+ xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
+ }
+
+ __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
+ return 0;
+
+out_enomem1:
+ kfree(privcmd_vma);
+out_enomem0:
+ vfree(privcmd_range);
+ return -ENOMEM;
+}
+
+int
+direct_remap_pfn_range(struct vm_area_struct *vma,
+ unsigned long address, // process virtual address
+ unsigned long mfn, // mfn, mfn + 1, ... mfn + size/PAGE_SIZE
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid) // target domain
+{
+ struct xen_ia64_privcmd_vma* privcmd_vma =
+ (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+ unsigned long i;
+ unsigned long offset;
+ int error = 0;
+ BUG_ON(!running_on_xen);
+
+#if 0
+ if (prot != vm->vm_page_prot) {
+ return -EINVAL;
+ }
+#endif
+
+ i = (address - vma->vm_start) >> PAGE_SHIFT;
+ for (offset = 0; offset < size; offset += PAGE_SIZE) {
+ struct xen_ia64_privcmd_entry* entry =
+ &privcmd_vma->entries[i];
+ error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, entry, mfn, prot, domid);
+ if (error != 0) {
+ break;
+ }
+
+ i++;
+ mfn++;
+ }
+
+ return error;
+}
+
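direct_remap_pfn_range() is the ia64 backend for the privcmd foreign-mapping path: for each page it allocates a backing pseudo-physical frame, points it at the foreign mfn via HYPERVISOR_add_physmap(), and installs the pte. A caller sketch, with mfn, nr and dom standing in for values supplied by the privcmd ioctl:

	/* Sketch: map `nr' consecutive machine frames of domain `dom'
	 * at the start of a privcmd vma. */
	int rc = direct_remap_pfn_range(vma, vma->vm_start, mfn,
					nr << PAGE_SHIFT,
					vma->vm_page_prot, dom);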
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Tue May 30 14:30:34 2006 -0500
@@ -83,11 +83,7 @@ GLOBAL_ENTRY(ia64_switch_to)
mov r8=1
;;
st4 [r27]=r8 // psr.ic back on
- ;;
-#else
-(p6) ssm psr.ic // if we had to map, reenable the psr.ic bit FIRST!!!
- ;;
-(p6) srlz.d
+#else
ld8 sp=[r21] // load kernel stack pointer of new task
mov IA64_KR(CURRENT)=in0 // update "current" application register
#endif
@@ -136,6 +132,11 @@ GLOBAL_ENTRY(ia64_switch_to)
#endif
;;
itr.d dtr[r25]=r23 // wire in new mapping...
+#ifndef CONFIG_XEN
+ ssm psr.ic // reenable the psr.ic bit
+ ;;
+ srlz.d
+#endif
br.cond.sptk .done
#ifdef CONFIG_XEN
END(xen_switch_to)
@@ -216,7 +217,9 @@ GLOBAL_ENTRY(ia64_trace_syscall)
.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8
.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10
br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
-.ret3: br.cond.sptk .work_pending_syscall_end
+.ret3:
+(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
+ br.cond.sptk .work_pending_syscall_end
strace_error:
ld8 r3=[r2] // load pt_regs.r8
@@ -246,7 +249,7 @@ END(ia64_trace_syscall)
* r8-r11: restored (syscall return value(s))
* r12: restored (user-level stack pointer)
* r13: restored (user-level thread pointer)
- * r14: cleared
+ * r14: set to __kernel_syscall_via_epc
* r15: restored (syscall #)
* r16-r17: cleared
* r18: user-level b6
@@ -267,7 +270,7 @@ END(ia64_trace_syscall)
* pr: restored (user-level pr)
* b0: restored (user-level rp)
* b6: restored
- * b7: cleared
+ * b7: set to __kernel_syscall_via_epc
* ar.unat: restored (user-level ar.unat)
* ar.pfs: restored (user-level ar.pfs)
* ar.rsc: restored (user-level ar.rsc)
@@ -331,20 +334,20 @@ ENTRY(ia64_leave_syscall)
;;
(p6) ld4 r31=[r18] // load current_thread_info()->flags
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
- mov b7=r0 // clear b7
- ;;
- ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
+ nop.i 0
+ ;;
+ mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
;;
- mov r16=ar.bsp // M2 get existing backing store pointer
+ ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
(p6) br.cond.spnt .work_pending_syscall
;;
// start restoring the state saved on the kernel stack (struct pt_regs):
ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
ld8 r11=[r3],PT(CR_IIP)-PT(R11)
- mov f6=f0 // clear f6
+(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE!
;;
invala // M0|1 invalidate ALAT
#ifdef CONFIG_XEN
@@ -358,57 +361,68 @@ ENTRY(ia64_leave_syscall)
st4 [r29]=r0 // note: clears both vpsr.i and vpsr.ic!
;;
#else
- rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection
-#endif
- mov f9=f0 // clear f9
-
- ld8 r29=[r2],16 // load cr.ipsr
- ld8 r28=[r3],16 // load cr.iip
- mov f8=f0 // clear f8
+ rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection
+#endif
+ cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs
+
+ ld8 r29=[r2],16 // M0|1 load cr.ipsr
+ ld8 r28=[r3],16 // M0|1 load cr.iip
+ mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
- mov.m ar.ssd=r0 // M2 clear ar.ssd
- cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
- ;;
ld8 r25=[r3],16 // M0|1 load ar.unat
- mov.m ar.csd=r0 // M2 clear ar.csd
- mov r22=r0 // clear r22
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
- mov f10=f0 // clear f10
- ;;
- ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
- ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc
- mov f11=f0 // clear f11
- ;;
- ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage)
- ld8 r31=[r3],PT(R1)-PT(PR) // load predicates
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
- ;;
- ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr
- ld8.fill r1=[r3],16 // load r1
-(pUStk) mov r17=1
- ;;
- srlz.d // M0 ensure interruption collection is off
- ld8.fill r13=[r3],16
- mov f7=f0 // clear f7
- ;;
- ld8.fill r12=[r2] // restore r12 (sp)
- ld8.fill r15=[r3] // restore r15
- addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
- ;;
-(pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8
-(pUStk) st1 [r14]=r17
- mov b6=r18 // I0 restore b6
- ;;
- mov r14=r0 // clear r14
- shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
-(pKStk) br.cond.dpnt.many skip_rbs_switch
-
- mov.m ar.ccv=r0 // clear ar.ccv
-(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
- br.cond.sptk.many rbs_switch
+(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+ nop 0
+ ;;
+ ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
+ ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc
+ mov f6=f0 // F clear f6
+ ;;
+ ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage)
+ ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates
+ mov f7=f0 // F clear f7
+ ;;
+ ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr
+ ld8.fill r1=[r3],16 // M0|1 load r1
+(pUStk) mov r17=1 // A
+ ;;
+(pUStk) st1 [r14]=r17 // M2|3
+ ld8.fill r13=[r3],16 // M0|1
+ mov f8=f0 // F clear f8
+ ;;
+ ld8.fill r12=[r2] // M0|1 restore r12 (sp)
+ ld8.fill r15=[r3] // M0|1 restore r15
+ mov b6=r18 // I0 restore b6
+
+ addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+ mov f9=f0 // F clear f9
+(pKStk) br.cond.dpnt.many skip_rbs_switch // B
+
+ srlz.d // M0 ensure interruption collection is off (for cover)
+ shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
+#ifdef CONFIG_XEN
+ XEN_HYPER_COVER;
+#else
+ cover // B add current frame into dirty partition & set cr.ifs
+#endif
+ ;;
+(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
+ mov r19=ar.bsp // M2 get new backing store pointer
+ mov f10=f0 // F clear f10
+
+ nop.m 0
+ movl r14=__kernel_syscall_via_epc // X
+ ;;
+ mov.m ar.csd=r0 // M2 clear ar.csd
+ mov.m ar.ccv=r0 // M2 clear ar.ccv
+ mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
+
+ mov.m ar.ssd=r0 // M2 clear ar.ssd
+ mov f11=f0 // F clear f11
+ br.cond.sptk.many rbs_switch // B
#ifdef CONFIG_XEN
END(xen_leave_syscall)
#else
@@ -546,7 +560,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
ldf.fill f7=[r2],PT(F11)-PT(F7)
ldf.fill f8=[r3],32
;;
- srlz.i // ensure interruption collection is off
+ srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned)
mov ar.ccv=r15
;;
ldf.fill f11=[r2]
@@ -556,29 +570,29 @@ GLOBAL_ENTRY(ia64_leave_kernel)
movl r2=XSI_BANK1_R16
movl r3=XSI_BANK1_R16+8
;;
- st8.spill [r2]=r16,16
- st8.spill [r3]=r17,16
- ;;
- st8.spill [r2]=r18,16
- st8.spill [r3]=r19,16
- ;;
- st8.spill [r2]=r20,16
- st8.spill [r3]=r21,16
- ;;
- st8.spill [r2]=r22,16
- st8.spill [r3]=r23,16
- ;;
- st8.spill [r2]=r24,16
- st8.spill [r3]=r25,16
- ;;
- st8.spill [r2]=r26,16
- st8.spill [r3]=r27,16
- ;;
- st8.spill [r2]=r28,16
- st8.spill [r3]=r29,16
- ;;
- st8.spill [r2]=r30,16
- st8.spill [r3]=r31,16
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,16
;;
movl r2=XSI_BANKNUM;;
st4 [r2]=r0;
@@ -641,14 +655,14 @@ GLOBAL_ENTRY(ia64_leave_kernel)
*/
(pNonSys) br.cond.dpnt dont_preserve_current_frame
+#ifdef CONFIG_XEN
+ XEN_HYPER_COVER;
+#else
+ cover // add current frame into dirty partition and set cr.ifs
+#endif
+ ;;
+ mov r19=ar.bsp // get new backing store pointer
rbs_switch:
-#ifdef CONFIG_XEN
- XEN_HYPER_COVER;
-#else
- cover // add current frame into dirty partition and set cr.ifs
-#endif
- ;;
- mov r19=ar.bsp // get new backing store pointer
sub r16=r16,r18 // krbs = old bsp - size of dirty partition
cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs
;;
@@ -723,14 +737,14 @@ rse_clear_invalid:
mov loc5=0
mov loc6=0
mov loc7=0
-(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+(pRecurse) br.call.dptk.few b0=rse_clear_invalid
;;
mov loc8=0
mov loc9=0
cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
mov loc10=0
mov loc11=0
-(pReturn) br.ret.sptk.many b0
+(pReturn) br.ret.dptk.many b0
#endif /* !CONFIG_ITANIUM */
# undef pRecurse
# undef pReturn
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Tue May 30 14:30:34 2006 -0500
@@ -87,16 +87,17 @@ ENTRY(vhpt_miss)
* (the "original") TLB miss, which may either be caused by an
instruction
* fetch or a data access (or non-access).
*
- * What we do here is normal TLB miss handing for the _original_ miss, followed
- * by inserting the TLB entry for the virtual page table page that the VHPT
- * walker was attempting to access. The latter gets inserted as long
- * as both L1 and L2 have valid mappings for the faulting address.
- * The TLB entry for the original miss gets inserted only if
- * the L3 entry indicates that the page is present.
+ * What we do here is normal TLB miss handing for the _original_ miss,
+ * followed by inserting the TLB entry for the virtual page table page
+ * that the VHPT walker was attempting to access. The latter gets
+ * inserted as long as page table entry above pte level have valid
+ * mappings for the faulting address. The TLB entry for the original
+ * miss gets inserted only if the pte entry indicates that the page is
+ * present.
*
* do_page_fault gets invoked in the following cases:
* - the faulting virtual address uses unimplemented address bits
- * - the faulting virtual address has no L1, L2, or L3 mapping
+ * - the faulting virtual address has no valid page table mapping
*/
#ifdef CONFIG_XEN
movl r16=XSI_IFA
@@ -127,7 +128,7 @@ ENTRY(vhpt_miss)
shl r21=r16,3 // shift bit 60 into sign bit
shr.u r17=r16,61 // get the region number into r17
;;
- shr r22=r21,3
+ shr.u r22=r21,3
#ifdef CONFIG_HUGETLB_PAGE
extr.u r26=r25,2,6
;;
@@ -139,7 +140,7 @@ ENTRY(vhpt_miss)
#endif
;;
cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5?
- shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
@@ -150,41 +151,54 @@ ENTRY(vhpt_miss)
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4]
cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
- shr.u r18=r22,PMD_SHIFT // shift L2 index into position
- ;;
- ld8 r17=[r17] // fetch the L1 entry (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
- ;;
-(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL?
- dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
- ;;
-#ifdef CONFIG_XEN
-(p7) ld8 r18=[r21] // read the L3 PTE
+#ifdef CONFIG_PGTABLE_4
+ shr.u r28=r22,PUD_SHIFT // shift pud index into position
+#else
+ shr.u r18=r22,PMD_SHIFT // shift pmd index into position
+#endif
+ ;;
+ ld8 r17=[r17] // get *pgd (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL?
+#ifdef CONFIG_PGTABLE_4
+ dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr)
+ ;;
+ shr.u r18=r22,PMD_SHIFT // shift pmd index into position
+(p7) ld8 r29=[r28] // get *pud (may be 0)
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL?
+ dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr)
+#else
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr)
+#endif
+ ;;
+(p7) ld8 r20=[r17] // get *pmd (may be 0)
+ shr.u r19=r22,PAGE_SHIFT // shift pte index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL?
+ dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr)
+ ;;
+(p7) ld8 r18=[r21] // read *pte
+#ifdef CONFIG_XEN
movl r19=XSI_ISR
;;
ld8 r19=[r19]
+#else
+ mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss
+#endif
;;
(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
+#ifdef CONFIG_XEN
movl r22=XSI_IHA
;;
ld8 r22=[r22]
- ;;
-#else
-(p7) ld8 r18=[r21] // read the L3 PTE
- mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss
- ;;
-(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
+#else
mov r22=cr.iha // get the VHPT address that caused the TLB miss
+#endif
;; // avoid RAW on p7
-#endif
(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address
;;
@@ -198,16 +212,17 @@ ENTRY(vhpt_miss)
;;
mov r8=r24
;;
-(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault)
- ;;
- movl r24=XSI_IFA
- ;;
- st8 [r24]=r22
- ;;
#else
(p10) itc.i r18 // insert the instruction TLB entry
(p11) itc.d r18 // insert the data TLB entry
+#endif
(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault)
+#ifdef CONFIG_XEN
+ movl r24=XSI_IFA
+ ;;
+ st8 [r24]=r22
+ ;;
+#else
mov cr.ifa=r22
#endif
@@ -242,25 +257,41 @@ ENTRY(vhpt_miss)
dv_serialize_data
/*
- * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g
+ * Re-check pagetable entry. If they changed, we may have received a ptc.g
* between reading the pagetable and the "itc". If so, flush the entry we
- * inserted and retry.
- */
- ld8 r25=[r21] // read L3 PTE again
- ld8 r26=[r17] // read L2 entry again
- ;;
- cmp.ne p6,p7=r26,r20 // did L2 entry change
+ * inserted and retry. At this point, we have:
+ *
+ * r28 = equivalent of pud_offset(pgd, ifa)
+ * r17 = equivalent of pmd_offset(pud, ifa)
+ * r21 = equivalent of pte_offset(pmd, ifa)
+ *
+ * r29 = *pud
+ * r20 = *pmd
+ * r18 = *pte
+ */
+ ld8 r25=[r21] // read *pte again
+ ld8 r26=[r17] // read *pmd again
+#ifdef CONFIG_PGTABLE_4
+ ld8 r19=[r28] // read *pud again
+#endif
+ cmp.ne p6,p7=r0,r0
+ ;;
+ cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change
+#ifdef CONFIG_PGTABLE_4
+ cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change
+#endif
mov r27=PAGE_SHIFT<<2
;;
(p6) ptc.l r22,r27 // purge PTE page translation
-(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change
+(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change
;;
(p6) ptc.l r16,r27 // purge translation
#endif
mov pr=r31,-1 // restore predicate registers
#ifdef CONFIG_XEN
- XEN_HYPER_RFI;
+ XEN_HYPER_RFI
+ dv_serialize_data
#else
rfi
#endif
@@ -272,10 +303,10 @@ ENTRY(itlb_miss)
ENTRY(itlb_miss)
DBG_FAULT(1)
/*
- * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+ * The ITLB handler accesses the PTE via the virtually mapped linear
* page table. If a nested TLB miss occurs, we switch into physical
- * mode, walk the page table, and then re-execute the L3 PTE read
- * and go on normally after that.
+ * mode, walk the page table, and then re-execute the PTE read and
+ * go on normally after that.
*/
#ifdef CONFIG_XEN
movl r16=XSI_IFA
@@ -292,11 +323,11 @@ ENTRY(itlb_miss)
;;
ld8 r17=[r17] // get virtual address of L3 PTE
#else
- mov r17=cr.iha // get virtual address of L3 PTE
+ mov r17=cr.iha // get virtual address of PTE
#endif
movl r30=1f // load nested fault continuation point
;;
-1: ld8 r18=[r17] // read L3 PTE
+1: ld8 r18=[r17] // read *pte
;;
mov b0=r29
tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
@@ -320,7 +351,7 @@ 1: ld8 r18=[r17] // read L3 PTE
*/
dv_serialize_data
- ld8 r19=[r17] // read L3 PTE again and see if same
+ ld8 r19=[r17] // read *pte again and see if same
mov r20=PAGE_SHIFT<<2 // setup page size for purge
;;
cmp.ne p7,p0=r18,r19
@@ -329,7 +360,8 @@ 1: ld8 r18=[r17] // read L3 PTE
#endif
mov pr=r31,-1
#ifdef CONFIG_XEN
- XEN_HYPER_RFI;
+ XEN_HYPER_RFI
+ dv_serialize_data
#else
rfi
#endif
@@ -341,10 +373,10 @@ ENTRY(dtlb_miss)
ENTRY(dtlb_miss)
DBG_FAULT(2)
/*
- * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+ * The DTLB handler accesses the PTE via the virtually mapped linear
* page table. If a nested TLB miss occurs, we switch into physical
- * mode, walk the page table, and then re-execute the L3 PTE read
- * and go on normally after that.
+ * mode, walk the page table, and then re-execute the PTE read and
+ * go on normally after that.
*/
#ifdef CONFIG_XEN
movl r16=XSI_IFA
@@ -361,11 +393,11 @@ dtlb_fault:
;;
ld8 r17=[r17] // get virtual address of L3 PTE
#else
- mov r17=cr.iha // get virtual address of L3 PTE
+ mov r17=cr.iha // get virtual address of PTE
#endif
movl r30=1f // load nested fault continuation point
;;
-1: ld8 r18=[r17] // read L3 PTE
+1: ld8 r18=[r17] // read *pte
;;
mov b0=r29
tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
@@ -390,7 +422,7 @@ 1: ld8 r18=[r17] // read L3 PTE
*/
dv_serialize_data
- ld8 r19=[r17] // read L3 PTE again and see if same
+ ld8 r19=[r17] // read *pte again and see if same
mov r20=PAGE_SHIFT<<2 // setup page size for purge
;;
cmp.ne p7,p0=r18,r19
@@ -399,7 +431,8 @@ 1: ld8 r18=[r17] // read L3 PTE
#endif
mov pr=r31,-1
#ifdef CONFIG_XEN
- XEN_HYPER_RFI;
+ XEN_HYPER_RFI
+ dv_serialize_data
#else
rfi
#endif
@@ -416,19 +449,15 @@ ENTRY(alt_itlb_miss)
ld8 r21=[r31],XSI_IFA-XSI_IPSR // get ipsr, point to ifa
movl r17=PAGE_KERNEL
;;
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- ;;
ld8 r16=[r31] // get ifa
- mov r31=pr
- ;;
#else
mov r16=cr.ifa // get address that caused the TLB miss
movl r17=PAGE_KERNEL
mov r21=cr.ipsr
+#endif
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
mov r31=pr
;;
-#endif
#ifdef CONFIG_DISABLE_VHPT
shr.u r22=r16,61 // get the region number into r21
;;
@@ -486,17 +515,15 @@ ENTRY(alt_dtlb_miss)
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
;;
ld8 r16=[r31] // get ifa
- mov r31=pr
- ;;
#else
mov r16=cr.ifa // get address that caused the TLB miss
movl r17=PAGE_KERNEL
mov r20=cr.isr
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
mov r21=cr.ipsr
+#endif
mov r31=pr
;;
-#endif
#ifdef CONFIG_DISABLE_VHPT
shr.u r22=r16,61 // get the region number into r21
;;
@@ -565,12 +592,12 @@ ENTRY(nested_dtlb_miss)
* r30: continuation address
* r31: saved pr
*
- * Output: r17: physical address of L3 PTE of faulting address
+ * Output: r17: physical address of PTE of faulting address
* r29: saved b0
* r30: continuation address
* r31: saved pr
*
- * Clobbered: b0, r18, r19, r21, psr.dt (cleared)
+ * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared)
*/
#ifdef CONFIG_XEN
XEN_HYPER_RSM_PSR_DT;
@@ -579,12 +606,23 @@ ENTRY(nested_dtlb_miss)
#endif
mov r19=IA64_KR(PT_BASE) // get the page table base address
shl r21=r16,3 // shift bit 60 into sign bit
+#ifdef CONFIG_XEN
+ movl r18=XSI_ITIR
+ ;;
+ ld8 r18=[r18]
+#else
+ mov r18=cr.itir
+#endif
;;
shr.u r17=r16,61 // get the region number into r17
+ extr.u r18=r18,2,6 // get the faulting page size
;;
cmp.eq p6,p7=5,r17 // is faulting address in region 5?
- shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
- ;;
+ add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address
+ add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
+ ;;
+ shr.u r22=r16,r22
+ shr.u r18=r16,r18
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
srlz.d
@@ -594,21 +632,33 @@ ENTRY(nested_dtlb_miss)
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4]
cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
- shr.u r18=r16,PMD_SHIFT // shift L2 index into position
- ;;
- ld8 r17=[r17] // fetch the L1 entry (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
- ;;
-(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+#ifdef CONFIG_PGTABLE_4
+ shr.u r18=r22,PUD_SHIFT // shift pud index into position
+#else
+ shr.u r18=r22,PMD_SHIFT // shift pmd index into position
+#endif
+ ;;
+ ld8 r17=[r17] // get *pgd (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr)
+ ;;
+#ifdef CONFIG_PGTABLE_4
+(p7) ld8 r17=[r17] // get *pud (may be 0)
+ shr.u r18=r22,PMD_SHIFT // shift pmd index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr)
+ ;;
+#endif
+(p7) ld8 r17=[r17] // get *pmd (may be 0)
+ shr.u r19=r22,PAGE_SHIFT // shift pte index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr);
(p6) br.cond.spnt page_fault
mov b0=r30
br.sptk.many b0 // return to continuation point
@@ -626,7 +676,7 @@ END(ikey_miss)
// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
ENTRY(page_fault)
#ifdef CONFIG_XEN
- XEN_HYPER_SSM_PSR_DT;
+ XEN_HYPER_SSM_PSR_DT
#else
ssm psr.dt
;;
@@ -742,11 +792,12 @@ 1: ld8 r18=[r17]
;; // avoid RAW on r18
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
- ;;
- cmpxchg8.acq r26=[r17],r25,ar.ccv
+ tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
+ ;;
+(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present
mov r24=PAGE_SHIFT<<2
;;
- cmp.eq p6,p7=r26,r18
+(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present
;;
(p6) itc.d r25 // install updated PTE
;;
@@ -775,7 +826,8 @@ 1: ld8 r18=[r17]
#endif
mov pr=r31,-1 // restore pr
#ifdef CONFIG_XEN
- XEN_HYPER_RFI;
+ XEN_HYPER_RFI
+ dv_serialize_data
#else
rfi
#endif
@@ -826,11 +878,12 @@ 1: ld8 r18=[r17]
;;
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_A,r18 // set the accessed bit
- ;;
- cmpxchg8.acq r26=[r17],r25,ar.ccv
+ tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
+ ;;
+(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present
mov r24=PAGE_SHIFT<<2
;;
- cmp.eq p6,p7=r26,r18
+(p6) cmp.eq p6,p7=r26,r18 // Only if page present
;;
#ifdef CONFIG_XEN
mov r26=r8
@@ -869,7 +922,8 @@ 1: ld8 r18=[r17]
#endif /* !CONFIG_SMP */
mov pr=r31,-1
#ifdef CONFIG_XEN
- XEN_HYPER_RFI;
+ XEN_HYPER_RFI
+ dv_serialize_data
#else
rfi
#endif
@@ -892,11 +946,13 @@ ENTRY(daccess_bit)
movl r30=1f // load continuation point in case of nested fault
;;
#ifdef CONFIG_XEN
- mov r18=r8;
- mov r8=r16;
- XEN_HYPER_THASH;;
- mov r17=r8;
- mov r8=r18;;
+ mov r18=r8
+ mov r8=r16
+ XEN_HYPER_THASH
+ ;;
+ mov r17=r8
+ mov r8=r18
+ ;;
#else
thash r17=r16 // compute virtual address of L3 PTE
#endif
@@ -909,11 +965,12 @@ 1: ld8 r18=[r17]
;; // avoid RAW on r18
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_A,r18 // set the dirty bit
- ;;
- cmpxchg8.acq r26=[r17],r25,ar.ccv
+ tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
+ ;;
+(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present
mov r24=PAGE_SHIFT<<2
;;
- cmp.eq p6,p7=r26,r18
+(p6) cmp.eq p6,p7=r26,r18 // Only if page is present
;;
#ifdef CONFIG_XEN
mov r26=r8
@@ -950,7 +1007,8 @@ 1: ld8 r18=[r17]
mov b0=r29 // restore b0
mov pr=r31,-1
#ifdef CONFIG_XEN
- XEN_HYPER_RFI;
+ XEN_HYPER_RFI
+ dv_serialize_data
#else
rfi
#endif
@@ -976,143 +1034,157 @@ ENTRY(break_fault)
* to prevent leaking bits from kernel to user level.
*/
DBG_FAULT(11)
- mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
-#ifdef CONFIG_XEN
- movl r31=XSI_IPSR
- ;;
- ld8 r29=[r31],XSI_IIP-XSI_IPSR // get ipsr, point to iip
- mov r18=__IA64_BREAK_SYSCALL
- mov r21=ar.fpsr
- ;;
- ld8 r28=[r31],XSI_IIM-XSI_IIP // get iip, point to iim
- mov r19=b6
- mov r25=ar.unat
- ;;
- ld8 r17=[r31] // get iim
- mov r27=ar.rsc
- mov r26=ar.pfs
- ;;
-#else
- mov r17=cr.iim
- mov r18=__IA64_BREAK_SYSCALL
- mov r21=ar.fpsr
- mov r29=cr.ipsr
- mov r19=b6
- mov r25=ar.unat
- mov r27=ar.rsc
- mov r26=ar.pfs
- mov r28=cr.iip
-#endif
- mov r31=pr // prepare to save predicates
- mov r20=r1
- ;;
	mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc)
+#ifdef CONFIG_XEN
+ movl r22=XSI_IPSR
+ ;;
+ ld8 r29=[r22],XSI_IIM-XSI_IPSR // get ipsr, point to iip
+#else
+ mov r29=cr.ipsr // M2 (12 cyc)
+#endif
+ mov r31=pr // I0 (2 cyc)
+
+#ifdef CONFIG_XEN
+ ;;
+ ld8 r17=[r22],XSI_IIP-XSI_IIM
+#else
+ mov r17=cr.iim // M2 (2 cyc)
+#endif
+ mov.m r27=ar.rsc // M2 (12 cyc)
+ mov r18=__IA64_BREAK_SYSCALL // A
+
+ mov.m ar.rsc=0 // M2
+ mov.m r21=ar.fpsr // M2 (12 cyc)
+ mov r19=b6 // I0 (2 cyc)
+ ;;
+ mov.m r23=ar.bspstore // M2 (12 cyc)
+ mov.m r24=ar.rnat // M2 (5 cyc)
+ mov.i r26=ar.pfs // I0 (2 cyc)
+
+ invala // M0|1
+ nop.m 0 // M
+ mov r20=r1 // A save r1
+
+ nop.m 0
+ movl r30=sys_call_table // X
+
+#ifdef CONFIG_XEN
+ ld8 r28=[r22]
+#else
+ mov r28=cr.iip // M2 (2 cyc)
+#endif
+ cmp.eq p0,p7=r18,r17 // I0 is this a system call?
+(p7) br.cond.spnt non_syscall // B no ->
+ //
+ // From this point on, we are definitely on the syscall-path
+ // and we can use (non-banked) scratch registers.
+ //
+///////////////////////////////////////////////////////////////////////
+ mov r1=r16 // A move task-pointer to "addl"-addressable reg
+ mov r2=r16 // A setup r2 for ia64_syscall_setup
+ add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = &current_thread_info()->flags
+
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
- cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so)
-(p7) br.cond.spnt non_syscall
- ;;
- ld1 r17=[r16] // load current->thread.on_ustack flag
- st1 [r16]=r0 // clear current->thread.on_ustack flag
- add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
- ;;
- invala
-
- /* adjust return address so we skip over the break instruction: */
-
- extr.u r8=r29,41,2 // extract ei field from cr.ipsr
- ;;
- cmp.eq p6,p7=2,r8 // isr.ei==2?
- mov r2=r1 // setup r2 for ia64_syscall_setup
- ;;
-(p6) mov r8=0 // clear ei to 0
-(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped
-(p7) adds r8=1,r8 // increment ei to next slot
- ;;
- cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already?
- dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
- ;;
-
- // switch from user to kernel RBS:
- MINSTATE_START_SAVE_MIN_VIRT
- br.call.sptk.many b7=ia64_syscall_setup
- ;;
	adds r15=-1024,r15 // A subtract 1024 from syscall number
+ mov r3=NR_syscalls - 1
+ ;;
+ ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag
+ ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags
+ extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr
+
+ shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024)
+ addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS
+ cmp.leu p6,p7=r15,r3 // A syscall number in range?
+ ;;
+
+ lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS
+(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point
+ tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT?
+
+ mov.m ar.bspstore=r22 // M2 switch to kernel RBS
+ cmp.eq p8,p9=2,r8 // A isr.ei==2?
+ ;;
+
+(p8) mov r8=0 // A clear ei to 0
+(p7) movl r30=sys_ni_syscall // X
+
+(p8) adds r28=16,r28 // A switch cr.iip to next bundle
+(p9) adds r8=1,r8 // A increment ei to next slot
+ nop.i 0
+ ;;
+
+ mov.m r25=ar.unat // M2 (5 cyc)
	dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
	adds r15=1024,r15 // A restore original syscall number
+ //
+ // If any of the above loads miss in L1D, we'll stall here until
+ // the data arrives.
+ //
+///////////////////////////////////////////////////////////////////////
	st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
	mov b6=r30 // I0 setup syscall handler branch reg early
	cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
+
+ and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
+ mov r18=ar.bsp // M2 (12 cyc)
+(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS
+ ;;
+.back_from_break_fixup:
+(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack
+ cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
+ br.call.sptk.many b7=ia64_syscall_setup // B
+1:
	mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
+ nop 0
#ifdef CONFIG_XEN
mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;;
#else
- MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
-#endif
-#ifdef CONFIG_XEN
- movl r3=XSI_PSR_IC
- mov r16=1
- ;;
-#if 1
- st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC // vpsr.ic = 1
- ;;
-(p15) ld8 r3=[r3]
- ;;
-(p15) st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR // if (p15) vpsr.i = 1
- mov r16=r0
- ;;
-(p15) ld4 r16=[r3] // if (pending_interrupts)
- ;;
- cmp.ne p6,p0=r16,r0
	bsw.1 // B (6 cyc) regs are saved, switch to bank 1
+#endif
+ ;;
+
+#ifdef CONFIG_XEN
+ movl r16=XSI_PSR_IC
+ mov r3=1
+ ;;
+ st4 [r16]=r3,XSI_PSR_I_ADDR-XSI_PSR_IC // vpsr.ic = 1
+#else
+ ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection
+#endif
+ movl r3=ia64_ret_from_syscall // X
+ ;;
+
+ srlz.i // M0 ensure interruption collection is on
+ mov rp=r3 // I0 set the real return addr
+(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
+
+#ifdef CONFIG_XEN
+(p15) ld8 r16=[r16] // vpsr.i
+ ;;
+(p15) st1 [r16]=r0,XSI_PEND-XSI_PSR_I_ADDR // if (p15) vpsr.i = 1
+ mov r2=r0
+ ;;
+(p15) ld4 r2=[r16] // if (pending_interrupts)
+ ;;
+ cmp.ne p6,p0=r2,r0
;;
(p6) ssm psr.i // do a real ssm psr.i
- ;;
-#else
-// st4 [r3]=r16,XSI_PSR_I_ADDR-XSI_PSR_IC // vpsr.ic = 1
- adds r3=XSI_PSR_I_ADDR-XSI_PSR_IC,r3 // SKIP vpsr.ic = 1
- ;;
-(p15) ld8 r3=[r3]
- ;;
-(p15) st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR // if (p15) vpsr.i = 1
- mov r16=r0
- ;;
-(p15) ld4 r16=[r3] // if (pending_interrupts)
- ;;
- cmp.ne p6,p0=r16,r0
- ;;
-//(p6) ssm psr.i // do a real ssm psr.i
-//(p6) XEN_HYPER_SSM_I;
-(p6) break 0x7;
- ;;
-#endif
- mov r3=NR_syscalls - 1
- ;;
-#else
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- mov r3=NR_syscalls - 1
- ;;
-(p15) ssm psr.i // restore psr.i
-#endif
- // p10==true means out registers are more than 8 or r15's Nat is true
-(p10) br.cond.spnt.many ia64_ret_from_syscall
- ;;
- movl r16=sys_call_table
-
- adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
- movl r2=ia64_ret_from_syscall
- ;;
- shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
- cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
- mov rp=r2 // set the real return addr
- ;;
-(p6) ld8 r20=[r20] // load address of syscall entry point
-(p7) movl r20=sys_ni_syscall
-
- add r2=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- ld4 r2=[r2] // r2 = current_thread_info()->flags
- ;;
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
- ;;
- cmp.eq p8,p0=r2,r0
- mov b6=r20
- ;;
-(p8) br.call.sptk.many b6=b6 // ignore this return addr
- br.cond.sptk ia64_trace_syscall
+#else
+(p15) ssm psr.i // M2 restore psr.i
+#endif
+(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr)
+ br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic
// NOT REACHED
+///////////////////////////////////////////////////////////////////////
	// On entry, we optimistically assumed that we're coming from user-space.
	// For the rare cases where a system-call is done from within the kernel,
+ // we fix things up at this point:
+.break_fixup:
+ add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure
+ mov ar.rnat=r24 // M2 restore kernel's AR.RNAT
+ ;;
+ mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE
+ br.cond.sptk .back_from_break_fixup
END(break_fault)
.org ia64_ivt+0x3000
@@ -1201,8 +1273,6 @@ END(interrupt)
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
- * - executing on bank 1 registers
- * - psr.ic enabled, interrupts restored
* - p10: TRUE if syscall is invoked with more than 8 out
* registers or r15's Nat is true
* - r1: kernel's gp
@@ -1210,8 +1280,11 @@ END(interrupt)
* - r8: -EINVAL if p10 is true
* - r12: points to kernel stack
* - r13: points to current task
+ * - r14: preserved (same as on entry)
+ * - p13: preserved
* - p15: TRUE if interrupts need to be re-enabled
* - ar.fpsr: set to kernel settings
+ * - b6: preserved (same as on entry)
*/
#ifndef CONFIG_XEN
GLOBAL_ENTRY(ia64_syscall_setup)
@@ -1280,10 +1353,10 @@ GLOBAL_ENTRY(ia64_syscall_setup)
(p13) mov in5=-1
;;
st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
- tnat.nz p14,p0=in6
+ tnat.nz p13,p0=in6
cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
;;
- stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+ mov r8=1
(p9) tnat.nz p10,p0=r15
adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
@@ -1294,9 +1367,9 @@ GLOBAL_ENTRY(ia64_syscall_setup)
mov r13=r2 // establish `current'
movl r1=__gp // establish kernel global pointer
;;
-(p14) mov in6=-1
+ st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p13) mov in6=-1
(p8) mov in7=-1
- nop.i 0
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
movl r17=FPSR_DEFAULT
@@ -1323,6 +1396,8 @@ END(ia64_syscall_setup)
* element, followed by the arguments.
*/
ENTRY(dispatch_illegal_op_fault)
+ .prologue
+ .body
SAVE_MIN_WITH_COVER
ssm psr.ic | PSR_DEFAULT_BITS
;;
@@ -1335,6 +1410,7 @@ ENTRY(dispatch_illegal_op_fault)
mov out0=ar.ec
;;
SAVE_REST
+ PT_REGS_UNWIND_INFO(0)
;;
br.call.sptk.many rp=ia64_illegal_op_fault
.ret0: ;;
@@ -1365,6 +1441,8 @@ END(dispatch_illegal_op_fault)
FAULT(17)
ENTRY(non_syscall)
+ mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER
+ ;;
SAVE_MIN_WITH_COVER
// There is no particular reason for this code to be here, other than that
@@ -1540,7 +1618,7 @@ ENTRY(daccess_rights)
;;
ld8 r16=[r16]
;;
- XEN_HYPER_RSM_PSR_DT;
+ XEN_HYPER_RSM_PSR_DT
#else
mov r16=cr.ifa
rsm psr.dt
@@ -1584,6 +1662,25 @@ END(disabled_fp_reg)
// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
ENTRY(nat_consumption)
DBG_FAULT(26)
+
+ mov r16=cr.ipsr
+ mov r17=cr.isr
+ mov r31=pr // save PR
+ ;;
+ and r18=0xf,r17 // r18 = cr.ipsr.code{3:0}
+ tbit.z p6,p0=r17,IA64_ISR_NA_BIT
+ ;;
+ cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18
+ dep r16=-1,r16,IA64_PSR_ED_BIT,1
+(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH)
+ ;;
+ mov cr.ipsr=r16 // set cr.ipsr.na
+ mov pr=r31,-1
+ ;;
+ rfi
+
+1: mov pr=r31,-1
+ ;;
FAULT(26)
END(nat_consumption)
@@ -1624,7 +1721,7 @@ ENTRY(speculation_vector)
#ifdef CONFIG_XEN
XEN_HYPER_RFI;
#else
- rfi
+ rfi // and go back
#endif
END(speculation_vector)
@@ -1647,7 +1744,6 @@ END(debug_vector)
// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
ENTRY(unaligned_access)
DBG_FAULT(30)
- mov r16=cr.ipsr
mov r31=pr // prepare to save predicates
;;
br.sptk.many dispatch_unaligned_handler
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h Tue May 30 14:30:34 2006 -0500
@@ -155,6 +155,8 @@
 ;; \
 ld4 r30=[r8]; \
 ;; \
+ /* set XSI_INCOMPL_REGFR 0 */ \
+ st4 [r8]=r0; \
 cmp.eq p6,p7=r30,r0; \
 ;; /* not sure if this stop bit is necessary */ \
(p6) adds r8=XSI_PRECOVER_IFS-XSI_INCOMPL_REGFR,r8; \
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Tue May 30 14:30:34 2006 -0500
@@ -8,28 +8,17 @@
#include <asm/processor.h>
#include <asm/asmmacro.h>
- .data
- .align 8
- .globl running_on_xen
-running_on_xen:
- data4 0
-
#define isBP p3 // are we the Bootstrap Processor?
.text
GLOBAL_ENTRY(early_xen_setup)
- mov r8=cr.dcr
+ mov r8=ar.rsc // Initialized in head.S
(isBP) movl r9=running_on_xen;;
- extr.u r8=r8,63,1;;
- cmp.ne p7,p0=r8,r0;;
+ extr.u r8=r8,2,2;; // Extract pl fields
+ cmp.ne p7,p0=r8,r0;; // p7: running on xen
+(p7) mov r8=1 // booleanize.
+(p7) movl r10=xen_ivt;;
(isBP) st4 [r9]=r8
-(p7) movl r10=xen_ivt;;
(p7) mov cr.iva=r10
br.ret.sptk.many rp;;
END(early_xen_setup)
-
-GLOBAL_ENTRY(is_running_on_xen)
- movl r9=running_on_xen;;
- ld4 r8=[r9]
- br.ret.sptk.many rp;;
-END(is_running_on_xen)
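
early_xen_setup() now infers the flag from the privilege level the kernel was entered at (ar.rsc is initialized in head.S; per the comments above, a nonzero pl means we were booted under Xen) and booleanizes it, and the old asm accessor gives way to a C macro over the exported variable. C callers simply test the flag, e.g. (sketch):

	/* Sketch: how C-side callers consume the flag set above. */
	if (is_running_on_xen())
		xen_init();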
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Tue May 30 14:30:34 2006 -0500
@@ -26,53 +26,34 @@
#include <asm/sections.h>
#include <xen/interface/memory.h>
-unsigned long pci_mem_start = 0xaeedbabe;
-
/*
* PFN of last memory page.
*/
unsigned long end_pfn;
EXPORT_SYMBOL(end_pfn);
-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
-unsigned long end_pfn_map;
-
-/*
- * Add a memory region to the kernel e820 map.
- */
-void __init add_memory_region(unsigned long start, unsigned long size, int type)
-{
- int x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
-
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
-}
-
-#ifndef CONFIG_XEN
/*
* end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
* The direct mapping extends to end_pfn_map, so that we can directly access
* apertures, ACPI and other tables without having to play with fixmaps.
*/
+unsigned long end_pfn_map;
/*
* Last pfn which the user wants to use.
*/
-
+unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
+
+#ifndef CONFIG_XEN
extern struct resource code_resource, data_resource;
+#endif
/* Check for some hardcoded bad areas that early boot is not allowed to touch */
static inline int bad_addr(unsigned long *addrp, unsigned long size)
{
unsigned long addr = *addrp, last = addr + size;
+#ifndef CONFIG_XEN
/* various gunk below that needed for SMP startup */
if (addr < 0x8000) {
*addrp = 0x8000;
@@ -100,9 +81,16 @@ static inline int bad_addr(unsigned long
return 1;
}
/* XXX ramdisk image here? */
+#else
+ if (last < (table_end<<PAGE_SHIFT)) {
+ *addrp = table_end << PAGE_SHIFT;
+ return 1;
+ }
+#endif
return 0;
}
+#ifndef CONFIG_XEN
int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
@@ -116,6 +104,7 @@ int __init e820_mapped(unsigned long sta
}
return 0;
}
+#endif
/*
* Find a free area in a specific range.
@@ -246,22 +235,23 @@ e820_hole_size(unsigned long start_pfn,
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
-void __init e820_reserve_resources(void)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
+void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
+{
+ int i;
+ for (i = 0; i < nr_map; i++) {
struct resource *res;
res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820.map[i].type) {
+ switch (e820[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
default: res->name = "reserved";
}
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
+ res->start = e820[i].addr;
+ res->end = res->start + e820[i].size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
+#ifndef CONFIG_XEN
if (e820.map[i].type == E820_RAM) {
/*
* We don't know which RAM region contains kernel data,
@@ -274,9 +264,27 @@ void __init e820_reserve_resources(void)
request_resource(res, &crashk_res);
#endif
}
- }
-}
-#endif /* CONFIG_XEN */
+#endif
+ }
+}
+
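e820_reserve_resources() now receives the map and its length explicitly instead of reading the e820 global, so the native and Xen boot paths can share one implementation. A call matching the new signature (sketch):

	/* Sketch: the native path reserves the BIOS map; a dom0 path can
	 * pass the machine memory map it obtained from Xen instead. */
	e820_reserve_resources(e820.map, e820.nr_map);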
+/*
+ * Add a memory region to the kernel e820 map.
+ */
+void __init add_memory_region(unsigned long start, unsigned long size, int type)
+{
+ int x = e820.nr_map;
+
+ if (x == E820MAX) {
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ return;
+ }
+
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
+}
void __init e820_print_map(char *who)
{
@@ -304,7 +312,6 @@ void __init e820_print_map(char *who)
}
}
-#ifndef CONFIG_XEN
/*
* Sanitize the BIOS e820 map.
*
@@ -491,9 +498,13 @@ static int __init sanitize_e820_map(stru
*/
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
+#ifndef CONFIG_XEN
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
return -1;
+#else
+ BUG_ON(nr_map < 1);
+#endif
do {
unsigned long start = biosmap->addr;
@@ -505,6 +516,7 @@ static int __init copy_e820_map(struct e
if (start > end)
return -1;
+#ifndef CONFIG_XEN
/*
* Some BIOSes claim RAM in the 640k - 1M region.
* Not right. Fix it up.
@@ -523,12 +535,14 @@ static int __init copy_e820_map(struct e
size = end - start;
}
}
+#endif
add_memory_region(start, size, type);
} while (biosmap++,--nr_map);
return 0;
}
+#ifndef CONFIG_XEN
void __init setup_memory_region(void)
{
char *who = "BIOS-e820";
@@ -562,104 +576,63 @@ void __init setup_memory_region(void)
#else /* CONFIG_XEN */
-extern unsigned long xen_override_max_pfn;
-extern union xen_start_info_union xen_start_info_union;
-
-unsigned long __init e820_end_of_ram(void)
-{
- unsigned long max_end_pfn;
-
- if (xen_override_max_pfn == 0) {
- max_end_pfn = xen_start_info->nr_pages;
- /* Default 8MB slack (to balance backend allocations). */
- max_end_pfn += 8 << (20 - PAGE_SHIFT);
- } else if (xen_override_max_pfn > xen_start_info->nr_pages) {
- max_end_pfn = xen_override_max_pfn;
- } else {
- max_end_pfn = xen_start_info->nr_pages;
- }
-
- return max_end_pfn;
-}
-
-unsigned long __init
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
-{
- return 0;
-}
-
-void __init e820_reserve_resources(void)
-{
- dom0_op_t op;
- struct dom0_memory_map_entry *map;
- unsigned long gapstart, gapsize, round, last;
- int i, found = 0;
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN))
- return;
-
- map = alloc_bootmem_low_pages(PAGE_SIZE);
- op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
- set_xen_guest_handle(op.u.physical_memory_map.memory_map, map);
- op.u.physical_memory_map.max_map_entries =
- PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
- BUG_ON(HYPERVISOR_dom0_op(&op));
-
- last = 0x100000000ULL;
- gapstart = 0x10000000;
- gapsize = 0x400000;
-
- for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
- struct resource *res;
-
- if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
- gapsize = last - map[i].end;
- gapstart = map[i].end;
- found = 1;
- }
- if (map[i].start < last)
- last = map[i].start;
-
- if (map[i].end > 0x100000000ULL)
- continue;
- res = alloc_bootmem_low(sizeof(struct resource));
- res->name = map[i].is_ram ? "System RAM" : "reserved";
- res->start = map[i].start;
- res->end = map[i].end - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- request_resource(&iomem_resource, res);
- }
-
- free_bootmem(__pa(map), PAGE_SIZE);
-
- if (!found) {
- gapstart = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
- gapstart = (gapstart << PAGE_SHIFT) + 1024*1024;
- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit
address range\n"
- KERN_ERR "PCI: Unassigned devices with 32bit resource
registers may break!\n");
- }
-
+void __init setup_memory_region(void)
+{
+ int rc;
+ struct xen_memory_map memmap;
/*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
+ * This is rather large for a stack variable but this early in
+ * the boot process we know we have plenty slack space.
*/
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
-
- printk(KERN_INFO "Allocating PCI resources starting at %lx (gap:
%lx:%lx)\n",
- pci_mem_start, gapstart, gapsize);
-}
-
+ struct e820entry map[E820MAX];
+
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, map);
+
+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+ if ( rc == -ENOSYS ) {
+ memmap.nr_entries = 1;
+ map[0].addr = 0ULL;
+ map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+ /* 8MB slack (to balance backend allocations). */
+ map[0].size += 8 << 20;
+ map[0].type = E820_RAM;
+ rc = 0;
+ }
+ BUG_ON(rc);
+
+ sanitize_e820_map(map, (char *)&memmap.nr_entries);
+
+ BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
+
+ printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+ e820_print_map("Xen");
+}
#endif
void __init parse_memopt(char *p, char **from)
{
+ int i;
+ unsigned long current_end;
+ unsigned long end;
+
end_user_pfn = memparse(p, from);
end_user_pfn >>= PAGE_SHIFT;
- xen_override_max_pfn = (unsigned long) end_user_pfn;
+
+ end = end_user_pfn<<PAGE_SHIFT;
+ i = e820.nr_map-1;
+ current_end = e820.map[i].addr + e820.map[i].size;
+
+ if (current_end < end) {
+ /*
+ * The e820 map ends before our requested size so
+ * extend the final entry to the requested address.
+ */
+ if (e820.map[i].type == E820_RAM)
+ e820.map[i].size = end - e820.map[i].addr;
+ else
+ add_memory_region(current_end, end - current_end, E820_RAM);
+ }
}
void __init parse_memmapopt(char *p, char **from)
@@ -683,16 +656,17 @@ void __init parse_memmapopt(char *p, cha
p = *from;
}
+unsigned long pci_mem_start = 0xaeedbabe;
+
/*
* Search for the biggest gap in the low 32 bits of the e820
* memory space. We pass this space to PCI to assign MMIO resources
* for hotplug or unconfigured devices in.
* Hopefully the BIOS let enough space left.
*/
-__init void e820_setup_gap(void)
-{
-#ifndef CONFIG_XEN
- unsigned long gapstart, gapsize;
+__init void e820_setup_gap(struct e820entry *e820, int nr_map)
+{
+ unsigned long gapstart, gapsize, round;
unsigned long last;
int i;
int found = 0;
@@ -700,10 +674,10 @@ __init void e820_setup_gap(void)
last = 0x100000000ull;
gapstart = 0x10000000;
gapsize = 0x400000;
- i = e820.nr_map;
+ i = nr_map;
while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
+ unsigned long long start = e820[i].addr;
+ unsigned long long end = start + e820[i].size;
/*
* Since "last" is at most 4GB, we know we'll
@@ -729,16 +703,15 @@ __init void e820_setup_gap(void)
}
/*
- * Start allocating dynamic PCI memory a bit into the gap,
- * aligned up to the nearest megabyte.
- *
- * Question: should we try to pad it up a bit (do something
- * like " + (gapsize >> 3)" in there too?). We now have the
- * technology.
+ * See how much we want to round up: start off with
+ * rounding to the next 1MB area.
*/
- pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
+ round = 0x100000;
+ while ((gapsize >> 4) > round)
+ round += round;
+ /* Fun with two's complement */
+ pci_mem_start = (gapstart + round) & -round;
printk(KERN_INFO "Allocating PCI resources starting at %lx (gap:
%lx:%lx)\n",
pci_mem_start, gapstart, gapsize);
-#endif
-}
+}
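
The "fun with two's complement" line relies on -round being the bitwise mask ~(round - 1) when round is a power of two, so (gapstart + round) & -round lands at least one round into the gap, aligned to round. A standalone sketch with assumed gap values:

#include <stdio.h>

int main(void)
{
	unsigned long gapstart = 0xe0100000UL;   /* assumed gap start */
	unsigned long gapsize  = 0x18000000UL;   /* assumed gap size */
	unsigned long round    = 0x100000UL;     /* start at the next 1MB */

	while ((gapsize >> 4) > round)           /* scale rounding with the gap */
		round += round;

	/* -round == ~(round - 1) for a power of two: align up past gapstart */
	unsigned long pci_mem_start = (gapstart + round) & -round;

	printf("round %#lx -> pci_mem_start %#lx\n", round, pci_mem_start);
	return 0;                                /* prints 0x2000000 -> 0xe2000000 */
}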
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue May 30 14:30:34 2006 -0500
@@ -76,8 +76,8 @@
#include <xen/features.h>
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
-#define end_pfn_map end_pfn
#include <asm/mach-xen/setup_arch_post.h>
+#include <xen/interface/memory.h>
extern unsigned long start_pfn;
extern struct edid_info edid_info;
@@ -490,19 +490,6 @@ static __init void parse_cmdline_early (
}
#ifndef CONFIG_NUMA
-#ifdef CONFIG_XEN
-static void __init
-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
- unsigned long bootmap_size;
-
- bootmap_size = init_bootmem(start_pfn, end_pfn);
- free_bootmem(0, xen_start_info->nr_pages << PAGE_SHIFT);
- reserve_bootmem(HIGH_MEMORY,
- (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1)
- - HIGH_MEMORY);
-}
-#else
static void __init
contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -513,10 +500,13 @@ contig_initmem_init(unsigned long start_
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n",bootmap_size);
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
+#ifdef CONFIG_XEN
+ e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
+#else
e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
+#endif
reserve_bootmem(bootmap, bootmap_size);
}
-#endif /* !CONFIG_XEN */
#endif
/* Use inline assembly to define this because the nops are defined
@@ -636,6 +626,11 @@ void __init setup_arch(char **cmdline_p)
{
unsigned long kernel_end;
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+ struct e820entry *machine_e820;
+ struct xen_memory_map memmap;
+#endif
+
#ifdef CONFIG_XEN
/* Register a call for panic conditions. */
notifier_chain_register(&panic_notifier_list, &xen_panic_block);
@@ -693,20 +688,18 @@ void __init setup_arch(char **cmdline_p)
rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
+#endif /* !CONFIG_XEN */
setup_memory_region();
copy_edd();
-#endif /* !CONFIG_XEN */
if (!MOUNT_ROOT_RDONLY)
root_mountflags &= ~MS_RDONLY;
init_mm.start_code = (unsigned long) &_text;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
-#ifdef CONFIG_XEN
- init_mm.brk = start_pfn << PAGE_SHIFT;
-#else
- init_mm.brk = (unsigned long) &_end;
-
+ init_mm.brk = (unsigned long) &_end;
+
+#ifndef CONFIG_XEN
code_resource.start = virt_to_phys(&_text);
code_resource.end = virt_to_phys(&_etext)-1;
data_resource.start = virt_to_phys(&_etext);
@@ -735,12 +728,11 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_NUMA
- numa_initmem_init(start_pfn, end_pfn);
+ numa_initmem_init(0, end_pfn);
#else
- contig_initmem_init(start_pfn, end_pfn);
-#endif
-
-#ifndef CONFIG_XEN
+ contig_initmem_init(0, end_pfn);
+#endif
+
/* Reserve direct mapping */
reserve_bootmem_generic(table_start << PAGE_SHIFT,
(table_end - table_start) << PAGE_SHIFT);
@@ -749,6 +741,10 @@ void __init setup_arch(char **cmdline_p)
kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
+#ifdef CONFIG_XEN
+ /* reserve physmap, start info and initial page tables */
+ reserve_bootmem(kernel_end, (table_start<<PAGE_SHIFT)-kernel_end);
+#else
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
@@ -933,13 +929,25 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
#endif
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
/*
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
probe_roms();
- e820_reserve_resources();
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
+
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, machine_e820);
+
+ BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
+
+ e820_reserve_resources(machine_e820, memmap.nr_entries);
+ }
+#elif !defined(CONFIG_XEN)
+ probe_roms();
+ e820_reserve_resources(e820.map, e820.nr_map);
#endif
request_resource(&iomem_resource, &video_ram_resource);
@@ -951,7 +959,14 @@ void __init setup_arch(char **cmdline_p)
request_resource(&ioport_resource, &standard_io_resources[i]);
}
- e820_setup_gap();
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ e820_setup_gap(machine_e820, memmap.nr_entries);
+ free_bootmem(__pa(machine_e820), PAGE_SIZE);
+ }
+#elif !defined(CONFIG_XEN)
+ e820_setup_gap(e820.map, e820.nr_map);
+#endif
#ifdef CONFIG_GART_IOMMU
iommu_hole_init();
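
Both new call sites follow the same guest-handle convention: nr_entries carries the buffer capacity in and the entry count out, and the buffer must be wrapped with set_xen_guest_handle() before the hypercall. Condensed from the hunks above (kernel context, sketch only):

/* Sketch (kernel context): fetch the machine e820 in the initial domain. */
struct e820entry *machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
struct xen_memory_map memmap;

memmap.nr_entries = E820MAX;                        /* capacity in */
set_xen_guest_handle(memmap.buffer, machine_e820);  /* wrap the raw pointer */
BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
/* memmap.nr_entries now holds the entry count out */

e820_reserve_resources(machine_e820, memmap.nr_entries);
e820_setup_gap(machine_e820, memmap.nr_entries);
free_bootmem(__pa(machine_e820), PAGE_SIZE);        /* one page suffices */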
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue May 30 14:30:34 2006 -0500
@@ -370,7 +370,7 @@ void __set_fixmap_user (enum fixed_addre
set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
}
-unsigned long __initdata table_start, tables_space;
+unsigned long __initdata table_start, table_end;
unsigned long get_machine_pfn(unsigned long addr)
{
@@ -409,11 +409,17 @@ static inline int make_readonly(unsigned
{
int readonly = 0;
- /* Make old and new page tables read-only. */
+ /* Make new page tables read-only. */
+ if (!xen_feature(XENFEAT_writable_page_tables)
+ && (paddr >= (table_start << PAGE_SHIFT))
+ && (paddr < (table_end << PAGE_SHIFT)))
+ readonly = 1;
+ /* Make old page tables read-only. */
if (!xen_feature(XENFEAT_writable_page_tables)
&& (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
- && (paddr < ((table_start << PAGE_SHIFT) + tables_space)))
+ && (paddr < (start_pfn << PAGE_SHIFT)))
readonly = 1;
+
/*
* No need for writable mapping of kernel image. This also ensures that
* page and descriptor tables embedded inside don't have writable
@@ -544,7 +550,7 @@ void __init xen_init_pt(void)
mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
}
-void __init extend_init_mapping(void)
+void __init extend_init_mapping(unsigned long tables_space)
{
unsigned long va = __START_KERNEL_map;
unsigned long phys, addr, *pte_page;
@@ -599,23 +605,23 @@ void __init extend_init_mapping(void)
static void __init find_early_table_space(unsigned long end)
{
- unsigned long puds, pmds, ptes;
+ unsigned long puds, pmds, ptes, tables;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
- tables_space =
- round_up(puds * 8, PAGE_SIZE) +
+ tables = round_up(puds * 8, PAGE_SIZE) +
round_up(pmds * 8, PAGE_SIZE) +
round_up(ptes * 8, PAGE_SIZE);
- extend_init_mapping();
+ extend_init_mapping(tables);
table_start = start_pfn;
+ table_end = table_start + (tables>>PAGE_SHIFT);
early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, table_start << PAGE_SHIFT, start_pfn << PAGE_SHIFT);
+ end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -660,7 +666,7 @@ void __meminit init_memory_mapping(unsig
set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
}
- BUG_ON(!after_bootmem && start_pfn != table_start + (tables_space >> PAGE_SHIFT));
+ BUG_ON(!after_bootmem && start_pfn != table_end);
__flush_tlb_all();
}
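
find_early_table_space() now records both ends of the early page-table region; the sizing itself is one 8-byte entry per PUD, PMD and PTE needed to cover end, each level rounded up to whole pages. A standalone sketch of that arithmetic with assumed x86_64 shifts and an LP64 unsigned long:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PMD_SHIFT  21                    /* x86_64: 2MB per PMD entry */
#define PUD_SHIFT  30                    /* x86_64: 1GB per PUD entry */
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long end  = 1UL << 32;  /* assumed: map the first 4GB */
	unsigned long puds = (end + (1UL << PUD_SHIFT) - 1) >> PUD_SHIFT;
	unsigned long pmds = (end + (1UL << PMD_SHIFT) - 1) >> PMD_SHIFT;
	unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/* 8 bytes per entry; each level rounded up to whole pages */
	unsigned long tables = ROUND_UP(puds * 8, PAGE_SIZE) +
	                       ROUND_UP(pmds * 8, PAGE_SIZE) +
	                       ROUND_UP(ptes * 8, PAGE_SIZE);

	printf("%lu pages of early page tables\n", tables >> PAGE_SHIFT);
	return 0;
}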
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Tue May 30 14:30:34 2006 -0500
@@ -329,7 +329,7 @@ out:
* Callback received when the backend's state changes.
*/
static void backend_changed(struct xenbus_device *dev,
- XenbusState backend_state)
+ enum xenbus_state backend_state)
{
struct tpm_private *tp = dev->data;
DPRINTK("\n");
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Tue May 30 14:30:34 2006 -0500
@@ -1,5 +1,4 @@
-obj-y += net_driver_util.o
obj-y += util.o
obj-y += core/
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue May 30 14:30:34 2006 -0500
@@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex);
* Also protects non-atomic updates of current_pages and driver_pages, and
* balloon lists.
*/
-spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(balloon_lock);
/* We aim for 'current allocation' == 'target allocation'. */
static unsigned long current_pages;
@@ -360,6 +360,12 @@ static void balloon_process(void *unused
/* Resets the Xen limit, sets new target, and kicks off processing. */
static void set_new_target(unsigned long target)
{
+ unsigned long min_target;
+
+ /* Do not allow target to reduce below 2% of maximum memory size. */
+ min_target = max_pfn / 50;
+ target = max(target, min_target);
+
/* No need for lock. Not read-modify-write updates. */
hard_limit = ~0UL;
target_pages = target;
@@ -468,8 +474,8 @@ static int __init balloon_init(void)
IPRINTK("Initialising balloon driver.\n");
- if (xen_init() < 0)
- return -1;
+ if (!is_running_on_xen())
+ return -ENODEV;
current_pages = min(xen_start_info->nr_pages, max_pfn);
totalram_pages = current_pages;
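
The new floor keeps the balloon from shrinking a guest below 2% of its maximum memory, since max_pfn / 50 pages is 2% of the page count. A standalone sketch with an assumed 1GB guest:

#include <stdio.h>

/* Sketch of the 2% floor from set_new_target() above, standalone. */
static unsigned long clamp_target(unsigned long target, unsigned long max_pfn)
{
	unsigned long min_target = max_pfn / 50;   /* 2% of maximum memory */
	return target > min_target ? target : min_target;
}

int main(void)
{
	unsigned long max_pfn = 262144;                 /* 1GB of 4KB pages */
	printf("%lu\n", clamp_target(1024, max_pfn));   /* clamped to 5242 */
	printf("%lu\n", clamp_target(100000, max_pfn)); /* unchanged */
	return 0;
}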
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue May 30 14:30:34 2006 -0500
@@ -82,7 +82,7 @@ typedef struct {
static pending_req_t *pending_reqs;
static struct list_head pending_free;
-static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pending_free_lock);
static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
#define BLKBACK_INVALID_HANDLE (~0)
@@ -526,7 +526,7 @@ static int __init blkif_init(void)
struct page *page;
int i;
- if (xen_init() < 0)
+ if (!is_running_on_xen())
return -ENODEV;
mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue May 30 14:30:34 2006 -0500
@@ -247,7 +247,7 @@ static void backend_changed(struct xenbu
* Callback received when the frontend's state changes.
*/
static void frontend_changed(struct xenbus_device *dev,
- XenbusState frontend_state)
+ enum xenbus_state frontend_state)
{
struct backend_info *be = dev->data;
int err;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue May 30 14:30:34 2006 -0500
@@ -247,7 +247,7 @@ fail:
* Callback received when the backend's state changes.
*/
static void backend_changed(struct xenbus_device *dev,
- XenbusState backend_state)
+ enum xenbus_state backend_state)
{
struct blkfront_info *info = dev->data;
struct block_device *bd;
@@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s
have ignored this request initially, as the device was
still mounted. */
struct xenbus_device * dev = info->xbdev;
- XenbusState state = xenbus_read_driver_state(dev->otherend);
+ enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
if (state == XenbusStateClosing)
blkfront_closing(dev);
@@ -792,7 +792,7 @@ static struct xenbus_driver blkfront = {
static int __init xlblk_init(void)
{
- if (xen_init() < 0)
+ if (!is_running_on_xen())
return -ENODEV;
return xenbus_register_frontend(&blkfront);
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue May 30 14:30:34 2006 -0500
@@ -93,7 +93,7 @@ static struct block_device_operations xl
.ioctl = blkif_ioctl,
};
-spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(blkif_io_lock);
static struct xlbd_major_info *
xlbd_alloc_major_info(int major, int minor, int index)
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue May 30 14:30:34 2006 -0500
@@ -138,7 +138,7 @@ typedef struct {
*/
static pending_req_t pending_reqs[MAX_PENDING_REQS];
static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pend_prod_lock);
/* NB. We use a different index type to differentiate from shared blk rings. */
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Tue May 30 14:30:34 2006 -0500
@@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch
{
unsigned int goal;
goal = simple_strtoul(str, NULL, 0);
- while (wbuf_size < goal)
- wbuf_size <<= 1;
+ if (goal) {
+ goal = roundup_pow_of_two(goal);
+ if (wbuf_size < goal)
+ wbuf_size = goal;
+ }
return 1;
}
__setup("xencons_bufsz=", xencons_bufsz_setup);
/* This lock protects accesses to the common transmit buffer. */
-static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xencons_lock);
/* Common transmit-kick routine. */
static void __xencons_tx_flush(void);
@@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver
/******************** Kernel console driver ********************************/
-static void kcons_write(
- struct console *c, const char *s, unsigned int count)
+static void kcons_write(struct console *c, const char *s, unsigned int count)
{
int i = 0;
unsigned long flags;
@@ -155,14 +157,14 @@ static void kcons_write(
spin_unlock_irqrestore(&xencons_lock, flags);
}
-static void kcons_write_dom0(
- struct console *c, const char *s, unsigned int count)
-{
- int rc;
-
- while ((count > 0) &&
- ((rc = HYPERVISOR_console_io(
- CONSOLEIO_write, count, (char *)s)) > 0)) {
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
+{
+
+ while (count > 0) {
+ int rc;
+ rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
+ if (rc <= 0)
+ break;
count -= rc;
s += rc;
}
@@ -183,7 +185,7 @@ static struct console kcons_info = {
#define __RETCODE 0
static int __init xen_console_init(void)
{
- if (xen_init() < 0)
+ if (!is_running_on_xen())
return __RETCODE;
if (xen_start_info->flags & SIF_INITDOMAIN) {
@@ -566,7 +568,7 @@ static int __init xencons_init(void)
{
int rc;
- if (xen_init() < 0)
+ if (!is_running_on_xen())
return -ENODEV;
if (xc_mode == XC_OFF)
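
The reworked xencons_bufsz= handler rounds the request up to a power of two and never shrinks the buffer; a zero or unparsable value now leaves the default untouched. A standalone sketch using a loop in place of the kernel's roundup_pow_of_two() (default size assumed):

#include <stdio.h>

static unsigned long roundup_pow_of_two(unsigned long x)
{
	unsigned long r = 1;
	while (r < x)                  /* smallest power of two >= x */
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long wbuf_size = 4096;    /* assumed default buffer size */
	unsigned long goal = 10000;        /* user asked for ~10KB */

	if (goal) {                        /* ignore a zero request */
		goal = roundup_pow_of_two(goal);
		if (wbuf_size < goal)      /* grow only, never shrink */
			wbuf_size = goal;
	}
	printf("wbuf_size = %lu\n", wbuf_size);   /* prints 16384 */
	return 0;
}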
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile Tue May 30 14:30:34 2006 -0500
@@ -4,8 +4,9 @@
obj-y := evtchn.o reboot.o gnttab.o features.o
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-obj-$(CONFIG_NET) += skbuff.o
-obj-$(CONFIG_SMP) += smpboot.o
-obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
+obj-$(CONFIG_PROC_FS) += xen_proc.o
+obj-$(CONFIG_NET) += skbuff.o
+obj-$(CONFIG_SMP) += smpboot.o
+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o
+obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue May 30 14:30:34 2006 -0500
@@ -51,10 +51,10 @@
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
*/
-static spinlock_t irq_mapping_update_lock;
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
/* IRQ <-> event-channel mappings. */
-static int evtchn_to_irq[NR_EVENT_CHANNELS];
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ... NR_EVENT_CHANNELS-1] = -1};
/* Packed IRQ information: binding type, sub-type index, and event channel. */
static u32 irq_info[NR_IRQS];
@@ -91,13 +91,13 @@ static inline unsigned int type_from_irq
}
/* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping. */
#ifndef NR_IPIS
#define NR_IPIS 1
#endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
/* Reference counts for bindings to IRQs. */
static int irq_bindcount[NR_IRQS];
@@ -751,7 +751,9 @@ void irq_resume(void)
BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
/* Secondary CPUs must have no VIRQ or IPI bindings. */
- for (cpu = 1; cpu < NR_CPUS; cpu++) {
+ for_each_possible_cpu(cpu) {
+ if (cpu == 0)
+ continue;
for (virq = 0; virq < NR_VIRQS; virq++)
BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1);
for (ipi = 0; ipi < NR_IPIS; ipi++)
@@ -813,25 +815,12 @@ void __init xen_init_IRQ(void)
void __init xen_init_IRQ(void)
{
int i;
- int cpu;
-
- spin_lock_init(&irq_mapping_update_lock);
init_evtchn_cpu_bindings();
- /* No VIRQ or IPI bindings. */
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- for (i = 0; i < NR_VIRQS; i++)
- per_cpu(virq_to_irq, cpu)[i] = -1;
- for (i = 0; i < NR_IPIS; i++)
- per_cpu(ipi_to_irq, cpu)[i] = -1;
- }
-
- /* No event-channel -> IRQ mappings. */
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- evtchn_to_irq[i] = -1;
- mask_evtchn(i); /* No event channels are 'live' right now. */
- }
+ /* No event channels are 'live' right now. */
+ for (i = 0; i < NR_EVENT_CHANNELS; i++)
+ mask_evtchn(i);
/* No IRQ -> event-channel mappings. */
for (i = 0; i < NR_IRQS; i++)
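
The mapping tables now start out unbound at compile time via GCC's designated range initializer ([first ... last] = value, a GNU extension), which replaces the runtime loops deleted from xen_init_IRQ() above. A standalone sketch:

#include <stdio.h>

#define NR_EVENT_CHANNELS 8   /* small stand-in for the demo */

/* Every slot starts out -1 (unbound) with no runtime init loop. */
static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ... NR_EVENT_CHANNELS-1] = -1};

int main(void)
{
	for (int i = 0; i < NR_EVENT_CHANNELS; i++)
		printf("%d ", evtchn_to_irq[i]);   /* prints -1 eight times */
	printf("\n");
	return 0;
}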
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue May 30 14:30:34 2006 -0500
@@ -38,7 +38,6 @@
#include <linux/vmalloc.h>
#include <asm/pgtable.h>
#include <xen/interface/xen.h>
-#include <asm/fixmap.h>
#include <asm/uaccess.h>
#include <xen/gnttab.h>
#include <asm/synch_bitops.h>
@@ -81,7 +80,7 @@ static grant_ref_t gnttab_list[NR_GRANT_
static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
-static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(gnttab_list_lock);
static grant_entry_t *shared = NULL;
@@ -443,7 +442,7 @@ gnttab_init(void)
{
int i;
- if (xen_init() < 0)
+ if (!is_running_on_xen())
return -ENODEV;
if (gnttab_resume() < 0)
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/hypervisor_sysfs.c Tue May 30 14:30:34 2006 -0500
@@ -49,6 +49,9 @@ static struct kobj_type hyp_sysfs_kobj_t
static int __init hypervisor_subsys_init(void)
{
+ if (!is_running_on_xen())
+ return -ENODEV;
+
hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
return subsystem_register(&hypervisor_subsys);
}
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue May 30 14:30:34 2006 -0500
@@ -17,6 +17,7 @@
#include <linux/kthread.h>
#include <xen/gnttab.h>
#include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
#if defined(__i386__) || defined(__x86_64__)
/*
@@ -80,14 +81,6 @@ static int shutting_down = SHUTDOWN_INVA
static int shutting_down = SHUTDOWN_INVALID;
static void __shutdown_handler(void *unused);
static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-
-#ifdef CONFIG_SMP
-int smp_suspend(void);
-void smp_resume(void);
-#else
-#define smp_suspend() (0)
-#define smp_resume() ((void)0)
-#endif
/* Ensure we run on the idle task page tables so that we will
switch page tables before running user space. This is needed
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Tue May 30 14:30:34 2006 -0500
@@ -23,6 +23,7 @@
#include <asm/pgalloc.h>
#include <xen/evtchn.h>
#include <xen/interface/vcpu.h>
+#include <xen/cpu_hotplug.h>
#include <xen/xenbus.h>
#ifdef CONFIG_SMP_ALTERNATIVES
@@ -78,15 +79,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
#elif !defined(CONFIG_X86_IO_APIC)
unsigned int maxcpus = NR_CPUS;
#endif
-
-/*
- * Set of CPUs that remote admin software will allow us to bring online.
- * Notified to us via xenbus.
- */
-static cpumask_t xenbus_allowed_cpumask;
-
-/* Set of CPUs that local admin will allow us to bring online. */
-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
void __init prefill_possible_map(void)
{
@@ -167,17 +159,17 @@ static void cpu_bringup(void)
cpu_idle();
}
-static void vcpu_prepare(int vcpu)
+void cpu_initialize_context(unsigned int cpu)
{
vcpu_guest_context_t ctxt;
- struct task_struct *idle = idle_task(vcpu);
+ struct task_struct *idle = idle_task(cpu);
#ifdef __x86_64__
- struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu];
+ struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
#else
- struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu);
-#endif
-
- if (vcpu == 0)
+ struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+
+ if (cpu == 0)
return;
memset(&ctxt, 0, sizeof(ctxt));
@@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu)
ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
- ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
-#endif
-
- BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
+ ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+ BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
}
void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in
cpu_set(cpu, cpu_present_map);
#endif
- vcpu_prepare(cpu);
- }
-
- xenbus_allowed_cpumask = cpu_present_map;
+ cpu_initialize_context(cpu);
+ }
+
+ init_xenbus_allowed_cpumask();
/* Currently, Xen gives no dynamic NUMA/HT info. */
for (cpu = 1; cpu < NR_CPUS; cpu++) {
@@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void
cpu_online_map = cpumask_of_cpu(0);
}
-static int local_cpu_hotplug_request(void)
-{
- /*
- * We assume a CPU hotplug request comes from local admin if it is made
- * via a userspace process (i.e., one with a real mm_struct).
- */
- return (current->mm != NULL);
-}
-
#ifdef CONFIG_HOTPLUG_CPU
/*
@@ -355,141 +338,6 @@ static int __init initialize_cpu_present
}
core_initcall(initialize_cpu_present_map);
-static void vcpu_hotplug(unsigned int cpu)
-{
- int err;
- char dir[32], state[32];
-
- if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
- return;
-
- sprintf(dir, "cpu/%d", cpu);
- err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
- if (err != 1) {
- printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
- return;
- }
-
- if (strcmp(state, "online") == 0) {
- cpu_set(cpu, xenbus_allowed_cpumask);
- (void)cpu_up(cpu);
- } else if (strcmp(state, "offline") == 0) {
- cpu_clear(cpu, xenbus_allowed_cpumask);
- (void)cpu_down(cpu);
- } else {
- printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
- state, cpu);
- }
-}
-
-static void handle_vcpu_hotplug_event(
- struct xenbus_watch *watch, const char **vec, unsigned int len)
-{
- int cpu;
- char *cpustr;
- const char *node = vec[XS_WATCH_PATH];
-
- if ((cpustr = strstr(node, "cpu/")) != NULL) {
- sscanf(cpustr, "cpu/%d", &cpu);
- vcpu_hotplug(cpu);
- }
-}
-
-static int smpboot_cpu_notify(struct notifier_block *notifier,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
-
- /*
- * We do this in a callback notifier rather than __cpu_disable()
- * because local_cpu_hotplug_request() does not work in the latter
- * as it's always executed from within a stopmachine kthread.
- */
- if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
- cpu_clear(cpu, local_allowed_cpumask);
-
- return NOTIFY_OK;
-}
-
-static int setup_cpu_watcher(struct notifier_block *notifier,
- unsigned long event, void *data)
-{
- int i;
-
- static struct xenbus_watch cpu_watch = {
- .node = "cpu",
- .callback = handle_vcpu_hotplug_event,
- .flags = XBWF_new_thread };
- (void)register_xenbus_watch(&cpu_watch);
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- for_each_cpu(i)
- vcpu_hotplug(i);
- printk(KERN_INFO "Brought up %ld CPUs\n",
- (long)num_online_cpus());
- }
-
- return NOTIFY_DONE;
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
- static struct notifier_block hotplug_cpu = {
- .notifier_call = smpboot_cpu_notify };
- static struct notifier_block xsn_cpu = {
- .notifier_call = setup_cpu_watcher };
-
- register_cpu_notifier(&hotplug_cpu);
- register_xenstore_notifier(&xsn_cpu);
-
- return 0;
-}
-
-arch_initcall(setup_vcpu_hotplug_event);
-
-int smp_suspend(void)
-{
- int i, err;
-
- lock_cpu_hotplug();
-
- /*
- * Take all other CPUs offline. We hold the hotplug mutex to
- * avoid other processes bringing up CPUs under our feet.
- */
- while (num_online_cpus() > 1) {
- unlock_cpu_hotplug();
- for_each_online_cpu(i) {
- if (i == 0)
- continue;
- err = cpu_down(i);
- if (err) {
- printk(KERN_CRIT "Failed to take all CPUs "
- "down: %d.\n", err);
- for_each_cpu(i)
- vcpu_hotplug(i);
- return err;
- }
- }
- lock_cpu_hotplug();
- }
-
- return 0;
-}
-
-void smp_resume(void)
-{
- int i;
-
- for_each_cpu(i)
- vcpu_prepare(i);
-
- unlock_cpu_hotplug();
-
- for_each_cpu(i)
- vcpu_hotplug(i);
-}
-
static void
remove_siblinginfo(int cpu)
{
@@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu)
#else /* !CONFIG_HOTPLUG_CPU */
-int smp_suspend(void)
-{
- if (num_online_cpus() > 1) {
- printk(KERN_WARNING "Can't suspend SMP guests "
- "without CONFIG_HOTPLUG_CPU\n");
- return -EOPNOTSUPP;
- }
- return 0;
-}
-
-void smp_resume(void)
-{
-}
-
int __cpu_disable(void)
{
return -ENOSYS;
@@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu)
{
int rc;
- if (local_cpu_hotplug_request()) {
- cpu_set(cpu, local_allowed_cpumask);
- if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
- printk("%s: attempt to bring up CPU %u disallowed by "
- "remote admin.\n", __FUNCTION__, cpu);
- return -EBUSY;
- }
- } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
- !cpu_isset(cpu, xenbus_allowed_cpumask)) {
- return -EBUSY;
- }
+ rc = cpu_up_check(cpu);
+ if (rc)
+ return rc;
#ifdef CONFIG_SMP_ALTERNATIVES
if (num_online_cpus() == 1)
@@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
cpu_set(cpu, cpu_online_map);
rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
- if (rc != 0)
- BUG();
+ BUG_ON(rc);
return 0;
}
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Tue May 30 14:30:34 2006 -0500
@@ -429,6 +429,9 @@ static int __init evtchn_init(void)
{
int err;
+ if (!is_running_on_xen())
+ return -ENODEV;
+
spin_lock_init(&port_user_lock);
memset(port_user, 0, sizeof(port_user));
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue May 30 14:30:34 2006 -0500
@@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock
#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
-static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mfn_lock);
static unsigned long alloc_mfn(void)
{
@@ -691,7 +691,7 @@ static void net_tx_action(unsigned long
static void netif_idx_release(u16 pending_idx)
{
- static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(_lock);
unsigned long flags;
spin_lock_irqsave(&_lock, flags);
@@ -810,6 +810,9 @@ static int __init netback_init(void)
int i;
struct page *page;
+ if (!is_running_on_xen())
+ return -ENODEV;
+
/* We can increase reservation by this much in net_rx_action(). */
balloon_update_driver_allowance(NET_RX_RING_SIZE);
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue May 30 14:30:34 2006 -0500
@@ -17,13 +17,10 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-
#include <stdarg.h>
#include <linux/module.h>
#include <xen/xenbus.h>
-#include <xen/net_driver_util.h>
#include "common.h"
-
#if 0
#undef DPRINTK
@@ -31,22 +28,19 @@
printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
#endif
-
struct backend_info
{
struct xenbus_device *dev;
netif_t *netif;
struct xenbus_watch backend_watch;
- XenbusState frontend_state;
+ enum xenbus_state frontend_state;
};
-
static int connect_rings(struct backend_info *);
static void connect(struct backend_info *);
static void maybe_connect(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
unsigned int);
-
static int netback_remove(struct xenbus_device *dev)
{
@@ -191,7 +185,7 @@ static void backend_changed(struct xenbu
* Callback received when the frontend's state changes.
*/
static void frontend_changed(struct xenbus_device *dev,
- XenbusState frontend_state)
+ enum xenbus_state frontend_state)
{
struct backend_info *be = dev->data;
@@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen
kfree(ratestr);
}
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+ char *s, *e, *macstr;
+ int i;
+
+ macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+ if (IS_ERR(macstr))
+ return PTR_ERR(macstr);
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac[i] = simple_strtoul(s, &e, 16);
+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+ kfree(macstr);
+ return -ENOENT;
+ }
+ s = e+1;
+ }
+
+ kfree(macstr);
+ return 0;
+}
static void connect(struct backend_info *be)
{
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue May 30 14:30:34 2006 -0500
@@ -60,12 +60,11 @@
#include <asm/uaccess.h>
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
-#include <xen/net_driver_util.h>
#define GRANT_INVALID_REF 0
-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
static inline void init_skb_shinfo(struct sk_buff *skb)
{
@@ -80,20 +79,14 @@ struct netfront_info {
struct net_device_stats stats;
- netif_tx_front_ring_t tx;
- netif_rx_front_ring_t rx;
+ struct netif_tx_front_ring tx;
+ struct netif_rx_front_ring rx;
spinlock_t tx_lock;
spinlock_t rx_lock;
unsigned int handle;
unsigned int evtchn, irq;
-
- /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED 0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED 2
- unsigned int backend_state;
/* Receive-ring batched refills. */
#define RX_MIN_TARGET 8
@@ -123,8 +116,8 @@ struct netfront_info {
u8 mac[ETH_ALEN];
unsigned long rx_pfn_array[NET_RX_RING_SIZE];
- multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
- mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
+ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
};
/*
@@ -143,14 +136,6 @@ static inline unsigned short get_id_from
list[0] = list[id];
return id;
}
-
-#ifdef DEBUG
-static const char *be_state_name[] = {
- [BEST_CLOSED] = "closed",
- [BEST_DISCONNECTED] = "disconnected",
- [BEST_CONNECTED] = "connected",
-};
-#endif
#define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \
__FUNCTION__, __LINE__, ##args)
@@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus
return talk_to_backend(dev, info);
}
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+ char *s, *e, *macstr;
+ int i;
+
+ macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+ if (IS_ERR(macstr))
+ return PTR_ERR(macstr);
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac[i] = simple_strtoul(s, &e, 16);
+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+ kfree(macstr);
+ return -ENOENT;
+ }
+ s = e+1;
+ }
+
+ kfree(macstr);
+ return 0;
+}
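
With net_driver_util.c gone, netback and netfront each carry their own copy of this parser, which reads the colon-separated "mac" node from xenstore. A standalone sketch of the same parse using strtoul in place of the kernel's simple_strtoul (demo input assumed):

#include <stdio.h>
#include <stdlib.h>

#define ETH_ALEN 6

static int parse_mac(const char *s, unsigned char mac[ETH_ALEN])
{
	char *e;
	for (int i = 0; i < ETH_ALEN; i++) {
		mac[i] = (unsigned char)strtoul(s, &e, 16);
		/* each byte must end in ':', the last in the terminating NUL */
		if (s == e || *e != (i == ETH_ALEN - 1 ? '\0' : ':'))
			return -1;
		s = e + 1;
	}
	return 0;
}

int main(void)
{
	unsigned char mac[ETH_ALEN];
	if (parse_mac("00:16:3e:12:34:56", mac) == 0)
		printf("last byte: %02x\n", mac[5]);   /* prints 56 */
	return 0;
}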
/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
@@ -317,8 +323,8 @@ again:
static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
{
- netif_tx_sring_t *txs;
- netif_rx_sring_t *rxs;
+ struct netif_tx_sring *txs;
+ struct netif_rx_sring *rxs;
int err;
struct net_device *netdev = info->netdev;
@@ -328,13 +334,13 @@ static int setup_device(struct xenbus_de
info->tx.sring = NULL;
info->irq = 0;
- txs = (netif_tx_sring_t *)__get_free_page(GFP_KERNEL);
+ txs = (struct netif_tx_sring *)__get_free_page(GFP_KERNEL);
if (!txs) {
err = -ENOMEM;
xenbus_dev_fatal(dev, err, "allocating tx ring page");
goto fail;
}
- rxs = (netif_rx_sring_t *)__get_free_page(GFP_KERNEL);
+ rxs = (struct netif_rx_sring *)__get_free_page(GFP_KERNEL);
if (!rxs) {
err = -ENOMEM;
xenbus_dev_fatal(dev, err, "allocating rx ring page");
@@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de
}
memset(txs, 0, PAGE_SIZE);
memset(rxs, 0, PAGE_SIZE);
- info->backend_state = BEST_DISCONNECTED;
SHARED_RING_INIT(txs);
FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
@@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de
* Callback received when the backend's state changes.
*/
static void backend_changed(struct xenbus_device *dev,
- XenbusState backend_state)
+ enum xenbus_state backend_state)
{
DPRINTK("\n");
@@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net
struct netfront_info *np = netdev_priv(dev);
struct sk_buff *skb;
- if (np->backend_state != BEST_CONNECTED)
+ if (unlikely(!netif_carrier_ok(dev)))
return;
do {
@@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str
struct xen_memory_reservation reservation;
grant_ref_t ref;
- if (unlikely(np->backend_state != BEST_CONNECTED))
+ if (unlikely(!netif_carrier_ok(dev)))
return;
/*
@@ -638,7 +643,7 @@ static int network_start_xmit(struct sk_
{
unsigned short id;
struct netfront_info *np = netdev_priv(dev);
- netif_tx_request_t *tx;
+ struct netif_tx_request *tx;
RING_IDX i;
grant_ref_t ref;
unsigned long mfn;
@@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_
spin_lock_irq(&np->tx_lock);
- if (np->backend_state != BEST_CONNECTED) {
+ if (unlikely(!netif_carrier_ok(dev))) {
spin_unlock_irq(&np->tx_lock);
goto drop;
}
@@ -736,10 +741,10 @@ static int netif_poll(struct net_device
{
struct netfront_info *np = netdev_priv(dev);
struct sk_buff *skb, *nskb;
- netif_rx_response_t *rx;
+ struct netif_rx_response *rx;
RING_IDX i, rp;
- mmu_update_t *mmu = np->rx_mmu;
- multicall_entry_t *mcl = np->rx_mcl;
+ struct mmu_update *mmu = np->rx_mmu;
+ struct multicall_entry *mcl = np->rx_mcl;
int work_done, budget, more_to_do = 1;
struct sk_buff_head rxq;
unsigned long flags;
@@ -748,7 +753,7 @@ static int netif_poll(struct net_device
spin_lock(&np->rx_lock);
- if (np->backend_state != BEST_CONNECTED) {
+ if (unlikely(!netif_carrier_ok(dev))) {
spin_unlock(&np->rx_lock);
return 0;
}
@@ -962,7 +967,7 @@ static void network_connect(struct net_d
{
struct netfront_info *np;
int i, requeue_idx;
- netif_tx_request_t *tx;
+ struct netif_tx_request *tx;
struct sk_buff *skb;
np = netdev_priv(dev);
@@ -1041,11 +1046,9 @@ static void network_connect(struct net_d
* domain a kick because we've probably just requeued some
* packets.
*/
- np->backend_state = BEST_CONNECTED;
+ netif_carrier_on(dev);
notify_remote_via_irq(np->irq);
network_tx_buf_gc(dev);
-
- network_maybe_wake_tx(dev);
spin_unlock(&np->rx_lock);
spin_unlock_irq(&np->tx_lock);
@@ -1057,7 +1060,7 @@ static void show_device(struct netfront_
if (np) {
IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
np->handle,
- be_state_name[np->backend_state],
+ netif_carrier_ok(np->netdev) ? "on" : "off",
netif_running(np->netdev) ? "open" : "closed",
np->evtchn,
np->tx,
@@ -1243,9 +1246,10 @@ static struct net_device * __devinit cre
}
np = netdev_priv(netdev);
- np->backend_state = BEST_CLOSED;
np->handle = handle;
np->xbdev = dev;
+
+ netif_carrier_off(netdev);
spin_lock_init(&np->tx_lock);
spin_lock_init(&np->rx_lock);
@@ -1394,7 +1398,7 @@ static void netif_disconnect_backend(str
/* Stop old i/f to prevent errors whilst we rebuild the state. */
spin_lock_irq(&info->tx_lock);
spin_lock(&info->rx_lock);
- info->backend_state = BEST_DISCONNECTED;
+ netif_carrier_off(info->netdev);
spin_unlock(&info->rx_lock);
spin_unlock_irq(&info->tx_lock);
@@ -1454,6 +1458,9 @@ static struct notifier_block notifier_in
static int __init netif_init(void)
{
+ if (!is_running_on_xen())
+ return -ENODEV;
+
if (xen_start_info->flags & SIF_INITDOMAIN)
return 0;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue May 30 14:30:34 2006 -0500
@@ -166,7 +166,7 @@ static int pciback_attach(struct pciback
}
static void pciback_frontend_changed(struct xenbus_device *xdev,
- XenbusState fe_state)
+ enum xenbus_state fe_state)
{
struct pciback_device *pdev = xdev->data;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Tue May 30 14:30:34 2006 -0500
@@ -196,7 +196,7 @@ static int pcifront_try_disconnect(struc
static int pcifront_try_disconnect(struct pcifront_device *pdev)
{
int err = 0;
- XenbusState prev_state;
+ enum xenbus_state prev_state;
spin_lock(&pdev->dev_lock);
@@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc
}
static void pcifront_backend_changed(struct xenbus_device *xdev,
- XenbusState be_state)
+ enum xenbus_state be_state)
{
struct pcifront_device *pdev = xdev->data;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue May 30 14:30:34 2006 -0500
@@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i
break;
case IOCTL_PRIVCMD_MMAPBATCH: {
-#ifndef __ia64__
- mmu_update_t u;
- uint64_t ptep;
-#endif
privcmd_mmapbatch_t m;
struct vm_area_struct *vma = NULL;
unsigned long __user *p;
@@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i
for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
if (get_user(mfn, p))
return -EFAULT;
-#ifdef __ia64__
+
ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
- mfn, 1 << PAGE_SHIFT,
+ mfn, PAGE_SIZE,
vma->vm_page_prot, m.dom);
if (ret < 0)
- goto batch_err;
-#else
-
- ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
- if (ret)
- goto batch_err;
-
- u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
- u.ptr = ptep;
-
- if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
put_user(0xF0000000 | mfn, p);
-#endif
}
ret = 0;
@@ -271,6 +255,9 @@ static int capabilities_read(char *page,
static int __init privcmd_init(void)
{
+ if (!is_running_on_xen())
+ return -ENODEV;
+
/* Set of hypercalls that privileged applications may execute. */
set_bit(__HYPERVISOR_acm_op, hypercall_permission_map);
set_bit(__HYPERVISOR_dom0_op, hypercall_permission_map);
@@ -280,6 +267,9 @@ static int __init privcmd_init(void)
set_bit(__HYPERVISOR_mmuext_op, hypercall_permission_map);
set_bit(__HYPERVISOR_xen_version, hypercall_permission_map);
set_bit(__HYPERVISOR_sched_op, hypercall_permission_map);
+ set_bit(__HYPERVISOR_sched_op_compat, hypercall_permission_map);
+ set_bit(__HYPERVISOR_event_channel_op_compat,
+ hypercall_permission_map);
privcmd_intf = create_xen_proc_entry("privcmd", 0400);
if (privcmd_intf != NULL)
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Tue May 30 14:30:34 2006 -0500
@@ -34,7 +34,7 @@ struct backend_info
/* watch front end for changes */
struct xenbus_watch backend_watch;
- XenbusState frontend_state;
+ enum xenbus_state frontend_state;
};
static void maybe_connect(struct backend_info *be);
@@ -43,7 +43,7 @@ static void backend_changed(struct xenbu
static void backend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len);
static void frontend_changed(struct xenbus_device *dev,
- XenbusState frontend_state);
+ enum xenbus_state frontend_state);
static int tpmback_remove(struct xenbus_device *dev)
{
@@ -129,7 +129,7 @@ static void backend_changed(struct xenbu
static void frontend_changed(struct xenbus_device *dev,
- XenbusState frontend_state)
+ enum xenbus_state frontend_state)
{
struct backend_info *be = dev->data;
int err;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Tue May 30 14:30:34 2006 -0500
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2);
EXPORT_SYMBOL_GPL(xenbus_watch_path2);
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState state)
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
{
/* We check whether the state is currently set to the given value, and
if not, then the state is set. We don't want to unconditionally
@@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev
}
-XenbusState xenbus_read_driver_state(const char *path)
-{
- XenbusState result;
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+ enum xenbus_state result;
int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
if (err)
result = XenbusStateClosed;
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue May 30 14:30:34 2006 -0500
@@ -284,7 +284,7 @@ static void otherend_changed(struct xenb
struct xenbus_device *dev =
container_of(watch, struct xenbus_device, otherend_watch);
struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
- XenbusState state;
+ enum xenbus_state state;
/* Protect us against watches firing on old details when the otherend
details change, say immediately after a resume. */
@@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_
size_t stringlen;
char *tmpstring;
- XenbusState state = xenbus_read_driver_state(nodename);
+ enum xenbus_state state = xenbus_read_driver_state(nodename);
if (state != XenbusStateInitialising) {
/* Device is not new, so ignore it. This can happen if a
@@ -966,10 +966,8 @@ static int __init xenbus_probe_init(void
DPRINTK("");
- if (xen_init() < 0) {
- DPRINTK("failed");
+ if (!is_running_on_xen())
return -ENODEV;
- }
/* Register ourselves with the kernel bus subsystem */
bus_register(&xenbus_frontend.bus);
@@ -1069,10 +1067,8 @@ static int __init wait_for_devices(void)
{
unsigned long timeout = jiffies + 10*HZ;
- if (xen_init() < 0) {
- DPRINTK("failed");
+ if (!is_running_on_xen())
return -ENODEV;
- }
while (time_before(jiffies, timeout)) {
if (all_devices_ready())
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Tue May 30 14:30:34 2006 -0500
@@ -118,7 +118,7 @@ u64 jiffies_to_st(unsigned long jiffies)
#define MULTI_UVMDOMID_INDEX 4
#endif
-#define xen_init() (0)
+#define is_running_on_xen() 1
static inline int
HYPERVISOR_yield(
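
With xen_init() reduced to the is_running_on_xen() predicate, every driver init in this changeset converges on the same guard. The resulting idiom, as a sketch (hypothetical driver, kernel context):

static int __init mydrv_init(void)        /* hypothetical example driver */
{
	if (!is_running_on_xen())          /* bail out cleanly on bare metal */
		return -ENODEV;
	/* ... normal setup and registration ... */
	return 0;
}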
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue May 30 14:30:34 2006 -0500
@@ -10,10 +10,32 @@
static char * __init machine_specific_memory_setup(void)
{
- unsigned long max_pfn = xen_start_info->nr_pages;
+ int rc;
+ struct xen_memory_map memmap;
+ /*
+ * This is rather large for a stack variable but this early in
+ * the boot process we know we have plenty slack space.
+ */
+ struct e820entry map[E820MAX];
- e820.nr_map = 0;
- add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, map);
+
+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+ if ( rc == -ENOSYS ) {
+ memmap.nr_entries = 1;
+ map[0].addr = 0ULL;
+ map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+ /* 8MB slack (to balance backend allocations). */
+ map[0].size += 8 << 20;
+ map[0].type = E820_RAM;
+ rc = 0;
+ }
+ BUG_ON(rc);
+
+ sanitize_e820_map(map, (char *)&memmap.nr_entries);
+
+ BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
return "Xen";
}
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue May 30 14:30:34 2006 -0500
@@ -195,12 +195,42 @@ HYPERVISOR_multicall(
return _hypercall2(int, multicall, call_list, nr_calls);
}
+#ifndef CONFIG_XEN_IA64_DOM0_VP
static inline int
HYPERVISOR_memory_op(
unsigned int cmd, void *arg)
{
return _hypercall2(int, memory_op, cmd, arg);
}
+#else
+//XXX xen/ia64 copy_from_guest() is broken.
+// This is a temporal work around until it is fixed.
+static inline int
+____HYPERVISOR_memory_op(
+ unsigned int cmd, void *arg)
+{
+ return _hypercall2(int, memory_op, cmd, arg);
+}
+
+#include <xen/interface/memory.h>
+int ia64_xenmem_reservation_op(unsigned long op,
+ struct xen_memory_reservation* reservation__);
+static inline int
+HYPERVISOR_memory_op(
+ unsigned int cmd, void *arg)
+{
+ switch (cmd) {
+ case XENMEM_increase_reservation:
+ case XENMEM_decrease_reservation:
+ case XENMEM_populate_physmap:
+ return ia64_xenmem_reservation_op(cmd,
+ (struct xen_memory_reservation*)arg);
+ default:
+ return ____HYPERVISOR_memory_op(cmd, arg);
+ }
+ /* NOTREACHED */
+}
+#endif
static inline int
HYPERVISOR_event_channel_op(
@@ -244,12 +274,19 @@ HYPERVISOR_physdev_op(
return rc;
}
-static inline int
-HYPERVISOR_grant_table_op(
+//XXX __HYPERVISOR_grant_table_op is used for this hypercall constant.
+static inline int
+____HYPERVISOR_grant_table_op(
unsigned int cmd, void *uop, unsigned int count)
{
return _hypercall3(int, grant_table_op, cmd, uop, count);
}
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+#define HYPERVISOR_grant_table_op(cmd, uop, count) \
+ ____HYPERVISOR_grant_table_op((cmd), (uop), (count))
+#else
+int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
+#endif
static inline int
HYPERVISOR_vcpu_op(
@@ -281,6 +318,7 @@ static inline void exit_idle(void) {}
#define do_IRQ(irq, regs) __do_IRQ((irq), (regs))
#ifdef CONFIG_XEN_IA64_DOM0_VP
+#include <linux/err.h>
#include <asm/xen/privop.h>
#define _hypercall_imm1(type, name, imm, a1) \
@@ -382,6 +420,10 @@ HYPERVISOR_ioremap(unsigned long ioaddr,
unsigned long ret = ioaddr;
if (running_on_xen) {
ret = __HYPERVISOR_ioremap(ioaddr, size);
+ if (unlikely(IS_ERR_VALUE(ret)))
+ panic("hypercall %s failed with %ld. "
+ "Please check Xen and Linux config mismatch\n",
+ __func__, -ret);
}
return ret;
}
@@ -421,27 +463,6 @@ HYPERVISOR_machtophys(unsigned long mfn)
}
static inline unsigned long
-__HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
- unsigned int address_bits)
-{
- return _hypercall_imm3(unsigned long, ia64_dom0vp_op,
- IA64_DOM0VP_populate_physmap, gpfn,
- extent_order, address_bits);
-}
-
-static inline unsigned long
-HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order,
- unsigned int address_bits)
-{
- unsigned long ret = 0;
- if (running_on_xen) {
- ret = __HYPERVISOR_populate_physmap(gpfn, extent_order,
- address_bits);
- }
- return ret;
-}
-
-static inline unsigned long
__HYPERVISOR_zap_physmap(unsigned long gpfn, unsigned int extent_order)
{
return _hypercall_imm2(unsigned long, ia64_dom0vp_op,
@@ -466,6 +487,7 @@ __HYPERVISOR_add_physmap(unsigned long g
IA64_DOM0VP_add_physmap, gpfn, mfn, flags,
domid);
}
+
static inline unsigned long
HYPERVISOR_add_physmap(unsigned long gpfn, unsigned long mfn,
unsigned int flags, domid_t domid)
@@ -477,13 +499,15 @@ HYPERVISOR_add_physmap(unsigned long gpf
}
return ret;
}
+
+// for balloon driver
+#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0)
+
#else
-#define HYPERVISOR_ioremap(ioaddr, size) ({ioaddr;})
-#define HYPERVISOR_phystomach(gpfn) ({gpfn;})
-#define HYPERVISOR_machtophys(mfn) ({mfn;})
-#define HYPERVISOR_populate_physmap(gpfn, extent_order, address_bits) \
- ({0;})
-#define HYPERVISOR_zap_physmap(gpfn, extent_order) ({0;})
-#define HYPERVISOR_add_physmap(gpfn, mfn, flags) ({0;})
+#define HYPERVISOR_ioremap(ioaddr, size) (ioaddr)
+#define HYPERVISOR_phystomach(gpfn) (gpfn)
+#define HYPERVISOR_machtophys(mfn) (mfn)
+#define HYPERVISOR_zap_physmap(gpfn, extent_order) (0)
+#define HYPERVISOR_add_physmap(gpfn, mfn, flags) (0)
#endif
#endif /* __HYPERCALL_H__ */
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Tue May 30 14:30:34 2006 -0500
@@ -53,7 +53,7 @@ extern start_info_t *xen_start_info;
void force_evtchn_callback(void);
-int xen_init(void);
+#define is_running_on_xen() running_on_xen
/* Turn jiffies into Xen system time. XXX Implement me. */
#define jiffies_to_st(j) 0
@@ -118,11 +118,22 @@ HYPERVISOR_poll(
}
// for drivers/xen/privcmd/privcmd.c
-#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e)
#define machine_to_phys_mapping 0
#ifndef CONFIG_XEN_IA64_DOM0_VP
+#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e)
#define pfn_to_mfn(x) (x)
#define mfn_to_pfn(x) (x)
+#else
+struct vm_area_struct;
+int direct_remap_pfn_range(struct vm_area_struct *vma,
+ unsigned long address,
+ unsigned long mfn,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid);
+struct file;
+int privcmd_mmap(struct file * file, struct vm_area_struct * vma);
+#define HAVE_ARCH_PRIVCMD_MMAP
#endif
// for drivers/xen/balloon/balloon.c
@@ -147,7 +158,7 @@ xen_create_contiguous_region(unsigned lo
unsigned int order, unsigned int address_bits)
{
int ret = 0;
- if (running_on_xen) {
+ if (is_running_on_xen()) {
ret = __xen_create_contiguous_region(vstart, order,
address_bits);
}
@@ -158,11 +169,24 @@ static inline void
static inline void
xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
- if (running_on_xen)
+ if (is_running_on_xen())
__xen_destroy_contiguous_region(vstart, order);
}
+
+// for netfront.c, netback.c
+#define MULTI_UVMFLAGS_INDEX 0 //XXX any value
+
+static inline void
+MULTI_update_va_mapping(
+ multicall_entry_t *mcl, unsigned long va,
+ pte_t new_val, unsigned long flags)
+{
+ mcl->op = __HYPERVISOR_update_va_mapping;
+ mcl->result = 0;
+}
+
#else
-#define xen_create_contiguous_region(vstart, order, address_bits) ({0;})
+#define xen_create_contiguous_region(vstart, order, address_bits) (0)
#define xen_destroy_contiguous_region(vstart, order) do {} while (0)
#endif
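
The MULTI_update_va_mapping() stub above exists so shared netfront/netback code can keep batching update_va_mapping operations into a multicall list on ia64: the entry's result is pre-set to 0, so callers that inspect mcl->result after the batch see success even though the op is a no-op there. A simplified user-space model of that batching pattern (the struct and dispatcher are stand-ins, not the real Xen interface):

    /* Toy model of multicall batching; multicall_entry and do_multicall()
     * are stand-ins for the real Xen interface. */
    #include <stdio.h>

    struct multicall_entry { unsigned long op; long result; };

    #define FAKE_HYPERVISOR_update_va_mapping 14   /* illustrative opcode */

    static void multi_update_va_mapping(struct multicall_entry *mcl)
    {
        mcl->op = FAKE_HYPERVISOR_update_va_mapping;
        mcl->result = 0;     /* pre-set: the ia64 no-op always "succeeds" */
    }

    static void do_multicall(struct multicall_entry *mcl, int n)
    {
        /* A real hypervisor would execute each entry and fill in result;
         * the ia64 stub relies on the pre-set value instead. */
        (void)mcl; (void)n;
    }

    int main(void)
    {
        struct multicall_entry batch[2];
        int i;

        multi_update_va_mapping(&batch[0]);
        multi_update_va_mapping(&batch[1]);
        do_multicall(batch, 2);
        for (i = 0; i < 2; i++)
            printf("entry %d result %ld\n", i, batch[i].result);
        return 0;
    }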
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/page.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/page.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h Tue May 30 14:30:34 2006 -0500
@@ -234,6 +234,43 @@ get_order (unsigned long size)
#include <linux/kernel.h>
#include <asm/hypervisor.h>
+#include <xen/features.h> // to compile netback, netfront
+typedef unsigned long maddr_t; // to compile netback, netfront
+
+// XXX hack!
+// Linux/IA64 uses PG_arch_1.
+// This hack will be removed once PG_foreign bit is taken.
+//#include <xen/foreign_page.h>
+#ifdef __ASM_XEN_FOREIGN_PAGE_H__
+# error "don't include include/xen/foreign_page.h!"
+#endif
+
+extern struct address_space xen_ia64_foreign_dummy_mapping;
+#define PageForeign(page) \
+ ((page)->mapping == &xen_ia64_foreign_dummy_mapping)
+
+#define SetPageForeign(page, dtor) do { \
+ set_page_private((page), (unsigned long)(dtor)); \
+ (page)->mapping = &xen_ia64_foreign_dummy_mapping; \
+ smp_rmb(); \
+} while (0)
+
+#define ClearPageForeign(page) do { \
+ (page)->mapping = NULL; \
+ smp_rmb(); \
+ set_page_private((page), 0); \
+} while (0)
+
+#define PageForeignDestructor(page) \
+ ( (void (*) (struct page *)) page_private(page) )
+
+#define arch_free_page(_page,_order) \
+({ int foreign = PageForeign(_page); \
+ if (foreign) \
+ (PageForeignDestructor(_page))(_page); \
+ foreign; \
+})
+#define HAVE_ARCH_FREE_PAGE
//XXX xen page size != page size
@@ -279,11 +316,14 @@ machine_to_phys_for_dma(unsigned long ma
#define set_phys_to_machine(pfn, mfn) do { } while (0)
#define xen_machphys_update(mfn, pfn) do { } while (0)
-#define mfn_to_pfn(mfn) ({(mfn);})
-#define mfn_to_virt(mfn) ({__va((mfn) << PAGE_SHIFT);})
-#define pfn_to_mfn(pfn) ({(pfn);})
-#define virt_to_mfn(virt) ({__pa(virt) >> PAGE_SHIFT;})
-#define virt_to_machine(virt) ({__pa(virt);}) // for tpmfront.c
+//XXX to compile set_phys_to_machine(vaddr, FOREIGN_FRAME(m))
+#define FOREIGN_FRAME(m) (INVALID_P2M_ENTRY)
+
+#define mfn_to_pfn(mfn) (mfn)
+#define mfn_to_virt(mfn) (__va((mfn) << PAGE_SHIFT))
+#define pfn_to_mfn(pfn) (pfn)
+#define virt_to_mfn(virt) (__pa(virt) >> PAGE_SHIFT)
+#define virt_to_machine(virt) __pa(virt) // for tpmfront.c
#endif /* CONFIG_XEN_IA64_DOM0_VP */
#endif /* CONFIG_XEN */
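
Because Linux/ia64 already uses PG_arch_1, the hunk above marks foreign pages by pointing page->mapping at a dummy address_space and stashing the destructor in page_private; arch_free_page() then runs the destructor and returns non-zero so the generic free path skips the page. A user-space sketch of the same tagging scheme (struct page here is a toy, not the kernel's):

    /* Toy model of the PageForeign tagging: a sentinel mapping marks the
     * page and the destructor rides in the private field. */
    #include <stdio.h>

    struct page { void *mapping; unsigned long private; };

    static struct page dummy_mapping;   /* stands in for the dummy address_space */
    #define PageForeign(p)  ((p)->mapping == (void *)&dummy_mapping)

    static void set_page_foreign(struct page *p, void (*dtor)(struct page *))
    {
        p->private = (unsigned long)dtor;
        p->mapping = &dummy_mapping;
    }

    static void netif_page_dtor(struct page *p)    /* hypothetical destructor */
    {
        printf("handing page %p back to its backend\n", (void *)p);
    }

    static int arch_free_page_model(struct page *p)
    {
        int foreign = PageForeign(p);
        if (foreign)
            ((void (*)(struct page *))p->private)(p);
        return foreign;    /* non-zero: allocator must not free the page */
    }

    int main(void)
    {
        struct page pg = { 0, 0 };
        set_page_foreign(&pg, netif_page_dtor);
        printf("skipped by allocator: %d\n", arch_free_page_model(&pg));
        return 0;
    }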
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/privop.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/privop.h Tue May 30 14:30:34 2006 -0500
@@ -44,13 +44,14 @@
#define ia64_ptcl __ia64_ptcl
#define ia64_ptri __ia64_ptri
#define ia64_ptrd __ia64_ptrd
-#define ia64_get_psr_i __ia64_get_psr_i
+#define ia64_get_psr_i __ia64_get_psr_i
#define ia64_intrin_local_irq_restore __ia64_intrin_local_irq_restore
#define ia64_pal_halt_light __ia64_pal_halt_light
-#define ia64_leave_kernel __ia64_leave_kernel
-#define ia64_leave_syscall __ia64_leave_syscall
-#define ia64_switch_to __ia64_switch_to
-#define ia64_pal_call_static __ia64_pal_call_static
+#define ia64_leave_kernel __ia64_leave_kernel
+#define ia64_leave_syscall __ia64_leave_syscall
+#define ia64_trace_syscall __ia64_trace_syscall
+#define ia64_switch_to __ia64_switch_to
+#define ia64_pal_call_static __ia64_pal_call_static
#endif /* !IA64_PARAVIRTUALIZED */
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Tue May 30 14:30:34 2006 -0500
@@ -11,45 +11,41 @@
#include <asm/xen/asm-xsi-offsets.h>
+#include <xen/interface/arch-ia64.h>
#define IA64_PARAVIRTUALIZED
#ifdef __ASSEMBLY__
-#define XEN_HYPER_RFI break 0x1
-#define XEN_HYPER_RSM_PSR_DT break 0x2
-#define XEN_HYPER_SSM_PSR_DT break 0x3
-#define XEN_HYPER_COVER break 0x4
-#define XEN_HYPER_ITC_D break 0x5
-#define XEN_HYPER_ITC_I break 0x6
-#define XEN_HYPER_SSM_I break 0x7
-#define XEN_HYPER_GET_IVR break 0x8
-#define XEN_HYPER_GET_TPR break 0x9
-#define XEN_HYPER_SET_TPR break 0xa
-#define XEN_HYPER_EOI break 0xb
-#define XEN_HYPER_SET_ITM break 0xc
-#define XEN_HYPER_THASH break 0xd
-#define XEN_HYPER_PTC_GA break 0xe
-#define XEN_HYPER_ITR_D break 0xf
-#define XEN_HYPER_GET_RR break 0x10
-#define XEN_HYPER_SET_RR break 0x11
-#define XEN_HYPER_SET_KR break 0x12
-#define XEN_HYPER_FC break 0x13
-#define XEN_HYPER_GET_CPUID break 0x14
-#define XEN_HYPER_GET_PMD break 0x15
-#define XEN_HYPER_GET_EFLAG break 0x16
-#define XEN_HYPER_SET_EFLAG break 0x17
+#define XEN_HYPER_RFI break HYPERPRIVOP_RFI
+#define XEN_HYPER_RSM_PSR_DT break HYPERPRIVOP_RSM_DT
+#define XEN_HYPER_SSM_PSR_DT break HYPERPRIVOP_SSM_DT
+#define XEN_HYPER_COVER break HYPERPRIVOP_COVER
+#define XEN_HYPER_ITC_D break HYPERPRIVOP_ITC_D
+#define XEN_HYPER_ITC_I break HYPERPRIVOP_ITC_I
+#define XEN_HYPER_SSM_I break HYPERPRIVOP_SSM_I
+#define XEN_HYPER_GET_IVR break HYPERPRIVOP_GET_IVR
+#define XEN_HYPER_GET_TPR break HYPERPRIVOP_GET_TPR
+#define XEN_HYPER_SET_TPR break HYPERPRIVOP_SET_TPR
+#define XEN_HYPER_EOI break HYPERPRIVOP_EOI
+#define XEN_HYPER_SET_ITM break HYPERPRIVOP_SET_ITM
+#define XEN_HYPER_THASH break HYPERPRIVOP_THASH
+#define XEN_HYPER_PTC_GA break HYPERPRIVOP_PTC_GA
+#define XEN_HYPER_ITR_D break HYPERPRIVOP_ITR_D
+#define XEN_HYPER_GET_RR break HYPERPRIVOP_GET_RR
+#define XEN_HYPER_SET_RR break HYPERPRIVOP_SET_RR
+#define XEN_HYPER_SET_KR break HYPERPRIVOP_SET_KR
+#define XEN_HYPER_FC break HYPERPRIVOP_FC
+#define XEN_HYPER_GET_CPUID break HYPERPRIVOP_GET_CPUID
+#define XEN_HYPER_GET_PMD break HYPERPRIVOP_GET_PMD
+#define XEN_HYPER_GET_EFLAG break HYPERPRIVOP_GET_EFLAG
+#define XEN_HYPER_SET_EFLAG break HYPERPRIVOP_SET_EFLAG
#endif
#ifndef __ASSEMBLY__
-#ifdef MODULE
-extern int is_running_on_xen(void);
-#define running_on_xen (is_running_on_xen())
-#else
extern int running_on_xen;
-#endif
-
-#define XEN_HYPER_SSM_I asm("break 0x7");
-#define XEN_HYPER_GET_IVR asm("break 0x8");
+
+#define XEN_HYPER_SSM_I asm("break %0" : : "i" (HYPERPRIVOP_SSM_I))
+#define XEN_HYPER_GET_IVR asm("break %0" : : "i" (HYPERPRIVOP_GET_IVR))
/************************************************/
/* Instructions paravirtualized for correctness */
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Tue May 30 12:52:02 2006 -0500
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Tue May 30 14:30:34 2006 -0500
@@ -75,7 +75,7 @@ struct xenbus_device {
int otherend_id;
struct xenbus_watch otherend_watch;
struct device dev;
- XenbusState state;
+ enum xenbus_state state;
void *data;
};
@@ -98,7 +98,7 @@ struct xenbus_driver {
int (*probe)(struct xenbus_device *dev,
const struct xenbus_device_id *id);
void (*otherend_changed)(struct xenbus_device *dev,
- XenbusState backend_state);
+ enum xenbus_state backend_state);
int (*remove)(struct xenbus_device *dev);
int (*suspend)(struct xenbus_device *dev);
int (*resume)(struct xenbus_device *dev);
@@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev
* Return 0 on success, or -errno on error. On error, the device will switch
* to XenbusStateClosing, and the error will be saved in the store.
*/
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state);
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
/**
@@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev
* Return the state of the driver rooted at the given store path, or
* XenbusStateClosed if no state can be read.
*/
-XenbusState xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const char *path);
/***
diff -r e74246451527 -r f54d38cea8ac tools/examples/network-bridge
--- a/tools/examples/network-bridge Tue May 30 12:52:02 2006 -0500
+++ b/tools/examples/network-bridge Tue May 30 14:30:34 2006 -0500
@@ -59,7 +59,7 @@ findCommand "$@"
findCommand "$@"
evalVariables "$@"
-vifnum=${vifnum:-$(ip route list | awk '/^default / { sub(/^(eth|xenbr)/,"",$NF); print $NF }')}
+vifnum=${vifnum:-$(ip route list | awk '/^default / { print $NF }' | sed 's/^[^0-9]*//')}
bridge=${bridge:-xenbr${vifnum}}
netdev=${netdev:-eth${vifnum}}
antispoof=${antispoof:-no}
diff -r e74246451527 -r f54d38cea8ac tools/examples/xmexample.vti
--- a/tools/examples/xmexample.vti Tue May 30 12:52:02 2006 -0500
+++ b/tools/examples/xmexample.vti Tue May 30 14:30:34 2006 -0500
@@ -36,7 +36,7 @@ name = "ExampleVTIDomain"
# Random MACs are assigned if not given.
#vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0' ]
# type=ioemu specify the NIC is an ioemu device not netfront
-#vif = [ 'type=ioemu, bridge=xenbr0' ]
+vif = [ 'type=ioemu, bridge=xenbr0' ]
# for multiple NICs in device model, 3 in this example
#vif = [ 'type=ioemu, bridge=xenbr0', 'type=ioemu', 'type=ioemu']
diff -r e74246451527 -r f54d38cea8ac tools/libxc/Makefile
--- a/tools/libxc/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/Makefile Tue May 30 14:30:34 2006 -0500
@@ -20,6 +20,7 @@ SRCS += xc_physdev.c
SRCS += xc_physdev.c
SRCS += xc_private.c
SRCS += xc_sedf.c
+SRCS += xc_csched.c
SRCS += xc_tbuf.c
ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86)
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_acm.c
--- a/tools/libxc/xc_acm.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_acm.c Tue May 30 14:30:34 2006 -0500
@@ -1,13 +1,10 @@
/******************************************************************************
+ * xc_acm.c
*
- * Copyright (C) 2005 IBM Corporation
+ * Copyright (C) 2005, 2006 IBM Corporation, R Sailer
*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- *
- * Authors:
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Stefan Berger <stefanb@xxxxxxxxxxxxxx>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -17,29 +14,23 @@
#include "xc_private.h"
-int xc_acm_op(int xc_handle, struct acm_op *op)
+
+int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size)
{
int ret = -1;
DECLARE_HYPERCALL;
- op->interface_version = ACM_INTERFACE_VERSION;
+ hypercall.op = __HYPERVISOR_acm_op;
+ hypercall.arg[0] = cmd;
+ hypercall.arg[1] = (unsigned long) arg;
- hypercall.op = __HYPERVISOR_acm_op;
- hypercall.arg[0] = (unsigned long) op;
-
- if (mlock(op, sizeof(*op)) != 0) {
- PERROR("Could not lock memory for Xen policy hypercall");
- goto out1;
+ if (mlock(arg, arg_size) != 0) {
+ PERROR("xc_acm_op: arg mlock failed");
+ goto out;
}
-
ret = do_xen_hypercall(xc_handle, &hypercall);
- ret = ioctl(xc_handle, IOCTL_PRIVCMD_HYPERCALL, &hypercall);
- if (ret < 0) {
- goto out2;
- }
- out2:
- safe_munlock(op, sizeof(*op));
- out1:
+ safe_munlock(arg, arg_size);
+ out:
return ret;
}
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ia64_stubs.c Tue May 30 14:30:34 2006 -0500
@@ -50,7 +50,7 @@ xc_plan9_build(int xc_handle,
}
/*
VMM uses put_user to copy pfn_list to guest buffer, this maybe fail,
- VMM don't handle this now.
+ VMM doesn't handle this now.
This method will touch guest buffer to make sure the buffer's mapping
is tracked by VMM,
*/
@@ -66,6 +66,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
unsigned int __start_page, __nr_pages;
unsigned long max_pfns;
unsigned long *__pfn_buf;
+
__start_page = start_page;
__nr_pages = nr_pages;
__pfn_buf = pfn_buf;
@@ -75,6 +76,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
op.cmd = DOM0_GETMEMLIST;
op.u.getmemlist.domain = (domid_t)domid;
op.u.getmemlist.max_pfns = max_pfns;
+ op.u.getmemlist.num_pfns = 0;
set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf);
if ( (max_pfns != -1UL)
@@ -723,7 +725,7 @@ int xc_hvm_build(int xc_handle,
ctxt->flags = VGCF_VMX_GUEST;
ctxt->regs.cr_iip = 0x80000000ffffffb0UL;
- ctxt->vcpu.privregs = 0;
+ ctxt->privregs = 0;
memset( &launch_op, 0, sizeof(launch_op) );
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_linux_build.c Tue May 30 14:30:34 2006 -0500
@@ -268,21 +268,10 @@ static int setup_pg_tables_pae(int xc_ha
l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
- unsigned long ppt_alloc, count, nmfn;
+ unsigned long ppt_alloc, count;
/* First allocate page for page dir. */
ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
-
- if ( page_array[ppt_alloc] > 0xfffff )
- {
- nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
- if ( nmfn == 0 )
- {
- fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
- goto error_out;
- }
- page_array[ppt_alloc] = nmfn;
- }
alloc_pt(l3tab, vl3tab, pl3tab);
vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
@@ -327,6 +316,13 @@ static int setup_pg_tables_pae(int xc_ha
*vl1e &= ~_PAGE_RW;
}
vl1e++;
+ }
+
+ /* Xen requires a mid-level pgdir mapping 0xC0000000 region. */
+ if ( (vl3tab[3] & _PAGE_PRESENT) == 0 )
+ {
+ alloc_pt(l2tab, vl2tab, pl2tab);
+ vl3tab[3] = l2tab | L3_PROT;
}
munmap(vl1tab, PAGE_SIZE);
@@ -727,25 +723,28 @@ static int setup_guest(int xc_handle,
v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
if ( (v_end - vstack_end) < (512UL << 10) )
v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#if defined(__i386__)
- if ( dsi.pae_kernel )
- {
- /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
- if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
- L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
- break;
- }
- else
- {
- if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
- L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
- break;
- }
-#endif
-#if defined(__x86_64__)
#define NR(_l,_h,_s) \
(((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
((_l) & ~((1UL<<(_s))-1))) >> (_s))
+#if defined(__i386__)
+ if ( dsi.pae_kernel )
+ {
+ if ( (1 + /* # L3 */
+ NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */
+ NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT_PAE) + /* # L1 */
+ /* Include a fourth mid-level page directory for Xen. */
+ (v_end <= (3 << L3_PAGETABLE_SHIFT_PAE)))
+ <= nr_pt_pages )
+ break;
+ }
+ else
+ {
+ if ( (1 + /* # L2 */
+ NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
+ <= nr_pt_pages )
+ break;
+ }
+#elif defined(__x86_64__)
if ( (1 + /* # L4 */
NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
@@ -1116,7 +1115,7 @@ static int xc_linux_build_internal(int x
ctxt->regs.ar_fpsr = xc_ia64_fpsr_default();
/* currently done by hypervisor, should move here */
/* ctxt->regs.r28 = dom_fw_setup(); */
- ctxt->vcpu.privregs = 0;
+ ctxt->privregs = 0;
ctxt->sys_pgnr = 3;
i = 0; /* silence unused variable warning */
#else /* x86 */
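
The NR(_l,_h,_s) macro used here counts how many tables of shift _s the range [_l,_h) touches: it rounds _h up and _l down to 1<<_s boundaries and divides the distance by 1<<_s. A standalone check of the counting (macro copied from the hunk; the address range is an invented example):

    /* NR() copied from the hunk above; the range is an invented example. */
    #include <stdio.h>

    #define NR(_l,_h,_s) \
        (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
        ((_l) & ~((1UL<<(_s))-1))) >> (_s))

    int main(void)
    {
        unsigned long v_start = 0xc0000000UL;
        unsigned long v_end   = 0xc0a00000UL;    /* 10MB image, hypothetical */

        /* Non-PAE i386: each L1 table maps 4MB (shift 22) -> 3 tables. */
        printf("L1 tables (4MB slots): %lu\n", NR(v_start, v_end, 22));
        /* PAE: each L1 table maps 2MB (shift 21) -> 5 tables. */
        printf("PAE L1 tables (2MB slots): %lu\n", NR(v_start, v_end, 21));
        return 0;
    }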
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_linux_restore.c Tue May 30 14:30:34 2006 -0500
@@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int
** A page table page - need to 'uncanonicalize' it, i.e.
** replace all the references to pfns with the corresponding
** mfns for the new domain.
- **
- ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
- ** so we may need to update the p2m after the main loop.
- ** Hence we defer canonicalization of L1s until then.
*/
- if(pt_levels != 3 || pagetype != L1TAB) {
-
- if(!uncanonicalize_pagetable(pagetype, page)) {
- /*
- ** Failing to uncanonicalize a page table can be ok
- ** under live migration since the pages type may have
- ** changed by now (and we'll get an update later).
- */
- DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
- pagetype >> 28, pfn, mfn);
- nraces++;
- continue;
- }
-
+ if(!uncanonicalize_pagetable(pagetype, page)) {
+ /*
+ ** Failing to uncanonicalize a page table can be ok
+ ** under live migration since the pages type may have
+ ** changed by now (and we'll get an update later).
+ */
+ DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+ pagetype >> 28, pfn, mfn);
+ nraces++;
+ continue;
}
} else if(pagetype != NOTAB) {
@@ -397,100 +389,6 @@ int xc_linux_restore(int xc_handle, int
}
DPRINTF("Received all pages (%d races)\n", nraces);
-
- if(pt_levels == 3) {
-
- /*
- ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
- ** is a little awkward and involves (a) finding all such PGDs and
- ** replacing them with 'lowmem' versions; (b) upating the p2m[]
- ** with the new info; and (c) canonicalizing all the L1s using the
- ** (potentially updated) p2m[].
- **
- ** This is relatively slow (and currently involves two passes through
- ** the pfn_type[] array), but at least seems to be correct. May wish
- ** to consider more complex approaches to optimize this later.
- */
-
- int j, k;
-
- /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
- for (i = 0; i < max_pfn; i++) {
-
- if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
-
- unsigned long new_mfn;
- uint64_t l3ptes[4];
- uint64_t *l3tab;
-
- l3tab = (uint64_t *)
- xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ, p2m[i]);
-
- for(j = 0; j < 4; j++)
- l3ptes[j] = l3tab[j];
-
- munmap(l3tab, PAGE_SIZE);
-
- if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
- ERR("Couldn't get a page below 4GB :-(");
- goto out;
- }
-
- p2m[i] = new_mfn;
- if (xc_add_mmu_update(xc_handle, mmu,
- (((unsigned long long)new_mfn)
- << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE, i)) {
- ERR("Couldn't m2p on PAE root pgdir");
- goto out;
- }
-
- l3tab = (uint64_t *)
- xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ | PROT_WRITE, p2m[i]);
-
- for(j = 0; j < 4; j++)
- l3tab[j] = l3ptes[j];
-
- munmap(l3tab, PAGE_SIZE);
-
- }
- }
-
- /* Second pass: find all L1TABs and uncanonicalize them */
- j = 0;
-
- for(i = 0; i < max_pfn; i++) {
-
- if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
- region_mfn[j] = p2m[i];
- j++;
- }
-
- if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
-
- if (!(region_base = xc_map_foreign_batch(
- xc_handle, dom, PROT_READ | PROT_WRITE,
- region_mfn, j))) {
- ERR("map batch failed");
- goto out;
- }
-
- for(k = 0; k < j; k++) {
- if(!uncanonicalize_pagetable(L1TAB,
- region_base + k*PAGE_SIZE)) {
- ERR("failed uncanonicalize pt!");
- goto out;
- }
- }
-
- munmap(region_base, j*PAGE_SIZE);
- j = 0;
- }
- }
-
- }
if (xc_finish_mmu_updates(xc_handle, mmu)) {
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_load_elf.c Tue May 30 14:30:34 2006 -0500
@@ -59,10 +59,10 @@ static int parseelfimage(const char *ima
Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
Elf_Phdr *phdr;
Elf_Shdr *shdr;
- unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base;
+ unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base, elf_pa_off;
const char *shstrtab;
char *guestinfo=NULL, *p;
- int h;
+ int h, virt_base_defined, elf_pa_off_defined;
if ( !IS_ELF(*ehdr) )
{
@@ -164,34 +164,40 @@ static int parseelfimage(const char *ima
dsi->xen_guest_string = guestinfo;
- if ( (p = strstr(guestinfo, "VIRT_BASE=")) == NULL )
- {
- ERROR("Malformed ELF image. No VIRT_BASE specified");
- return -EINVAL;
- }
-
- virt_base = strtoul(p+10, &p, 0);
-
- dsi->elf_paddr_offset = virt_base;
- if ( (p = strstr(guestinfo, "ELF_PADDR_OFFSET=")) != NULL )
- dsi->elf_paddr_offset = strtoul(p+17, &p, 0);
+ /* Initial guess for virt_base is 0 if it is not explicitly defined. */
+ p = strstr(guestinfo, "VIRT_BASE=");
+ virt_base_defined = (p != NULL);
+ virt_base = virt_base_defined ? strtoul(p+10, &p, 0) : 0;
+
+ /* Initial guess for elf_pa_off is virt_base if not explicitly defined. */
+ p = strstr(guestinfo, "ELF_PADDR_OFFSET=");
+ elf_pa_off_defined = (p != NULL);
+ elf_pa_off = elf_pa_off_defined ? strtoul(p+17, &p, 0) : virt_base;
+
+ if ( elf_pa_off_defined && !virt_base_defined )
+ goto bad_image;
for ( h = 0; h < ehdr->e_phnum; h++ )
{
phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize));
if ( !is_loadable_phdr(phdr) )
continue;
- vaddr = phdr->p_paddr - dsi->elf_paddr_offset + virt_base;
+ vaddr = phdr->p_paddr - elf_pa_off + virt_base;
+ if ( (vaddr + phdr->p_memsz) < vaddr )
+ goto bad_image;
if ( vaddr < kernstart )
kernstart = vaddr;
if ( (vaddr + phdr->p_memsz) > kernend )
kernend = vaddr + phdr->p_memsz;
}
- if ( virt_base )
- dsi->v_start = virt_base;
- else
- dsi->v_start = kernstart;
+ /*
+ * Legacy compatibility and images with no __xen_guest section: assume
+ * header addresses are virtual addresses, and that guest memory should be
+ * mapped starting at kernel load address.
+ */
+ dsi->v_start = virt_base_defined ? virt_base : kernstart;
+ dsi->elf_paddr_offset = elf_pa_off_defined ? elf_pa_off : dsi->v_start;
dsi->v_kernentry = ehdr->e_entry;
if ( (p = strstr(guestinfo, "VIRT_ENTRY=")) != NULL )
@@ -199,11 +205,9 @@ static int parseelfimage(const char *ima
if ( (kernstart > kernend) ||
(dsi->v_kernentry < kernstart) ||
- (dsi->v_kernentry > kernend) )
- {
- ERROR("Malformed ELF image.");
- return -EINVAL;
- }
+ (dsi->v_kernentry > kernend) ||
+ (dsi->v_start > kernstart) )
+ goto bad_image;
if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
dsi->load_symtab = 1;
@@ -215,6 +219,10 @@ static int parseelfimage(const char *ima
loadelfsymtab(image, 0, 0, NULL, dsi);
return 0;
+
+ bad_image:
+ ERROR("Malformed ELF image.");
+ return -EINVAL;
}
static int
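
The rewritten note parsing makes both fields optional: VIRT_BASE defaults to 0, ELF_PADDR_OFFSET defaults to virt_base, specifying the offset without the base is rejected, and images with neither fall back to treating the program-header addresses as virtual. A condensed sketch of just those defaulting rules (the sample program-header values are invented):

    /* Condensed sketch of the VIRT_BASE / ELF_PADDR_OFFSET defaulting
     * above; the sample program-header values are invented. */
    #include <stdio.h>

    static int layout(int vb_def, unsigned long vb, int off_def, unsigned long off,
                      unsigned long p_paddr, unsigned long p_memsz)
    {
        unsigned long virt_base  = vb_def  ? vb  : 0;
        unsigned long elf_pa_off = off_def ? off : virt_base;
        unsigned long vaddr;

        if (off_def && !vb_def)
            return -1;                     /* offset without base: malformed */

        vaddr = p_paddr - elf_pa_off + virt_base;
        if (vaddr + p_memsz < vaddr)
            return -1;                     /* address wraparound: malformed */

        printf("kernel spans [%#lx, %#lx)\n", vaddr, vaddr + p_memsz);
        return 0;
    }

    int main(void)
    {
        /* Legacy image with no __xen_guest notes: header addresses are
         * taken as virtual and v_start falls back to kernstart. */
        return layout(0, 0, 0, 0, 0xc0100000UL, 0x300000UL);
    }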
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_private.c Tue May 30 14:30:34 2006 -0500
@@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v
return rc;
}
-unsigned long xc_make_page_below_4G(
- int xc_handle, uint32_t domid, unsigned long mfn)
-{
- unsigned long new_mfn;
-
- if ( xc_domain_memory_decrease_reservation(
- xc_handle, domid, 1, 0, &mfn) != 0 )
- {
- fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
- return 0;
- }
-
- if ( xc_domain_memory_increase_reservation(
- xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
- {
- fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
- return 0;
- }
-
- return new_mfn;
-}
-
/*
* Local variables:
* mode: C
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace.c Tue May 30 14:30:34 2006 -0500
@@ -185,61 +185,36 @@ map_domain_va_32(
void *guest_va,
int perm)
{
- unsigned long pde, page;
- unsigned long va = (unsigned long)guest_va;
-
- static unsigned long cr3_phys[MAX_VIRT_CPUS];
- static uint32_t *cr3_virt[MAX_VIRT_CPUS];
- static unsigned long pde_phys[MAX_VIRT_CPUS];
- static uint32_t *pde_virt[MAX_VIRT_CPUS];
- static unsigned long page_phys[MAX_VIRT_CPUS];
- static uint32_t *page_virt[MAX_VIRT_CPUS];
- static int prev_perm[MAX_VIRT_CPUS];
-
- if (ctxt[cpu].ctrlreg[3] == 0)
- return NULL;
- if ( ctxt[cpu].ctrlreg[3] != cr3_phys[cpu] )
- {
- cr3_phys[cpu] = ctxt[cpu].ctrlreg[3];
- if ( cr3_virt[cpu] )
- munmap(cr3_virt[cpu], PAGE_SIZE);
- cr3_virt[cpu] = xc_map_foreign_range(
- xc_handle, current_domid, PAGE_SIZE, PROT_READ,
- cr3_phys[cpu] >> PAGE_SHIFT);
- if ( cr3_virt[cpu] == NULL )
- return NULL;
- }
- pde = to_ma(cpu, cr3_virt[cpu][vtopdi(va)]);
- if ( pde != pde_phys[cpu] )
- {
- pde_phys[cpu] = pde;
- if ( pde_virt[cpu] )
- munmap(pde_virt[cpu], PAGE_SIZE);
- pde_virt[cpu] = xc_map_foreign_range(
- xc_handle, current_domid, PAGE_SIZE, PROT_READ,
- pde_phys[cpu] >> PAGE_SHIFT);
- if ( pde_virt[cpu] == NULL )
- return NULL;
- }
- page = to_ma(cpu, pde_virt[cpu][vtopti(va)]);
-
- if ( (page != page_phys[cpu]) || (perm != prev_perm[cpu]) )
- {
- page_phys[cpu] = page;
- if ( page_virt[cpu] )
- munmap(page_virt[cpu], PAGE_SIZE);
- page_virt[cpu] = xc_map_foreign_range(
- xc_handle, current_domid, PAGE_SIZE, perm,
- page_phys[cpu] >> PAGE_SHIFT);
- if ( page_virt[cpu] == NULL )
- {
- page_phys[cpu] = 0;
- return NULL;
- }
- prev_perm[cpu] = perm;
- }
-
- return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
+ unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
+ uint32_t *l2, *l1;
+ static void *v[MAX_VIRT_CPUS];
+
+ l2 = xc_map_foreign_range(
+        xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
+ if ( l2 == NULL )
+ return NULL;
+
+ l2e = l2[l2_table_offset_i386(va)];
+ munmap(l2, PAGE_SIZE);
+ if ( !(l2e & _PAGE_PRESENT) )
+ return NULL;
+ l1p = to_ma(cpu, l2e);
+    l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT);
+ if ( l1 == NULL )
+ return NULL;
+
+ l1e = l1[l1_table_offset_i386(va)];
+ munmap(l1, PAGE_SIZE);
+ if ( !(l1e & _PAGE_PRESENT) )
+ return NULL;
+ p = to_ma(cpu, l1e);
+ if ( v[cpu] != NULL )
+ munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
+ if ( v[cpu] == NULL )
+ return NULL;
+
+ return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
}
@@ -250,36 +225,45 @@ map_domain_va_pae(
void *guest_va,
int perm)
{
- unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+ unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
uint64_t *l3, *l2, *l1;
- static void *v;
+ static void *v[MAX_VIRT_CPUS];
l3 = xc_map_foreign_range(
        xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT);
if ( l3 == NULL )
return NULL;
- l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]);
+ l3e = l3[l3_table_offset_pae(va)];
+ munmap(l3, PAGE_SIZE);
+ if ( !(l3e & _PAGE_PRESENT) )
+ return NULL;
+ l2p = to_ma(cpu, l3e);
    l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT);
- munmap(l3, PAGE_SIZE);
if ( l2 == NULL )
return NULL;
- l1p = to_ma(cpu, l2[l2_table_offset_pae(va)]);
+ l2e = l2[l2_table_offset_pae(va)];
+ munmap(l2, PAGE_SIZE);
+ if ( !(l2e & _PAGE_PRESENT) )
+ return NULL;
+ l1p = to_ma(cpu, l2e);
    l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p >> PAGE_SHIFT);
- munmap(l2, PAGE_SIZE);
if ( l1 == NULL )
return NULL;
- p = to_ma(cpu, l1[l1_table_offset_pae(va)]);
- if ( v != NULL )
- munmap(v, PAGE_SIZE);
-    v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
+ l1e = l1[l1_table_offset_pae(va)];
munmap(l1, PAGE_SIZE);
- if ( v == NULL )
- return NULL;
-
- return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1)));
+ if ( !(l1e & _PAGE_PRESENT) )
+ return NULL;
+ p = to_ma(cpu, l1e);
+ if ( v[cpu] != NULL )
+ munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
+ if ( v[cpu] == NULL )
+ return NULL;
+
+ return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
}
#ifdef __x86_64__
@@ -290,9 +274,10 @@ map_domain_va_64(
void *guest_va,
int perm)
{
- unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va;
+    unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned long)guest_va;
uint64_t *l4, *l3, *l2, *l1;
- static void *v;
+ static void *v[MAX_VIRT_CPUS];
+
if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
return map_domain_va_32(xc_handle, cpu, guest_va, perm);
@@ -302,41 +287,51 @@ map_domain_va_64(
if ( l4 == NULL )
return NULL;
- l3p = to_ma(cpu, l4[l4_table_offset(va)]);
+ l4e = l4[l4_table_offset(va)];
+ munmap(l4, PAGE_SIZE);
+ if ( !(l4e & _PAGE_PRESENT) )
+ return NULL;
+ l3p = to_ma(cpu, l4e);
    l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l3p >> PAGE_SHIFT);
- munmap(l4, PAGE_SIZE);
if ( l3 == NULL )
return NULL;
- l2p = to_ma(cpu, l3[l3_table_offset(va)]);
+ l3e = l3[l3_table_offset(va)];
+ munmap(l3, PAGE_SIZE);
+ if ( !(l3e & _PAGE_PRESENT) )
+ return NULL;
+ l2p = to_ma(cpu, l3e);
    l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT);
- munmap(l3, PAGE_SIZE);
if ( l2 == NULL )
return NULL;
l1 = NULL;
- l1e = to_ma(cpu, l2[l2_table_offset(va)]);
- l1p = l1e >> PAGE_SHIFT;
- if (l1e & 0x80) { /* 2M pages */
+ l2e = l2[l2_table_offset(va)];
+ munmap(l2, PAGE_SIZE);
+ if ( !(l2e & _PAGE_PRESENT) )
+ return NULL;
+ l1p = to_ma(cpu, l2e);
+ if (l2e & 0x80) { /* 2M pages */
p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT);
} else { /* 4K pages */
- //l1p = to_ma(cpu, l1e[l1_table_offset(va)]);
        l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p >> PAGE_SHIFT);
- munmap(l2, PAGE_SIZE);
if ( l1 == NULL )
return NULL;
- p = to_ma(cpu, l1[l1_table_offset(va)]);
- }
- if ( v != NULL )
- munmap(v, PAGE_SIZE);
-    v = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
+ l1e = l1[l1_table_offset(va)];
+ if ( !(l1e & _PAGE_PRESENT) )
+ return NULL;
+ p = to_ma(cpu, l1e);
+ }
+ if ( v[cpu] != NULL )
+ munmap(v[cpu], PAGE_SIZE);
+    v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
if (l1)
munmap(l1, PAGE_SIZE);
- if ( v == NULL )
- return NULL;
-
- return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1)));
+ if ( v[cpu] == NULL )
+ return NULL;
+
+ return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
}
#endif
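
Every level of the rewritten walks now bails out unless the entry's present bit is set, and the 64-bit path treats bit 7 of an L2 entry as a 2MB superpage instead of a pointer to an L1 table. A tiny decoder for those two bits (the sample entry value is made up; the bit positions are the standard x86 ones):

    /* Decode the two PTE bits the walks above test; the entry is made up. */
    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_PRESENT 0x001
    #define _PAGE_PSE     0x080    /* in an L2 entry: 2MB superpage */

    int main(void)
    {
        uint64_t l2e = 0xa2d000e3ULL;    /* hypothetical L2 entry */

        if (!(l2e & _PAGE_PRESENT)) {
            puts("not present: walk returns NULL");
        } else if (l2e & _PAGE_PSE) {
            printf("2MB page, first 4KB frame %#llx\n",
                   (unsigned long long)(l2e >> 21 << 9));
        } else {
            printf("L1 table at frame %#llx\n",
                   (unsigned long long)(l2e >> 12));
        }
        return 0;
    }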
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace.h
--- a/tools/libxc/xc_ptrace.h Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace.h Tue May 30 14:30:34 2006 -0500
@@ -7,7 +7,6 @@
#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
#define X86_CR0_PG 0x80000000 /* Paging (RW) */
#define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PDRSHIFT 22
#define PSL_T 0x00000100 /* trace enable bit */
#ifdef __x86_64__
@@ -162,8 +161,6 @@ struct gdb_regs {
#endif
#define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define vtopdi(va) ((va) >> PDRSHIFT)
-#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
#endif
typedef void (*thr_ev_handler_t)(long);
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_ptrace_core.c Tue May 30 14:30:34 2006 -0500
@@ -3,6 +3,7 @@
#include <sys/ptrace.h>
#include <sys/wait.h>
#include "xc_private.h"
+#include "xg_private.h"
#include "xc_ptrace.h"
#include <time.h>
@@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd,
}
cr3_virt[cpu] = v;
}
- if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+    if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical address */
return NULL;
if (ctxt[cpu].flags & VGCF_HVM_GUEST)
pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd,
return NULL;
pde_virt[cpu] = v;
}
- if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+    if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical address */
return NULL;
if (ctxt[cpu].flags & VGCF_HVM_GUEST)
page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd,
map_mtop_offset(page_phys[cpu]));
if (v == MAP_FAILED)
{
- printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page,
vtopti(va));
+ printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page,
l1_table_offset_i386(va));
page_phys[cpu] = 0;
return NULL;
}
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_tbuf.c
--- a/tools/libxc/xc_tbuf.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xc_tbuf.c Tue May 30 14:30:34 2006 -0500
@@ -18,53 +18,57 @@
static int tbuf_enable(int xc_handle, int enable)
{
- DECLARE_DOM0_OP;
+ DECLARE_DOM0_OP;
- op.cmd = DOM0_TBUFCONTROL;
- op.interface_version = DOM0_INTERFACE_VERSION;
- if (enable)
- op.u.tbufcontrol.op = DOM0_TBUF_ENABLE;
- else
- op.u.tbufcontrol.op = DOM0_TBUF_DISABLE;
+ op.cmd = DOM0_TBUFCONTROL;
+ op.interface_version = DOM0_INTERFACE_VERSION;
+ if (enable)
+ op.u.tbufcontrol.op = DOM0_TBUF_ENABLE;
+ else
+ op.u.tbufcontrol.op = DOM0_TBUF_DISABLE;
- return xc_dom0_op(xc_handle, &op);
+ return xc_dom0_op(xc_handle, &op);
}
int xc_tbuf_set_size(int xc_handle, unsigned long size)
{
- DECLARE_DOM0_OP;
+ DECLARE_DOM0_OP;
- op.cmd = DOM0_TBUFCONTROL;
- op.interface_version = DOM0_INTERFACE_VERSION;
- op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE;
- op.u.tbufcontrol.size = size;
+ op.cmd = DOM0_TBUFCONTROL;
+ op.interface_version = DOM0_INTERFACE_VERSION;
+ op.u.tbufcontrol.op = DOM0_TBUF_SET_SIZE;
+ op.u.tbufcontrol.size = size;
- return xc_dom0_op(xc_handle, &op);
+ return xc_dom0_op(xc_handle, &op);
}
int xc_tbuf_get_size(int xc_handle, unsigned long *size)
{
- int rc;
- DECLARE_DOM0_OP;
+ int rc;
+ DECLARE_DOM0_OP;
- op.cmd = DOM0_TBUFCONTROL;
- op.interface_version = DOM0_INTERFACE_VERSION;
- op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO;
+ op.cmd = DOM0_TBUFCONTROL;
+ op.interface_version = DOM0_INTERFACE_VERSION;
+ op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO;
- rc = xc_dom0_op(xc_handle, &op);
- if (rc == 0)
- *size = op.u.tbufcontrol.size;
- return rc;
+ rc = xc_dom0_op(xc_handle, &op);
+ if (rc == 0)
+ *size = op.u.tbufcontrol.size;
+ return rc;
}
int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn,
- unsigned long *size)
+ unsigned long *size)
{
DECLARE_DOM0_OP;
int rc;
- if ( xc_tbuf_set_size(xc_handle, cnt) != 0 )
- return -1;
+ /*
+ * Ignore errors (at least for now) as we get an error if size is already
+ * set (since trace buffers cannot be reallocated). If we really have no
+ * buffers at all then tbuf_enable() will fail, so this is safe.
+ */
+ (void)xc_tbuf_set_size(xc_handle, cnt);
if ( tbuf_enable(xc_handle, 1) != 0 )
return -1;
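
With the size error now ignored, a monitor can call xc_tbuf_enable() unconditionally; if no buffers exist at all the enable itself fails. A hypothetical caller (the handle and requested size are arbitrary; the signatures are the ones in this tree, and running it needs a Xen dom0):

    /* Hypothetical trace-buffer consumer; requested size is arbitrary. */
    #include <stdio.h>
    #include "xenctrl.h"

    int main(void)
    {
        unsigned long mfn, size;
        int xc = xc_interface_open();

        if (xc == -1) {
            perror("xc_interface_open");
            return 1;
        }
        /* 20 is forwarded to xc_tbuf_set_size(); a failure there is now
         * tolerated, only the enable itself is checked. */
        if (xc_tbuf_enable(xc, 20, &mfn, &size) != 0) {
            fprintf(stderr, "no trace buffers available\n");
            return 1;
        }
        printf("trace buffers: mfn %#lx size %lu\n", mfn, size);
        return 0;
    }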
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xenctrl.h Tue May 30 14:30:34 2006 -0500
@@ -359,6 +359,14 @@ int xc_sedf_domain_get(int xc_handle,
uint64_t *latency, uint16_t *extratime,
uint16_t *weight);
+int xc_sched_credit_domain_set(int xc_handle,
+ uint32_t domid,
+ struct sched_credit_adjdom *sdom);
+
+int xc_sched_credit_domain_get(int xc_handle,
+ uint32_t domid,
+ struct sched_credit_adjdom *sdom);
+
typedef evtchn_status_t xc_evtchn_status_t;
/*
@@ -449,9 +457,6 @@ int xc_domain_iomem_permission(int xc_ha
unsigned long first_mfn,
unsigned long nr_mfns,
uint8_t allow_access);
-
-unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
- unsigned long mfn);
typedef dom0_perfc_desc_t xc_perfc_desc_t;
/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
@@ -599,6 +604,6 @@ int xc_add_mmu_update(int xc_handle, xc_
unsigned long long ptr, unsigned long long val);
int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu);
-int xc_acm_op(int xc_handle, struct acm_op *op);
+int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size);
#endif
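
The new calls take a sched_credit_adjdom whose 16-bit weight and cap default, in the Python binding below, to 0 and ~0 respectively when not supplied. A hypothetical libxc caller (domain id and values are arbitrary examples):

    /* Hypothetical caller of the new credit scheduler interface. */
    #include <stdio.h>
    #include <stdint.h>
    #include "xenctrl.h"

    int main(void)
    {
        struct sched_credit_adjdom sdom;
        uint32_t domid = 1;                  /* arbitrary example domain */
        int xc = xc_interface_open();

        if (xc == -1)
            return 1;

        sdom.weight = 512;                   /* example weight */
        sdom.cap = (uint16_t)~0U;            /* same default the Python
                                                binding uses when unset */
        if (xc_sched_credit_domain_set(xc, domid, &sdom) != 0) {
            perror("xc_sched_credit_domain_set");
            return 1;
        }
        if (xc_sched_credit_domain_get(xc, domid, &sdom) == 0)
            printf("dom%u: weight=%u cap=%u\n", domid, sdom.weight, sdom.cap);
        return 0;
    }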
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h Tue May 30 12:52:02 2006 -0500
+++ b/tools/libxc/xg_private.h Tue May 30 14:30:34 2006 -0500
@@ -48,6 +48,8 @@ unsigned long csum_page (void * page);
#define L2_PAGETABLE_SHIFT_PAE 21
#define L3_PAGETABLE_SHIFT_PAE 30
+#define L2_PAGETABLE_SHIFT_I386 22
+
#if defined(__i386__)
#define L1_PAGETABLE_SHIFT 12
#define L2_PAGETABLE_SHIFT 22
@@ -61,6 +63,9 @@ unsigned long csum_page (void * page);
#define L1_PAGETABLE_ENTRIES_PAE 512
#define L2_PAGETABLE_ENTRIES_PAE 512
#define L3_PAGETABLE_ENTRIES_PAE 4
+
+#define L1_PAGETABLE_ENTRIES_I386 1024
+#define L2_PAGETABLE_ENTRIES_I386 1024
#if defined(__i386__)
#define L1_PAGETABLE_ENTRIES 1024
@@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t;
#define l3_table_offset_pae(_a) \
(((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
+#define l1_table_offset_i386(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
+#define l2_table_offset_i386(_a) \
+ (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))
+
#if defined(__i386__)
#define l1_table_offset(_a) \
(((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
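
The new _i386 variants index classic two-level tables: bits 31-22 of a virtual address pick the L2 slot and bits 21-12 the L1 slot. A quick check using the macros from this hunk (the address is arbitrary):

    /* The i386 offset macros from this hunk, applied to an arbitrary VA. */
    #include <stdio.h>

    #define L1_PAGETABLE_SHIFT          12
    #define L2_PAGETABLE_SHIFT_I386     22
    #define L1_PAGETABLE_ENTRIES_I386 1024
    #define L2_PAGETABLE_ENTRIES_I386 1024

    #define l1_table_offset_i386(_a) \
        (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
    #define l2_table_offset_i386(_a) \
        (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))

    int main(void)
    {
        unsigned long va = 0xc0123456UL;
        /* Prints l2 slot 768, l1 slot 291 for this address. */
        printf("va %#lx -> l2 slot %lu, l1 slot %lu\n",
               va, l2_table_offset_i386(va), l1_table_offset_i386(va));
        return 0;
    }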
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/acm/acm.c
--- a/tools/python/xen/lowlevel/acm/acm.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/acm/acm.c Tue May 30 14:30:34 2006 -0500
@@ -38,7 +38,7 @@ fprintf(stderr, "ERROR: " _m " (%d = %s)
/* generic shared function */
void * __getssid(int domid, uint32_t *buflen)
{
- struct acm_op op;
+ struct acm_getssid getssid;
int xc_handle;
#define SSID_BUFFER_SIZE 4096
void *buf = NULL;
@@ -51,14 +51,13 @@ void * __getssid(int domid, uint32_t *bu
goto out2;
}
memset(buf, 0, SSID_BUFFER_SIZE);
- op.cmd = ACM_GETSSID;
- op.interface_version = ACM_INTERFACE_VERSION;
- op.u.getssid.ssidbuf = buf;
- op.u.getssid.ssidbuf_size = SSID_BUFFER_SIZE;
- op.u.getssid.get_ssid_by = DOMAINID;
- op.u.getssid.id.domainid = domid;
-
- if (xc_acm_op(xc_handle, &op) < 0) {
+ getssid.interface_version = ACM_INTERFACE_VERSION;
+ getssid.ssidbuf = buf;
+ getssid.ssidbuf_size = SSID_BUFFER_SIZE;
+ getssid.get_ssid_by = DOMAINID;
+ getssid.id.domainid = domid;
+
+ if (xc_acm_op(xc_handle, ACMOP_getssid, &getssid, sizeof(getssid)) < 0) {
if (errno == EACCES)
PERROR("ACM operation failed.");
free(buf);
@@ -147,7 +146,7 @@ static PyObject *getdecision(PyObject *
static PyObject *getdecision(PyObject * self, PyObject * args)
{
char *arg1_name, *arg1, *arg2_name, *arg2, *decision = NULL;
- struct acm_op op;
+ struct acm_getdecision getdecision;
int xc_handle;
if (!PyArg_ParseTuple(args, "ssss", &arg1_name, &arg1, &arg2_name, &arg2))
{
@@ -163,34 +162,33 @@ static PyObject *getdecision(PyObject *
(strcmp(arg2_name, "domid") && strcmp(arg2_name, "ssidref")))
return NULL;
- op.cmd = ACM_GETDECISION;
- op.interface_version = ACM_INTERFACE_VERSION;
- op.u.getdecision.hook = SHARING;
+ getdecision.interface_version = ACM_INTERFACE_VERSION;
+ getdecision.hook = SHARING;
if (!strcmp(arg1_name, "domid")) {
- op.u.getdecision.get_decision_by1 = DOMAINID;
- op.u.getdecision.id1.domainid = atoi(arg1);
- } else {
- op.u.getdecision.get_decision_by1 = SSIDREF;
- op.u.getdecision.id1.ssidref = atol(arg1);
+ getdecision.get_decision_by1 = DOMAINID;
+ getdecision.id1.domainid = atoi(arg1);
+ } else {
+ getdecision.get_decision_by1 = SSIDREF;
+ getdecision.id1.ssidref = atol(arg1);
}
if (!strcmp(arg2_name, "domid")) {
- op.u.getdecision.get_decision_by2 = DOMAINID;
- op.u.getdecision.id2.domainid = atoi(arg2);
- } else {
- op.u.getdecision.get_decision_by2 = SSIDREF;
- op.u.getdecision.id2.ssidref = atol(arg2);
- }
-
- if (xc_acm_op(xc_handle, &op) < 0) {
+ getdecision.get_decision_by2 = DOMAINID;
+ getdecision.id2.domainid = atoi(arg2);
+ } else {
+ getdecision.get_decision_by2 = SSIDREF;
+ getdecision.id2.ssidref = atol(arg2);
+ }
+
+    if (xc_acm_op(xc_handle, ACMOP_getdecision, &getdecision, sizeof(getdecision)) < 0) {
if (errno == EACCES)
PERROR("ACM operation failed.");
}
xc_interface_close(xc_handle);
- if (op.u.getdecision.acm_decision == ACM_ACCESS_PERMITTED)
+ if (getdecision.acm_decision == ACM_ACCESS_PERMITTED)
decision = "PERMITTED";
- else if (op.u.getdecision.acm_decision == ACM_ACCESS_DENIED)
+ else if (getdecision.acm_decision == ACM_ACCESS_DENIED)
decision = "DENIED";
return Py_BuildValue("s", decision);
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/xc/xc.c Tue May 30 14:30:34 2006 -0500
@@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc
"weight", weight);
}
+static PyObject *pyxc_sched_credit_domain_set(XcObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ uint32_t domid;
+ uint16_t weight;
+ uint16_t cap;
+ static char *kwd_list[] = { "dom", "weight", "cap", NULL };
+ static char kwd_type[] = "I|HH";
+ struct sched_credit_adjdom sdom;
+
+ weight = 0;
+ cap = (uint16_t)~0U;
+ if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list,
+ &domid, &weight, &cap) )
+ return NULL;
+
+ sdom.weight = weight;
+ sdom.cap = cap;
+
+ if ( xc_sched_credit_domain_set(self->xc_handle, domid, &sdom) != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
+}
+
+static PyObject *pyxc_sched_credit_domain_get(XcObject *self, PyObject *args)
+{
+ uint32_t domid;
+ struct sched_credit_adjdom sdom;
+
+ if( !PyArg_ParseTuple(args, "I", &domid) )
+ return NULL;
+
+ if ( xc_sched_credit_domain_get(self->xc_handle, domid, &sdom) != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ return Py_BuildValue("{s:H,s:H}",
+ "weight", sdom.weight,
+ "cap", sdom.cap);
+}
+
static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
{
uint32_t dom;
@@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = {
" slice [long]: CPU reservation per period\n"
" latency [long]: domain's wakeup latency hint\n"
" extratime [int]: domain aware of extratime?\n"},
+
+ { "sched_credit_domain_set",
+ (PyCFunction)pyxc_sched_credit_domain_set,
+ METH_KEYWORDS, "\n"
+ "Set the scheduling parameters for a domain when running with the\n"
+ "SMP credit scheduler.\n"
+ " domid [int]: domain id to set\n"
+ " weight [short]: domain's scheduling weight\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
+ { "sched_credit_domain_get",
+ (PyCFunction)pyxc_sched_credit_domain_get,
+ METH_VARARGS, "\n"
+ "Get the scheduling parameters for a domain when running with the\n"
+ "SMP credit scheduler.\n"
+ " domid [int]: domain id to get\n"
+ "Returns: [dict]\n"
+ " weight [short]: domain's scheduling weight\n"},
{ "evtchn_alloc_unbound",
(PyCFunction)pyxc_evtchn_alloc_unbound,
@@ -1172,7 +1233,7 @@ PyXc_init(XcObject *self, PyObject *args
PyXc_init(XcObject *self, PyObject *args, PyObject *kwds)
{
if ((self->xc_handle = xc_interface_open()) == -1) {
- PyErr_SetFromErrno(PyExc_RuntimeError);
+ PyErr_SetFromErrno(xc_error);
return -1;
}
@@ -1245,7 +1306,7 @@ PyMODINIT_FUNC initxc(void)
if (m == NULL)
return;
- xc_error = PyErr_NewException(PKG ".error", NULL, NULL);
+ xc_error = PyErr_NewException(PKG ".Error", PyExc_RuntimeError, NULL);
zero = PyInt_FromLong(0);
/* KAF: This ensures that we get debug output in a timely manner. */
@@ -1254,6 +1315,9 @@ PyMODINIT_FUNC initxc(void)
Py_INCREF(&PyXcType);
PyModule_AddObject(m, CLS, (PyObject *)&PyXcType);
+
+ Py_INCREF(xc_error);
+ PyModule_AddObject(m, "Error", xc_error);
}
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/lowlevel/xs/xs.c Tue May 30 14:30:34 2006 -0500
@@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs
if (perms) {
PyObject *val = PyList_New(perms_n);
- for (i = 0; i < perms_n; i++, perms++) {
- PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
- "dom", perms->id,
- "read", perms->perms & XS_PERM_READ,
- "write",perms->perms & XS_PERM_WRITE);
+ for (i = 0; i < perms_n; i++) {
+ PyObject *p =
+ Py_BuildValue("{s:i,s:i,s:i}",
+ "dom", perms[i].id,
+ "read", perms[i].perms & XS_PERM_READ,
+ "write", perms[i].perms & XS_PERM_WRITE);
PyList_SetItem(val, i, p);
}
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/XendDomain.py Tue May 30 14:30:34 2006 -0500
@@ -522,6 +522,28 @@ class XendDomain:
except Exception, ex:
raise XendError(str(ex))
+ def domain_sched_credit_get(self, domid):
+ """Get credit scheduler parameters for a domain.
+ """
+ dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+ if not dominfo:
+ raise XendInvalidDomain(str(domid))
+ try:
+ return xc.sched_credit_domain_get(dominfo.getDomid())
+ except Exception, ex:
+ raise XendError(str(ex))
+
+ def domain_sched_credit_set(self, domid, weight, cap):
+ """Set credit scheduler parameters for a domain.
+ """
+ dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+ if not dominfo:
+ raise XendInvalidDomain(str(domid))
+ try:
+ return xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap)
+ except Exception, ex:
+ raise XendError(str(ex))
+
def domain_maxmem_set(self, domid, mem):
"""Set the memory limit for a domain.
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py Tue May 30 14:30:34 2006 -0500
@@ -29,6 +29,7 @@ import string
import string
import time
import threading
+import os
import xen.lowlevel.xc
from xen.util import asserts
@@ -700,6 +701,16 @@ class XendDomainInfo:
log.debug("Storing VM details: %s", to_store)
self.writeVm(to_store)
+ self.setVmPermissions()
+
+
+ def setVmPermissions(self):
+ """Allow the guest domain to read its UUID. We don't allow it to
+ access any other entry, for security."""
+ xstransact.SetPermissions('%s/uuid' % self.vmpath,
+ { 'dom' : self.domid,
+ 'read' : True,
+ 'write' : False })
def storeDomDetails(self):
@@ -1264,7 +1275,14 @@ class XendDomainInfo:
m = self.image.getDomainMemory(self.info['memory'] * 1024)
balloon.free(m)
xc.domain_setmaxmem(self.domid, m)
- xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
+
+ init_reservation = self.info['memory'] * 1024
+ if os.uname()[4] == 'ia64':
+ # Workaround until ia64 properly supports ballooning.
+ init_reservation = m
+
+ xc.domain_memory_increase_reservation(self.domid, init_reservation,
+ 0, 0)
self.createChannels()
@@ -1527,6 +1545,10 @@ class XendDomainInfo:
self.configure_bootloader()
config = self.sxpr()
+
+ if self.infoIsSet('cpus') and len(self.info['cpus']) != 0:
+ config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y),
+ self.info['cpus'])])
if self.readVm(RESTART_IN_PROGRESS):
log.error('Xend failed during restart of domain %d. '
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/balloon.py Tue May 30 14:30:34 2006 -0500
@@ -32,6 +32,7 @@ BALLOON_OUT_SLACK = 1 # MiB. We need th
BALLOON_OUT_SLACK = 1 # MiB. We need this because the physinfo details are
# rounded.
RETRY_LIMIT = 10
+RETRY_LIMIT_INCR = 5
##
# The time to sleep between retries grows linearly, using this value (in
# seconds). When the system is lightly loaded, memory should be scrubbed and
@@ -118,7 +119,8 @@ def free(required):
retries = 0
sleep_time = SLEEP_TIME_GROWTH
last_new_alloc = None
- while retries < RETRY_LIMIT:
+ rlimit = RETRY_LIMIT
+ while retries < rlimit:
free_mem = xc.physinfo()['free_memory']
if free_mem >= need_mem:
@@ -127,7 +129,9 @@ def free(required):
return
if retries == 0:
- log.debug("Balloon: free %d; need %d.", free_mem, need_mem)
+ rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
+ log.debug("Balloon: free %d; need %d; retries: %d.",
+ free_mem, need_mem, rlimit)
if dom0_min_mem > 0:
dom0_alloc = get_dom0_current_alloc()
@@ -143,8 +147,9 @@ def free(required):
# Continue to retry, waiting for ballooning.
time.sleep(sleep_time)
+ if retries < 2 * RETRY_LIMIT:
+ sleep_time += SLEEP_TIME_GROWTH
retries += 1
- sleep_time += SLEEP_TIME_GROWTH
# Not enough memory; diagnose the problem.
if dom0_min_mem == 0:
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/image.py Tue May 30 14:30:34 2006 -0500
@@ -19,6 +19,7 @@
import os, string
import re
+import math
import xen.lowlevel.xc
from xen.xend import sxp
@@ -141,11 +142,16 @@ class ImageHandler:
% (self.ostype, self.vm.getDomid(), str(result)))
- def getDomainMemory(self, mem):
+ def getDomainMemory(self, mem_kb):
"""@return The memory required, in KiB, by the domain to store the
- given amount, also in KiB. This is normally just mem, but HVM domains
- have overheads to account for."""
- return mem
+ given amount, also in KiB."""
+ if os.uname()[4] != 'ia64':
+ # A little extra because auto-ballooning is broken w.r.t. HVM
+ # guests. Also, slack is necessary for live migration since that
+ # uses shadow page tables.
+ if 'hvm' in xc.xeninfo()['xen_caps']:
+ mem_kb += 4*1024;
+ return mem_kb
def buildDomain(self):
"""Build the domain. Define in subclass."""
@@ -377,15 +383,20 @@ class HVMImageHandler(ImageHandler):
os.waitpid(self.pid, 0)
self.pid = 0
- def getDomainMemory(self, mem):
+ def getDomainMemory(self, mem_kb):
"""@see ImageHandler.getDomainMemory"""
- page_kb = 4
- extra_pages = 0
if os.uname()[4] == 'ia64':
page_kb = 16
# ROM size for guest firmware, ioreq page and xenstore page
extra_pages = 1024 + 2
- return mem + extra_pages * page_kb
+ else:
+ page_kb = 4
+            # This was derived empirically:
+ # 2.4 MB overhead per 1024 MB RAM + 8 MB constant
+ # + 4 to avoid low-memory condition
+ extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
+ extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
+ return mem_kb + extra_pages * page_kb
def register_shutdown_watch(self):
""" add xen store watch on control/shutdown """
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/server/SrvDomain.py Tue May 30 14:30:34 2006 -0500
@@ -129,6 +129,20 @@ class SrvDomain(SrvDir):
['latency', 'int'],
['extratime', 'int'],
['weight', 'int']])
+ val = fn(req.args, {'dom': self.dom.domid})
+ return val
+
+ def op_domain_sched_credit_get(self, _, req):
+ fn = FormFn(self.xd.domain_sched_credit_get,
+ [['dom', 'int']])
+ val = fn(req.args, {'dom': self.dom.domid})
+ return val
+
+
+ def op_domain_sched_credit_set(self, _, req):
+ fn = FormFn(self.xd.domain_sched_credit_set,
+ [['dom', 'int'],
+ ['weight', 'int']])
val = fn(req.args, {'dom': self.dom.domid})
return val
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xend/xenstore/xstransact.py Tue May 30 14:30:34 2006 -0500
@@ -221,6 +221,34 @@ class xstransact:
xshandle().mkdir(self.transaction, self.prependPath(key))
+ def get_permissions(self, *args):
+ """If no arguments are given, return the permissions at this
+ transaction's path. If one argument is given, treat that argument as
+ a subpath to this transaction's path, and return the permissions at
+ that path. Otherwise, treat each argument as a subpath to this
+ transaction's path, and return a list composed of the permissions at
+ each of those instead.
+ """
+ if len(args) == 0:
+ return xshandle().get_permissions(self.transaction, self.path)
+ if len(args) == 1:
+ return self._get_permissions(args[0])
+ ret = []
+ for key in args:
+ ret.append(self._get_permissions(key))
+ return ret
+
+
+ def _get_permissions(self, key):
+ path = self.prependPath(key)
+ try:
+ return xshandle().get_permissions(self.transaction, path)
+ except RuntimeError, ex:
+ raise RuntimeError(ex.args[0],
+ '%s, while getting permissions from %s' %
+ (ex.args[1], path))
+
+
def set_permissions(self, *args):
if len(args) == 0:
raise TypeError
diff -r e74246451527 -r f54d38cea8ac tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/python/xen/xm/main.py Tue May 30 14:30:34 2006 -0500
@@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT
specifies another way of setting a
domain's\n\
cpu period/slice."
+sched_credit_help = "sched-credit Set or get credit
scheduler parameters"
block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode>
[BackDomId] Create a new virtual block device"""
block_detach_help = """block-detach <DomId> <DevId> Destroy a domain's
virtual block device,
@@ -174,6 +175,7 @@ host_commands = [
]
scheduler_commands = [
+ "sched-credit",
"sched-bvt",
"sched-bvt-ctxallow",
"sched-sedf",
@@ -735,6 +737,48 @@ def xm_sched_sedf(args):
else:
print_sedf(sedf_info)
+def xm_sched_credit(args):
+ usage_msg = """sched-credit: Set or get credit scheduler parameters
+ Usage:
+
+ sched-credit -d domain [-w weight] [-c cap]
+ """
+ try:
+ opts, args = getopt.getopt(args[0:], "d:w:c:",
+ ["domain=", "weight=", "cap="])
+ except getopt.GetoptError:
+ # print help information and exit:
+ print usage_msg
+ sys.exit(1)
+
+ domain = None
+ weight = None
+ cap = None
+
+ for o, a in opts:
+ if o == "-d":
+ domain = a
+ elif o == "-w":
+ weight = int(a)
+ elif o == "-c":
+ cap = int(a);
+
+ if domain is None:
+ # place holder for system-wide scheduler parameters
+ print usage_msg
+ sys.exit(1)
+
+ if weight is None and cap is None:
+ print server.xend.domain.sched_credit_get(domain)
+ else:
+ if weight is None:
+ weight = int(0)
+ if cap is None:
+ cap = int(~0)
+
+ err = server.xend.domain.sched_credit_set(domain, weight, cap)
+ if err != 0:
+ print err
def xm_info(args):
arg_check(args, "info", 0)
@@ -1032,6 +1076,7 @@ commands = {
"sched-bvt": xm_sched_bvt,
"sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
"sched-sedf": xm_sched_sedf,
+ "sched-credit": xm_sched_credit,
# block
"block-attach": xm_block_attach,
"block-detach": xm_block_detach,
diff -r e74246451527 -r f54d38cea8ac tools/security/secpol_tool.c
--- a/tools/security/secpol_tool.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/security/secpol_tool.c Tue May 30 14:30:34 2006 -0500
@@ -231,14 +231,16 @@ uint8_t pull_buffer[PULL_CACHE_SIZE];
uint8_t pull_buffer[PULL_CACHE_SIZE];
int acm_domain_getpolicy(int xc_handle)
{
- struct acm_op op;
+ struct acm_getpolicy getpolicy;
int ret;
memset(pull_buffer, 0x00, sizeof(pull_buffer));
- op.cmd = ACM_GETPOLICY;
- op.u.getpolicy.pullcache = (void *) pull_buffer;
- op.u.getpolicy.pullcache_size = sizeof(pull_buffer);
- if ((ret = xc_acm_op(xc_handle, &op)) < 0) {
+ getpolicy.interface_version = ACM_INTERFACE_VERSION;
+ getpolicy.pullcache = (void *) pull_buffer;
+ getpolicy.pullcache_size = sizeof(pull_buffer);
+ ret = xc_acm_op(xc_handle, ACMOP_getpolicy, &getpolicy, sizeof(getpolicy));
+
+ if (ret < 0) {
printf("ACM operation failed: errno=%d\n", errno);
if (errno == EACCES)
fprintf(stderr, "ACM operation failed -- need to"
@@ -275,13 +277,13 @@ int acm_domain_loadpolicy(int xc_handle,
goto free_out;
}
if (len == read(fd, buffer, len)) {
- struct acm_op op;
+ struct acm_setpolicy setpolicy;
/* dump it and then push it down into xen/acm */
acm_dump_policy_buffer(buffer, len);
- op.cmd = ACM_SETPOLICY;
- op.u.setpolicy.pushcache = (void *) buffer;
- op.u.setpolicy.pushcache_size = len;
- ret = xc_acm_op(xc_handle, &op);
+ setpolicy.interface_version = ACM_INTERFACE_VERSION;
+ setpolicy.pushcache = (void *) buffer;
+ setpolicy.pushcache_size = len;
+        ret = xc_acm_op(xc_handle, ACMOP_setpolicy, &setpolicy, sizeof(setpolicy));
if (ret)
printf
@@ -322,15 +324,15 @@ int acm_domain_dumpstats(int xc_handle)
int acm_domain_dumpstats(int xc_handle)
{
uint8_t stats_buffer[PULL_STATS_SIZE];
- struct acm_op op;
+ struct acm_dumpstats dumpstats;
int ret;
struct acm_stats_buffer *stats;
memset(stats_buffer, 0x00, sizeof(stats_buffer));
- op.cmd = ACM_DUMPSTATS;
- op.u.dumpstats.pullcache = (void *) stats_buffer;
- op.u.dumpstats.pullcache_size = sizeof(stats_buffer);
- ret = xc_acm_op(xc_handle, &op);
+ dumpstats.interface_version = ACM_INTERFACE_VERSION;
+ dumpstats.pullcache = (void *) stats_buffer;
+ dumpstats.pullcache_size = sizeof(stats_buffer);
+ ret = xc_acm_op(xc_handle, ACMOP_dumpstats, &dumpstats, sizeof(dumpstats));
if (ret < 0) {
printf
diff -r e74246451527 -r f54d38cea8ac tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/tests/test_x86_emulator.c Tue May 30 14:30:34 2006 -0500
@@ -17,7 +17,8 @@ static int read_any(
static int read_any(
unsigned long addr,
unsigned long *val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
switch ( bytes )
{
@@ -32,7 +33,8 @@ static int write_any(
static int write_any(
unsigned long addr,
unsigned long val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
switch ( bytes )
{
@@ -48,7 +50,8 @@ static int cmpxchg_any(
unsigned long addr,
unsigned long old,
unsigned long new,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
switch ( bytes )
{
@@ -65,34 +68,38 @@ static int cmpxchg8b_any(
unsigned long old_lo,
unsigned long old_hi,
unsigned long new_lo,
- unsigned long new_hi)
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt)
{
((unsigned long *)addr)[0] = new_lo;
((unsigned long *)addr)[1] = new_hi;
return X86EMUL_CONTINUE;
}
-static struct x86_mem_emulator emulops = {
+static struct x86_emulate_ops emulops = {
read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
};
int main(int argc, char **argv)
{
+ struct x86_emulate_ctxt ctxt;
struct cpu_user_regs regs;
char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
unsigned int res = 0x7FFFFFFF;
u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
- unsigned long cr2;
int rc;
+
+ ctxt.regs = &regs;
+ ctxt.mode = X86EMUL_MODE_PROT32;
printf("%-40s", "Testing addl %%ecx,(%%eax)...");
instr[0] = 0x01; instr[1] = 0x08;
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- cr2 = (unsigned long)&res;
+ ctxt.cr2 = (unsigned long)&res;
res = 0x7FFFFFFF;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x92345677) ||
(regs.eflags != 0xa94) ||
@@ -109,8 +116,8 @@ int main(int argc, char **argv)
#else
regs.ecx = 0x12345678UL;
#endif
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x92345677) ||
(regs.ecx != 0x8000000FUL) ||
@@ -124,8 +131,8 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = 0x92345677UL;
regs.ecx = 0xAA;
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x923456AA) ||
(regs.eflags != 0x244) ||
@@ -140,8 +147,8 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = 0xAABBCC77UL;
regs.ecx = 0xFF;
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x923456AA) ||
((regs.eflags&0x240) != 0x200) ||
@@ -156,8 +163,8 @@ int main(int argc, char **argv)
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x12345678) ||
(regs.eflags != 0x200) ||
@@ -173,8 +180,8 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = 0x923456AAUL;
regs.ecx = 0xDDEEFF00L;
- cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0xDDEEFF00) ||
(regs.eflags != 0x244) ||
@@ -192,8 +199,8 @@ int main(int argc, char **argv)
regs.esi = (unsigned long)&res + 0;
regs.edi = (unsigned long)&res + 2;
regs.error_code = 0; /* read fault */
- cr2 = regs.esi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = regs.esi;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x44554455) ||
(regs.eflags != 0x200) ||
@@ -210,8 +217,8 @@ int main(int argc, char **argv)
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)&res;
- cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = regs.edi;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x2233445D) ||
((regs.eflags&0x201) != 0x201) ||
@@ -228,8 +235,8 @@ int main(int argc, char **argv)
regs.ecx = 0xCCCCFFFF;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)cmpxchg8b_res;
- cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = regs.edi;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(cmpxchg8b_res[0] != 0x9999AAAA) ||
(cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -242,8 +249,8 @@ int main(int argc, char **argv)
instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)cmpxchg8b_res;
- cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ ctxt.cr2 = regs.edi;
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(cmpxchg8b_res[0] != 0x9999AAAA) ||
(cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -258,9 +265,9 @@ int main(int argc, char **argv)
instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- cr2 = (unsigned long)&res;
+ ctxt.cr2 = (unsigned long)&res;
res = 0x82;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x82) ||
(regs.ecx != 0xFFFFFF82) ||
@@ -273,9 +280,9 @@ int main(int argc, char **argv)
instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- cr2 = (unsigned long)&res;
+ ctxt.cr2 = (unsigned long)&res;
res = 0x1234aa82;
- rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+ rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
(res != 0x1234aa82) ||
(regs.ecx != 0xaa82) ||
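
[The mechanical change running through this file: the register file, the faulting address (cr2) and the execution mode move out of the x86_emulate_memop() argument list and into struct x86_emulate_ctxt, which is also handed to every read/write/cmpxchg callback. Condensed from the test above; emulops is the ops table defined earlier in the file.]

    /* One emulation step under the new calling convention. */
    static int emulate_one(struct cpu_user_regs *regs, unsigned long cr2)
    {
        struct x86_emulate_ctxt ctxt;

        ctxt.regs = regs;                 /* register file */
        ctxt.mode = X86EMUL_MODE_PROT32;  /* execution mode */
        ctxt.cr2  = cr2;                  /* faulting address */
        return x86_emulate_memop(&ctxt, &emulops);
    }
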
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/Makefile
--- a/tools/xenstore/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/Makefile Tue May 30 14:30:34 2006 -0500
@@ -27,6 +27,12 @@ CLIENTS += xenstore-write
CLIENTS += xenstore-write
CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS))
+XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+
+XENSTORED_Linux = xenstored_linux.o
+
+XENSTORED_OBJS += $(XENSTORED_$(OS))
+
.PHONY: all
all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls
@@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte
.PHONY: testcode
testcode: xs_test xenstored_test xs_random
-xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+xenstored: $(XENSTORED_OBJS)
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
$(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_core.c Tue May 30 14:30:34 2006 -0500
@@ -451,6 +451,11 @@ static struct node *read_node(struct con
static bool write_node(struct connection *conn, const struct node *node)
{
+ /*
+ * conn will be null when this is called from manual_node.
+ * tdb_context copes with this.
+ */
+
TDB_DATA key, data;
void *p;
@@ -478,7 +483,7 @@ static bool write_node(struct connection
/* TDB should set errno, but doesn't even set ecode AFAICT. */
if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
- corrupt(conn, "Write of %s = %s failed", key, data);
+ corrupt(conn, "Write of %s failed", key.dptr);
goto error;
}
return true;
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_core.h Tue May 30 14:30:34 2006 -0500
@@ -19,6 +19,8 @@
#ifndef _XENSTORED_CORE_H
#define _XENSTORED_CORE_H
+
+#include <xenctrl.h>
#include <sys/types.h>
#include <dirent.h>
@@ -163,6 +165,12 @@ void trace(const char *fmt, ...);
extern int event_fd;
+/* Map the kernel's xenstore page. */
+void *xenbus_map(void);
+
+/* Return the event channel used by xenbus. */
+evtchn_port_t xenbus_evtchn(void);
+
#endif /* _XENSTORED_CORE_H */
/*
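
[The two hooks declared above abstract away the Linux-only /proc/xen reads and mmap that dom0_init() used to do (see the xenstored_domain.c hunks below); the per-OS implementation is selected by the Makefile's XENSTORED_$(OS) indirection. A sketch of a consumer, assuming the xenstored headers and with error handling abbreviated:]

    /* How dom0 bring-up consumes the new platform hooks. */
    static int dom0_setup_example(void)
    {
        evtchn_port_t port = xenbus_evtchn();  /* event channel used by xenbus */
        void *ring = xenbus_map();             /* kernel's xenstore page */

        if (port == (evtchn_port_t)-1 || ring == NULL)
            return -1;
        /* ... hand port and ring to new_domain()/talloc_steal() ... */
        return 0;
    }
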
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Tue May 30 12:52:02 2006 -0500
+++ b/tools/xenstore/xenstored_domain.c Tue May 30 14:30:34 2006 -0500
@@ -33,12 +33,11 @@
#include "talloc.h"
#include "xenstored_core.h"
#include "xenstored_domain.h"
-#include "xenstored_proc.h"
#include "xenstored_watch.h"
#include "xenstored_test.h"
#include <xenctrl.h>
-#include <xen/linux/evtchn.h>
+#include <xen/sys/evtchn.h>
static int *xc_handle;
static evtchn_port_t virq_port;
@@ -476,44 +475,24 @@ void restore_existing_connections(void)
static int dom0_init(void)
{
- int rc, fd;
- evtchn_port_t port;
- char str[20];
- struct domain *dom0;
-
- fd = open(XENSTORED_PROC_PORT, O_RDONLY);
- if (fd == -1)
+ evtchn_port_t port;
+ struct domain *dom0;
+
+ port = xenbus_evtchn();
+ if (port == -1)
return -1;
- rc = read(fd, str, sizeof(str));
- if (rc == -1)
- goto outfd;
- str[rc] = '\0';
- port = strtoul(str, NULL, 0);
-
- close(fd);
-
dom0 = new_domain(NULL, 0, port);
- fd = open(XENSTORED_PROC_KVA, O_RDWR);
- if (fd == -1)
+ dom0->interface = xenbus_map();
+ if (dom0->interface == NULL)
return -1;
- dom0->interface = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
- MAP_SHARED, fd, 0);
- if (dom0->interface == MAP_FAILED)
- goto outfd;
-
- close(fd);
-
talloc_steal(dom0->conn, dom0);
evtchn_notify(dom0->port);
return 0;
-outfd:
- close(fd);
- return -1;
}
diff -r e74246451527 -r f54d38cea8ac tools/xentrace/xentrace_format
--- a/tools/xentrace/xentrace_format Tue May 30 12:52:02 2006 -0500
+++ b/tools/xentrace/xentrace_format Tue May 30 14:30:34 2006 -0500
@@ -89,7 +89,7 @@ CPUREC = "I"
CPUREC = "I"
TRCREC = "QLLLLLL"
-last_tsc = [0,0,0,0,0,0,0,0]
+last_tsc = [0]
i=0
@@ -111,7 +111,9 @@ while not interrupted:
#print i, tsc
- if tsc < last_tsc[cpu]:
+ if cpu >= len(last_tsc):
+ last_tsc += [0] * (cpu - len(last_tsc) + 1)
+ elif tsc < last_tsc[cpu]:
print "TSC stepped backward cpu %d ! %d %d" %
(cpu,tsc,last_tsc[cpu])
last_tsc[cpu] = tsc
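
[The fix above drops the fixed eight-entry last_tsc table in favour of growing it on first sight of a new CPU number. The same idea in C, since the rest of this changeset is C; this is illustrative, not from the patch.]

    #include <stdlib.h>
    #include <string.h>

    static unsigned long long *last_tsc;
    static int n_cpus;

    /* Record a TSC sample, growing the per-CPU table on demand. */
    static int note_tsc(int cpu, unsigned long long tsc)
    {
        if (cpu >= n_cpus) {
            unsigned long long *t = realloc(last_tsc,
                                            (cpu + 1) * sizeof(*t));
            if (t == NULL)
                return -1;
            /* Zero-fill the newly visible slots. */
            memset(t + n_cpus, 0, (cpu + 1 - n_cpus) * sizeof(*t));
            last_tsc = t;
            n_cpus = cpu + 1;
        } else if (tsc < last_tsc[cpu]) {
            return 1;   /* TSC stepped backward on this CPU */
        }
        last_tsc[cpu] = tsc;
        return 0;
    }
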
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/configure.ac
--- a/tools/xm-test/configure.ac Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/configure.ac Tue May 30 14:30:34 2006 -0500
@@ -99,6 +99,7 @@ AC_CONFIG_FILES([
tests/block-list/Makefile
tests/block-create/Makefile
tests/block-destroy/Makefile
+ tests/block-integrity/Makefile
tests/console/Makefile
tests/create/Makefile
tests/destroy/Makefile
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/ramdisk/bin/create_disk_image
--- a/tools/xm-test/ramdisk/bin/create_disk_image Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/ramdisk/bin/create_disk_image Tue May 30 14:30:34 2006 -0500
@@ -207,6 +207,13 @@ function dd_rootfs_to_image()
dd if="$ROOTFS" of="$LOOPP" > /dev/null 2>&1
if [ $? -ne 0 ]; then
die "Failed to dd $ROOTFS to $LOOPP."
+ fi
+
+ # Resize fs to use full partition
+ e2fsck -f $LOOPP
+ resize2fs $LOOPP
+ if [ $? -ne 0 ]; then
+ die "Failed to resize rootfs on $LOOPP."
fi
}
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/Makefile.am
--- a/tools/xm-test/tests/Makefile.am Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/Makefile.am Tue May 30 14:30:34 2006 -0500
@@ -1,14 +1,15 @@ SUBDIRS = \
SUBDIRS = \
block-create \
- block-list \
- block-destroy \
+ block-list \
+ block-destroy \
+ block-integrity \
console \
create \
destroy \
dmesg \
domid \
domname \
- help \
+ help \
info \
list \
memmax \
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py
--- a/tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/enforce_dom0_cpus/01_enforce_dom0_cpus_basic_pos.py Tue May 30 14:30:34 2006 -0500
@@ -65,13 +65,24 @@ if check_status and status != 0:
FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
# 5) check /proc/cpuinfo for cpu count
-cmd = "grep \"^processor\" /proc/cpuinfo | wc -l"
-status, output = traceCommand(cmd)
-if check_status and status != 0:
- os.unsetenv("XEND_CONFIG")
- restartXend()
- FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
+# It takes some time for the CPU count to change, on multi-proc systems, so check the number of procs in a loop for 20 seconds.
+#Sleep inside the loop for a second each time.
+timeout = 20
+starttime = time.time()
+while timeout + starttime > time.time():
+# Check /proc/cpuinfo
+ cmd = "grep \"^processor\" /proc/cpuinfo | wc -l"
+ status, output = traceCommand(cmd)
+ if check_status and status != 0:
+ os.unsetenv("XEND_CONFIG")
+ restartXend()
+ FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
+# Has it succeeded? If so, we can leave the loop
+ if output == str(enforce_dom0_cpus):
+ break
+# Sleep for 1 second before trying again
+ time.sleep(1)
if output != str(enforce_dom0_cpus):
os.unsetenv("XEND_CONFIG")
restartXend()
@@ -94,7 +105,14 @@ if check_status and status != 0:
FAIL("\"%s\" returned invalid %i != 0" %(cmd,status))
# check restore worked
-num_online = int(getDomInfo("Domain-0", "VCPUs"))
+# Since this also takes time, we will do it in a loop with a 20 second timeout.
+timeout=20
+starttime=time.time()
+while timeout + starttime > time.time():
+ num_online = int(getDomInfo("Domain-0", "VCPUs"))
+ if num_online == dom0_online_vcpus:
+ break
+ time.sleep(1)
if num_online != dom0_online_vcpus:
os.unsetenv("XEND_CONFIG")
restartXend()
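
[Both retry loops added above share one shape: poll a slow-to-settle value for up to 20 seconds, sleeping one second between probes, then fail if it never matched. Reduced to its core in C; the probe callback is hypothetical.]

    #include <time.h>
    #include <unistd.h>

    /* Poll until probe() returns the expected value or 20s elapse. */
    static int wait_for(int expected, int (*probe)(void))
    {
        time_t start = time(NULL);

        while (time(NULL) - start < 20) {
            if (probe() == expected)
                return 0;   /* settled */
            sleep(1);
        }
        return -1;          /* still wrong after the timeout */
    }
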
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/03_network_local_tcp_pos.py
--- a/tools/xm-test/tests/network/03_network_local_tcp_pos.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/03_network_local_tcp_pos.py Tue May 30 14:30:34 2006 -0500
@@ -44,7 +44,7 @@ try:
lofails=""
for size in trysizes:
out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -q -c 20 "
- + "--fast -d " + str(size))
+ + "--fast -d " + str(size) + " -N " + str(size))
if out["return"]:
lofails += " " + str(size)
@@ -54,7 +54,7 @@ try:
ip = netdev.getNetDevIP()
for size in trysizes:
out = console.runCmd("hping2 " + ip + " -E /dev/urandom -q -c 20 "
- + "--fast -d "+ str(size))
+ + "--fast -d "+ str(size) + " -N " + str(size))
if out["return"]:
eth0fails += " " + str(size)
except ConsoleError, e:
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/04_network_local_udp_pos.py
--- a/tools/xm-test/tests/network/04_network_local_udp_pos.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/04_network_local_udp_pos.py Tue May 30 14:30:34 2006 -0500
@@ -43,7 +43,7 @@ try:
lofails=""
for size in trysizes:
out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -2 -q -c 20 "
- + "--fast -d " + str(size))
+ + "--fast -d " + str(size) + " -N " + str(size))
if out["return"]:
lofails += " " + str(size)
print out["output"]
@@ -54,7 +54,7 @@ try:
ip = netdev.getNetDevIP()
for size in trysizes:
out = console.runCmd("hping2 " + ip + " -E /dev/urandom -2 -q -c 20 "
- + "--fast -d " + str(size))
+ + "--fast -d " + str(size) + " -N " + str(size))
if out["return"]:
eth0fails += " " + str(size)
print out["output"]
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/06_network_dom0_tcp_pos.py
--- a/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py Tue May 30 14:30:34 2006 -0500
@@ -44,7 +44,7 @@ try:
dom0ip = netdev.getDom0AliasIP()
for size in trysizes:
out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -q -c 20 "
- + "--fast -d " + str(size))
+ + "--fast -d " + str(size) + " -N " + str(size))
if out["return"]:
fails += " " + str(size)
print out["output"]
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/07_network_dom0_udp_pos.py
--- a/tools/xm-test/tests/network/07_network_dom0_udp_pos.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/07_network_dom0_udp_pos.py Tue May 30 14:30:34 2006 -0500
@@ -43,7 +43,7 @@ try:
dom0ip = netdev.getDom0AliasIP()
for size in trysizes:
out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -2 -q -c
20"
- + " --fast -d " + str(size))
+ + " --fast -d " + str(size) + " -N " + str(size))
if out["return"]:
fails += " " + str(size)
print out["output"]
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/12_network_domU_tcp_pos.py
--- a/tools/xm-test/tests/network/12_network_domU_tcp_pos.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/12_network_domU_tcp_pos.py Tue May 30 14:30:34 2006 -0500
@@ -50,7 +50,7 @@ try:
ip2 = dst_netdev.getNetDevIP()
for size in pingsizes:
out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -q -c 20 "
- + "--fast -d " + str(size))
+ + "--fast -d " + str(size) + " -N " + str(size))
if out["return"]:
fails += " " + str(size)
print out["output"]
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/network/13_network_domU_udp_pos.py
--- a/tools/xm-test/tests/network/13_network_domU_udp_pos.py Tue May 30 12:52:02 2006 -0500
+++ b/tools/xm-test/tests/network/13_network_domU_udp_pos.py Tue May 30 14:30:34 2006 -0500
@@ -50,7 +50,7 @@ try:
ip2 = dst_netdev.getNetDevIP()
for size in pingsizes:
out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -2 -q "
- + "-c 20 --fast -d " + str(size))
+ + "-c 20 --fast -d " + str(size) + " -N " + str(size))
if out["return"]:
fails += " " + str(size)
print out["output"]
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/Rules.mk Tue May 30 14:30:34 2006 -0500
@@ -3,31 +3,31 @@
HAS_ACPI := y
VALIDATE_VT ?= n
-xen_ia64_dom0_virtual_physical ?= n
+xen_ia64_dom0_virtual_physical ?= y
+no_warns ?= n
ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
endif
-AFLAGS += -D__ASSEMBLY__ -nostdinc $(CPPFLAGS)
-AFLAGS += -mconstant-gp
-CPPFLAGS += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64 \
- -I$(BASEDIR)/include/asm-ia64/linux \
- -I$(BASEDIR)/include/asm-ia64/linux-xen \
- -I$(BASEDIR)/include/asm-ia64/linux-null \
- -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
+
+# Used only by linux/Makefile.
+AFLAGS_KERNEL += -mconstant-gp
+
+# Note: .S -> .o rule uses AFLAGS and CFLAGS.
CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
CFLAGS += -mconstant-gp
#CFLAGS += -O3 # -O3 over-inlines making debugging tough!
CFLAGS += -O2 # but no optimization causes compile errors!
-#CFLAGS += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE)
-CFLAGS += -iwithprefix include -Wall
-CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
-CFLAGS += -I$(BASEDIR)/include/asm-ia64 -I$(BASEDIR)/include/asm-ia64/linux \
+CFLAGS += -fomit-frame-pointer -D__KERNEL__
+CFLAGS += -iwithprefix include
+CPPFLAGS+= -I$(BASEDIR)/include \
+	-I$(BASEDIR)/include/asm-ia64 \
+	-I$(BASEDIR)/include/asm-ia64/linux \
+	-I$(BASEDIR)/include/asm-ia64/linux-xen \
+	-I$(BASEDIR)/include/asm-ia64/linux-null \
+	-I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen
+CFLAGS += $(CPPFLAGS)
#CFLAGS += -Wno-pointer-arith -Wredundant-decls
CFLAGS += -DIA64 -DXEN -DLINUX_2_6 -DV_IOSAPIC_READY
CFLAGS += -ffixed-r13 -mfixed-range=f2-f5,f12-f127
@@ -39,4 +39,8 @@ ifeq ($(xen_ia64_dom0_virtual_physical),
ifeq ($(xen_ia64_dom0_virtual_physical),y)
CFLAGS += -DCONFIG_XEN_IA64_DOM0_VP
endif
+ifeq ($(no_warns),y)
+CFLAGS += -Wa,--fatal-warnings
+endif
+
LDFLAGS := -g
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/asm-offsets.c Tue May 30 14:30:34 2006 -0500
@@ -50,8 +50,6 @@ void foo(void)
DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0));
DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm));
DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva));
- DEFINE(IA64_VCPU_DTLB_PTE_OFFSET, offsetof (struct vcpu, arch.dtlb_pte));
- DEFINE(IA64_VCPU_ITLB_PTE_OFFSET, offsetof (struct vcpu, arch.itlb_pte));
DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0]));
DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3]));
DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3]));
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/setup.c
--- a/xen/arch/ia64/linux-xen/setup.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/setup.c Tue May 30 14:30:34 2006 -0500
@@ -800,8 +800,7 @@ cpu_init (void)
cpu_data = per_cpu_init();
#ifdef XEN
- printf ("cpu_init: current=%p, current->domain->arch.mm=%p\n",
- current, current->domain->arch.mm);
+ printf ("cpu_init: current=%p\n", current);
#endif
/*
@@ -872,12 +871,11 @@ cpu_init (void)
#ifndef XEN
current->active_mm = &init_mm;
#endif
-#ifdef XEN
- if (current->domain->arch.mm)
-#else
+#ifndef XEN
if (current->mm)
-#endif
BUG();
+#endif
+
#ifdef XEN
ia64_fph_enable();
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/smp.c
--- a/xen/arch/ia64/linux-xen/smp.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/smp.c Tue May 30 14:30:34 2006 -0500
@@ -53,28 +53,6 @@
#endif
#ifdef XEN
-// FIXME: MOVE ELSEWHERE
-//Huh? This seems to be used on ia64 even if !CONFIG_SMP
-void flush_tlb_mask(cpumask_t mask)
-{
- int cpu;
-
- cpu = smp_processor_id();
- if (cpu_isset (cpu, mask)) {
- cpu_clear(cpu, mask);
- local_flush_tlb_all ();
- }
-
-#ifdef CONFIG_SMP
- if (cpus_empty(mask))
- return;
-
- for (cpu = 0; cpu < NR_CPUS; ++cpu)
- if (cpu_isset(cpu, mask))
- smp_call_function_single
- (cpu, (void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
-#endif
-}
//#if CONFIG_SMP || IA64
#if CONFIG_SMP
//Huh? This seems to be used on ia64 even if !CONFIG_SMP
@@ -276,7 +254,6 @@ smp_send_reschedule (int cpu)
{
platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}
-#endif
void
smp_flush_tlb_all (void)
@@ -284,15 +261,6 @@ smp_flush_tlb_all (void)
on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
}
-#ifdef XEN
-void
-smp_vhpt_flush_all(void)
-{
- on_each_cpu((void (*)(void *))vhpt_flush, NULL, 1, 1);
-}
-#endif
-
-#ifndef XEN
void
smp_flush_tlb_mm (struct mm_struct *mm)
{
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/linux-xen/unaligned.c
--- a/xen/arch/ia64/linux-xen/unaligned.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/linux-xen/unaligned.c Tue May 30 14:30:34 2006 -0500
@@ -377,7 +377,7 @@ get_rse_reg (struct pt_regs *regs, unsig
if (ridx >= sof) {
/* read of out-of-frame register returns an undefined value; 0 in our case. */
DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
- panic("wrong stack register number (iip=%p)\n", regs->cr_iip);
+ panic("wrong stack register number (iip=%lx)\n", regs->cr_iip);
}
if (ridx < sor)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/pal_emul.c
--- a/xen/arch/ia64/vmx/pal_emul.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/pal_emul.c Tue May 30 14:30:34 2006 -0500
@@ -62,8 +62,8 @@ pal_cache_flush (VCPU *vcpu) {
// ia64_pal_call_static(gr28 ,gr29, gr30,
// result.v1,1LL);
// }
- while (result.status != 0) {
- panic("PAL_CACHE_FLUSH ERROR, status %ld", result.status);
+ if(result.status != 0) {
+ panic_domain(vcpu_regs(vcpu),"PAL_CACHE_FLUSH ERROR, status
%ld", result.status);
}
return result;
@@ -445,7 +445,7 @@ pal_emul( VCPU *vcpu) {
break;
default:
- panic("pal_emul(): guest call unsupported pal" );
+ panic_domain(vcpu_regs(vcpu),"pal_emul(): guest call
unsupported pal" );
}
set_pal_result (vcpu, result);
}
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vlsapic.c
--- a/xen/arch/ia64/vmx/vlsapic.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vlsapic.c Tue May 30 14:30:34 2006 -0500
@@ -568,7 +568,7 @@ int vmx_check_pending_irq(VCPU *vcpu)
if ( vpsr.i && IRQ_NO_MASKED == mask ) {
isr = vpsr.val & IA64_PSR_RI;
if ( !vpsr.ic )
- panic("Interrupt when IC=0\n");
+ panic_domain(regs,"Interrupt when IC=0\n");
vmx_reflect_interruption(0,isr,0, 12, regs ); // EXT IRQ
injected = 1;
}
@@ -595,7 +595,8 @@ void guest_write_eoi(VCPU *vcpu)
uint64_t spsr;
vec = highest_inservice_irq(vcpu);
- if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n");
+ if ( vec == NULL_VECTOR )
+ panic_domain(vcpu_regs(vcpu),"Wrong vector to EOI\n");
local_irq_save(spsr);
VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
local_irq_restore(spsr);
@@ -634,7 +635,7 @@ static void generate_exirq(VCPU *vcpu)
update_vhpi(vcpu, NULL_VECTOR);
isr = vpsr.val & IA64_PSR_RI;
if ( !vpsr.ic )
- panic("Interrupt when IC=0\n");
+ panic_domain(regs,"Interrupt when IC=0\n");
vmx_reflect_interruption(0,isr,0, 12, regs); // EXT IRQ
}
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmmu.c
--- a/xen/arch/ia64/vmx/vmmu.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmmu.c Tue May 30 14:30:34 2006 -0500
@@ -134,11 +134,11 @@ static void init_domain_vhpt(struct vcpu
void * vbase;
page = alloc_domheap_pages (NULL, VCPU_VHPT_ORDER, 0);
if ( page == NULL ) {
- panic("No enough contiguous memory for init_domain_vhpt\n");
+ panic_domain(vcpu_regs(v),"No enough contiguous memory for
init_domain_vhpt\n");
}
vbase = page_to_virt(page);
memset(vbase, 0, VCPU_VHPT_SIZE);
- printk("Allocate domain tlb at 0x%p\n", vbase);
+ printk("Allocate domain vhpt at 0x%p\n", vbase);
VHPT(v,hash) = vbase;
VHPT(v,hash_sz) = VCPU_VHPT_SIZE/2;
@@ -157,11 +157,11 @@ void init_domain_tlb(struct vcpu *v)
init_domain_vhpt(v);
page = alloc_domheap_pages (NULL, VCPU_VTLB_ORDER, 0);
if ( page == NULL ) {
- panic("No enough contiguous memory for init_domain_tlb\n");
+ panic_domain(vcpu_regs(v),"No enough contiguous memory for
init_domain_tlb\n");
}
vbase = page_to_virt(page);
memset(vbase, 0, VCPU_VTLB_SIZE);
- printk("Allocate domain tlb at 0x%p\n", vbase);
+ printk("Allocate domain vtlb at 0x%p\n", vbase);
VTLB(v,hash) = vbase;
VTLB(v,hash_sz) = VCPU_VTLB_SIZE/2;
@@ -202,7 +202,7 @@ void machine_tlb_insert(struct vcpu *d,
mtlb.ppn = get_mfn(d->domain,tlb->ppn);
mtlb_ppn=mtlb.ppn;
if (mtlb_ppn == INVALID_MFN)
- panic("Machine tlb insert with invalid mfn number.\n");
+ panic_domain(vcpu_regs(d),"Machine tlb insert with invalid mfn
number.\n");
psr = ia64_clear_ic();
if ( cl == ISIDE_TLB ) {
@@ -325,12 +325,12 @@ fetch_code(VCPU *vcpu, u64 gip, u64 *cod
}
if( gpip){
mfn = gmfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT);
- if( mfn == INVALID_MFN ) panic("fetch_code: invalid memory\n");
+ if( mfn == INVALID_MFN ) panic_domain(vcpu_regs(vcpu),"fetch_code: invalid memory\n");
vpa =(u64 *)__va( (gip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT));
}else{
tlb = vhpt_lookup(gip);
if( tlb == NULL)
- panic("No entry found in ITLB and DTLB\n");
+ panic_domain(vcpu_regs(vcpu),"No entry found in ITLB and DTLB\n");
vpa =(u64 *)__va((tlb->ppn>>(PAGE_SHIFT-ARCH_PAGE_SHIFT)<<PAGE_SHIFT)|(gip&(PAGE_SIZE-1)));
}
*code1 = *vpa++;
@@ -347,7 +347,7 @@ IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UIN
slot = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB);
if (slot >=0) {
// generate MCA.
- panic("Tlb conflict!!");
+ panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
return IA64_FAULT;
}
thash_purge_and_insert(vcpu, pte, itir, ifa);
@@ -363,7 +363,7 @@ IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UIN
slot = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB);
if (slot >=0) {
// generate MCA.
- panic("Tlb conflict!!");
+ panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
return IA64_FAULT;
}
gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT;
@@ -385,7 +385,7 @@ IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, u64
index = vtr_find_overlap(vcpu, va, ps, ISIDE_TLB);
if (index >=0) {
// generate MCA.
- panic("Tlb conflict!!");
+ panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
return IA64_FAULT;
}
thash_purge_entries(vcpu, va, ps);
@@ -407,7 +407,7 @@ IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64
index = vtr_find_overlap(vcpu, va, ps, DSIDE_TLB);
if (index>=0) {
// generate MCA.
- panic("Tlb conflict!!");
+ panic_domain(vcpu_regs(vcpu),"Tlb conflict!!");
return IA64_FAULT;
}
thash_purge_entries(vcpu, va, ps);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_entry.S
--- a/xen/arch/ia64/vmx/vmx_entry.S Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_entry.S Tue May 30 14:30:34 2006 -0500
@@ -290,10 +290,59 @@ GLOBAL_ENTRY(ia64_leave_hypervisor)
mov ar.ccv=r18
;;
//rbs_switch
- // loadrs has already been shifted
+
+ shr.u r18=r20,16
+ ;;
+ movl r19= THIS_CPU(ia64_phys_stacked_size_p8)
+ ;;
+ ld4 r19=[r19]
+
+vmx_dont_preserve_current_frame:
+/*
+ * To prevent leaking bits between the hypervisor and guest domain,
+ * we must clear the stacked registers in the "invalid" partition here.
+ * (5 registers/cycle on McKinley).
+ */
+# define pRecurse p6
+# define pReturn p7
+# define Nregs 14
+
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
+ sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
+ ;;
+ mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
+ shladd in0=loc1,3,r19
+ mov in1=0
+ ;;
+ TEXT_ALIGN(32)
+vmx_rse_clear_invalid:
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
+ add out0=-Nregs*8,in0
+ add out1=1,in1 // increment recursion count
+ mov loc1=0
+ mov loc2=0
+ ;;
+ mov loc3=0
+ mov loc4=0
+ mov loc5=0
+ mov loc6=0
+ mov loc7=0
+(pRecurse) br.call.dptk.few b0=vmx_rse_clear_invalid
+ ;;
+ mov loc8=0
+ mov loc9=0
+ cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
+ mov loc10=0
+ mov loc11=0
+(pReturn) br.ret.dptk.many b0
+
+# undef pRecurse
+# undef pReturn
+
+// loadrs has already been shifted
alloc r16=ar.pfs,0,0,0,0 // drop current register frame
- ;;
- mov ar.rsc=r20
;;
loadrs
;;
@@ -315,7 +364,9 @@ vmx_dorfirfi_back:
adds r18=IA64_VPD_BASE_OFFSET,r21
;;
ld8 r18=[r18] //vpd
- ;;
+ adds r17=IA64_VCPU_ISR_OFFSET,r21
+ ;;
+ ld8 r17=[r17]
adds r19=VPD(VPSR),r18
;;
ld8 r19=[r19] //vpsr
@@ -331,12 +382,14 @@ vmx_dorfirfi_back:
mov b0=r16
br.cond.sptk b0 // call the service
;;
+END(ia64_leave_hypervisor)
switch_rr7:
// fall through
GLOBAL_ENTRY(ia64_vmm_entry)
/*
* must be at bank 0
* parameter:
+ * r17:cr.isr
* r18:vpd
* r19:vpsr
* r20:__vsa_base
@@ -348,13 +401,19 @@ GLOBAL_ENTRY(ia64_vmm_entry)
tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
;;
(p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+ (p1) br.sptk.many ia64_vmm_entry_out
+ ;;
+ tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir
+ ;;
+ (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
(p2) add r29=PAL_VPS_RESUME_HANDLER,r20
;;
+ia64_vmm_entry_out:
mov pr=r23,-2
mov b0=r29
;;
br.cond.sptk b0 // call pal service
-END(ia64_leave_hypervisor)
+END(ia64_vmm_entry)
//r24 rfi_pfs
//r17 address of rfi_pfs
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_init.c
--- a/xen/arch/ia64/vmx/vmx_init.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_init.c Tue May 30 14:30:34 2006 -0500
@@ -208,8 +208,9 @@ vmx_create_vp(struct vcpu *v)
ivt_base = (u64) &vmx_ia64_ivt;
printk("ivt_base: 0x%lx\n", ivt_base);
ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)ivt_base, 0);
- if (ret != PAL_STATUS_SUCCESS)
- panic("ia64_pal_vp_create failed. \n");
+ if (ret != PAL_STATUS_SUCCESS){
+ panic_domain(vcpu_regs(v),"ia64_pal_vp_create failed. \n");
+ }
}
/* Other non-context related tasks can be done in context switch */
@@ -220,8 +221,9 @@ vmx_save_state(struct vcpu *v)
/* FIXME: about setting of pal_proc_vector... time consuming */
status = ia64_pal_vp_save((u64 *)v->arch.privregs, 0);
- if (status != PAL_STATUS_SUCCESS)
- panic("Save vp status failed\n");
+ if (status != PAL_STATUS_SUCCESS){
+ panic_domain(vcpu_regs(v),"Save vp status failed\n");
+ }
/* Need to save KR when domain switch, though HV itself doesn;t
@@ -244,8 +246,9 @@ vmx_load_state(struct vcpu *v)
u64 status;
status = ia64_pal_vp_restore((u64 *)v->arch.privregs, 0);
- if (status != PAL_STATUS_SUCCESS)
- panic("Restore vp status failed\n");
+ if (status != PAL_STATUS_SUCCESS){
+ panic_domain(vcpu_regs(v),"Restore vp status failed\n");
+ }
ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
@@ -343,17 +346,18 @@ int vmx_build_physmap_table(struct domai
for (j = io_ranges[i].start;
j < io_ranges[i].start + io_ranges[i].size;
j += PAGE_SIZE)
- assign_domain_page(d, j, io_ranges[i].type);
+ __assign_domain_page(d, j, io_ranges[i].type);
}
/* Map normal memory below 3G */
end = VMX_CONFIG_PAGES(d) << PAGE_SHIFT;
tmp = end < MMIO_START ? end : MMIO_START;
for (i = 0; (i < tmp) && (list_ent != &d->page_list); i += PAGE_SIZE) {
- mfn = page_to_mfn(list_entry(
- list_ent, struct page_info, list));
+ mfn = page_to_mfn(list_entry(list_ent, struct page_info, list));
+ list_ent = mfn_to_page(mfn)->list.next;
+ if (VGA_IO_START <= i && i < VGA_IO_START + VGA_IO_SIZE)
+ continue;
assign_domain_page(d, i, mfn << PAGE_SHIFT);
- list_ent = mfn_to_page(mfn)->list.next;
}
ASSERT(list_ent != &d->page_list);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_interrupt.c
--- a/xen/arch/ia64/vmx/vmx_interrupt.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_interrupt.c Tue May 30 14:30:34 2006 -0500
@@ -91,8 +91,12 @@ inject_guest_interruption(VCPU *vcpu, u6
{
u64 viva;
REGS *regs;
+ ISR pt_isr;
regs=vcpu_regs(vcpu);
-
+ // clear cr.isr.ri
+ pt_isr.val = VMX(vcpu,cr_isr);
+ pt_isr.ir = 0;
+ VMX(vcpu,cr_isr) = pt_isr.val;
collect_interruption(vcpu);
vmx_vcpu_get_iva(vcpu,&viva);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_ivt.S
--- a/xen/arch/ia64/vmx/vmx_ivt.S Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_ivt.S Tue May 30 14:30:34 2006 -0500
@@ -143,35 +143,62 @@ ENTRY(vmx_itlb_miss)
thash r17 = r16
;;
ttag r20 = r16
+ mov r18 = r17
;;
vmx_itlb_loop:
cmp.eq p6,p0 = r0, r17
-(p6) br vmx_itlb_out
- ;;
- adds r22 = VLE_TITAG_OFFSET, r17
- adds r23 = VLE_CCHAIN_OFFSET, r17
- ;;
- ld8 r24 = [r22]
- ld8 r25 = [r23]
- ;;
- lfetch [r25]
- cmp.eq p6,p7 = r20, r24
- ;;
-(p7) mov r17 = r25;
-(p7) br.sptk vmx_itlb_loop
+(p6)br vmx_itlb_out
+ ;;
+ adds r16 = VLE_TITAG_OFFSET, r17
+ adds r19 = VLE_CCHAIN_OFFSET, r17
+ ;;
+ ld8 r22 = [r16]
+ ld8 r23 = [r19]
+ ;;
+ lfetch [r23]
+ cmp.eq p6,p7 = r20, r22
+ ;;
+(p7)mov r17 = r23;
+(p7)br.sptk vmx_itlb_loop
;;
adds r23 = VLE_PGFLAGS_OFFSET, r17
adds r24 = VLE_ITIR_OFFSET, r17
;;
- ld8 r26 = [r23]
- ld8 r25 = [r24]
- ;;
- mov cr.itir = r25
- ;;
- itc.i r26
+ ld8 r25 = [r23]
+ ld8 r26 = [r24]
+ ;;
+ cmp.eq p6,p7=r18,r17
+(p6) br vmx_itlb_loop1
+ ;;
+ ld8 r27 = [r18]
+ ;;
+ extr.u r19 = r27, 56, 8
+ extr.u r20 = r25, 56, 8
+ ;;
+ dep r27 = r20, r27, 56, 8
+ dep r25 = r19, r25, 56, 8
+ ;;
+ st8 [r18] = r25,8
+ st8 [r23] = r27
+ ;;
+ ld8 r28 = [r18]
+ ;;
+ st8 [r18] = r26,8
+ st8 [r24] = r28
+ ;;
+ ld8 r30 = [r18]
+ ;;
+ st8 [r18] = r22
+ st8 [r16] = r30
+ ;;
+vmx_itlb_loop1:
+ mov cr.itir = r26
+ ;;
+ itc.i r25
;;
srlz.i
;;
+ mov r17=cr.isr
mov r23=r31
mov r22=b0
adds r16=IA64_VPD_BASE_OFFSET,r21
@@ -201,42 +228,68 @@ ENTRY(vmx_dtlb_miss)
mov r29=cr.ipsr;
;;
tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
- (p6)br.sptk vmx_alt_dtlb_miss_1
-//(p6)br.sptk vmx_fault_2
+(p6)br.sptk vmx_alt_dtlb_miss_1
mov r16 = cr.ifa
;;
thash r17 = r16
;;
ttag r20 = r16
+ mov r18 = r17
;;
vmx_dtlb_loop:
cmp.eq p6,p0 = r0, r17
(p6)br vmx_dtlb_out
;;
- adds r22 = VLE_TITAG_OFFSET, r17
- adds r23 = VLE_CCHAIN_OFFSET, r17
- ;;
- ld8 r24 = [r22]
- ld8 r25 = [r23]
- ;;
- lfetch [r25]
- cmp.eq p6,p7 = r20, r24
- ;;
-(p7)mov r17 = r25;
+ adds r16 = VLE_TITAG_OFFSET, r17
+ adds r19 = VLE_CCHAIN_OFFSET, r17
+ ;;
+ ld8 r22 = [r16]
+ ld8 r23 = [r19]
+ ;;
+ lfetch [r23]
+ cmp.eq p6,p7 = r20, r22
+ ;;
+(p7)mov r17 = r23;
(p7)br.sptk vmx_dtlb_loop
;;
adds r23 = VLE_PGFLAGS_OFFSET, r17
adds r24 = VLE_ITIR_OFFSET, r17
;;
- ld8 r26 = [r23]
- ld8 r25 = [r24]
- ;;
- mov cr.itir = r25
- ;;
- itc.d r26
+ ld8 r25 = [r23]
+ ld8 r26 = [r24]
+ ;;
+ cmp.eq p6,p7=r18,r17
+(p6) br vmx_dtlb_loop1
+ ;;
+ ld8 r27 = [r18]
+ ;;
+ extr.u r19 = r27, 56, 8
+ extr.u r20 = r25, 56, 8
+ ;;
+ dep r27 = r20, r27, 56, 8
+ dep r25 = r19, r25, 56, 8
+ ;;
+ st8 [r18] = r25,8
+ st8 [r23] = r27
+ ;;
+ ld8 r28 = [r18]
+ ;;
+ st8 [r18] = r26,8
+ st8 [r24] = r28
+ ;;
+ ld8 r30 = [r18]
+ ;;
+ st8 [r18] = r22
+ st8 [r16] = r30
+ ;;
+vmx_dtlb_loop1:
+ mov cr.itir = r26
+ ;;
+ itc.d r25
;;
srlz.d;
;;
+ mov r17=cr.isr
mov r23=r31
mov r22=b0
adds r16=IA64_VPD_BASE_OFFSET,r21
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_phy_mode.c
--- a/xen/arch/ia64/vmx/vmx_phy_mode.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Tue May 30 14:30:34 2006 -0500
@@ -186,8 +186,10 @@ vmx_load_all_rr(VCPU *vcpu)
* mode in same region
*/
if (is_physical_mode(vcpu)) {
- if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
- panic("Unexpected domain switch in phy emul\n");
+ if (vcpu->arch.mode_flags & GUEST_PHY_EMUL){
+ panic_domain(vcpu_regs(vcpu),
+ "Unexpected domain switch in phy emul\n");
+ }
phy_rr.rrval = vcpu->arch.metaphysical_rr0;
//phy_rr.ps = PAGE_SHIFT;
phy_rr.ve = 1;
@@ -322,8 +324,7 @@ switch_mm_mode(VCPU *vcpu, IA64_PSR old_
break;
default:
/* Sanity check */
- printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val);
- panic("Unexpected virtual <--> physical mode transition");
+ panic_domain(vcpu_regs(vcpu),"Unexpected virtual <--> physical mode
transition,old:%lx,new:%lx\n",old_psr.val,new_psr.val);
break;
}
return;
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_process.c
--- a/xen/arch/ia64/vmx/vmx_process.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_process.c Tue May 30 14:30:34 2006 -0500
@@ -338,7 +338,7 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r
}
if(vec == 1) type = ISIDE_TLB;
else if(vec == 2) type = DSIDE_TLB;
- else panic("wrong vec\n");
+ else panic_domain(regs,"wrong vec:%0xlx\n",vec);
// prepare_if_physical_mode(v);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_support.c
--- a/xen/arch/ia64/vmx/vmx_support.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_support.c Tue May 30 14:30:34 2006 -0500
@@ -92,12 +92,12 @@ void vmx_io_assist(struct vcpu *v)
*/
vio = get_vio(v->domain, v->vcpu_id);
if (!vio)
- panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+ panic_domain(vcpu_regs(v),"Corruption: bad shared page: %lx\n",
(unsigned long)vio);
p = &vio->vp_ioreq;
if (p->state == STATE_IORESP_HOOK)
- panic("Not supported: No hook available for DM request\n");
+ panic_domain(vcpu_regs(v),"Not supported: No hook available for DM
request\n");
if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
if (p->state != STATE_IORESP_READY) {
@@ -135,7 +135,7 @@ void vmx_intr_assist(struct vcpu *v)
* out of vmx_wait_io, when guest is still waiting for response.
*/
if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags))
- panic("!!!Bad resume to guest before I/O emulation is done.\n");
+ panic_domain(vcpu_regs(v),"!!!Bad resume to guest before I/O emulation
is done.\n");
/* Clear indicator specific to interrupt delivered from DM */
if (test_and_clear_bit(port,
@@ -154,7 +154,7 @@ void vmx_intr_assist(struct vcpu *v)
*/
vio = get_vio(v->domain, v->vcpu_id);
if (!vio)
- panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+ panic_domain(vcpu_regs(v),"Corruption: bad shared page: %lx\n",
(unsigned long)vio);
#ifdef V_IOSAPIC_READY
/* Confirm virtual interrupt line signals, and set pending bits in vpd */
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_vcpu.c
--- a/xen/arch/ia64/vmx/vmx_vcpu.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_vcpu.c Tue May 30 14:30:34 2006 -0500
@@ -91,7 +91,7 @@ vmx_vcpu_set_psr(VCPU *vcpu, unsigned lo
* Otherwise panic
*/
if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) {
- panic ("Setting unsupport guest psr!");
+ panic_domain (regs,"Setting unsupport guest psr!");
}
/*
@@ -206,7 +206,7 @@ IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UI
vcpu_get_rr(vcpu, reg, &oldrr.rrval);
newrr.rrval=val;
if (newrr.rid >= (1 << vcpu->domain->arch.rid_bits))
- panic_domain (NULL, "use of invalid rid %lx\n", newrr.rid);
+ panic_domain (NULL, "use of invalid rid %x\n", newrr.rid);
if(oldrr.ps!=newrr.ps){
thash_purge_all(vcpu);
}
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vmx_virt.c
--- a/xen/arch/ia64/vmx/vmx_virt.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vmx_virt.c Tue May 30 14:30:34 2006 -0500
@@ -182,8 +182,9 @@ IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu
IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst)
{
UINT64 val;
+
if(vcpu_get_gr_nat(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT)
- panic(" get_psr nat bit fault\n");
+ panic_domain(vcpu_regs(vcpu),"get_psr nat bit fault\n");
val = (val & MASK(0, 32)) | (VCPU(vcpu, vpsr) & MASK(32, 32));
#if 0
@@ -216,7 +217,7 @@ IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST6
regs=vcpu_regs(vcpu);
vpsr.val=regs->cr_ipsr;
if ( vpsr.is == 1 ) {
- panic ("We do not support IA32 instruction yet");
+ panic_domain(regs,"We do not support IA32 instruction yet");
}
return vmx_vcpu_rfi(vcpu);
@@ -715,8 +716,9 @@ IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *v
{
// I27 and M30 are identical for these fields
UINT64 imm;
+
if(inst.M30.ar3!=44){
- panic("Can't support ar register other than itc");
+ panic_domain(vcpu_regs(vcpu),"Can't support ar register other than
itc");
}
#ifdef CHECK_FAULT
IA64_PSR vpsr;
@@ -741,7 +743,7 @@ IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *v
// I26 and M29 are identical for these fields
u64 r2;
if(inst.M29.ar3!=44){
- panic("Can't support ar register other than itc");
+ panic_domain(vcpu_regs(vcpu),"Can't support ar register other than
itc");
}
if(vcpu_get_gr_nat(vcpu,inst.M29.r2,&r2)){
#ifdef CHECK_FAULT
@@ -769,7 +771,7 @@ IA64FAULT vmx_emul_mov_from_ar_reg(VCPU
// I27 and M30 are identical for these fields
u64 r1;
if(inst.M31.ar3!=44){
- panic("Can't support ar register other than itc");
+ panic_domain(vcpu_regs(vcpu),"Can't support ar register other than
itc");
}
#ifdef CHECK_FAULT
if(check_target_register(vcpu,inst.M31.r1)){
@@ -1359,8 +1361,7 @@ if ( (cause == 0xff && opcode == 0x1e000
slot_type = slot_types[bundle.template][slot];
ia64_priv_decoder(slot_type, inst, &cause);
if(cause==0){
- printf("This instruction at 0x%lx slot %d can't be virtualized", iip,
slot);
- panic("123456\n");
+ panic_domain(regs,"This instruction at 0x%lx slot %d can't be
virtualized", iip, slot);
}
#else
inst.inst=opcode;
@@ -1494,12 +1495,8 @@ if ( (cause == 0xff && opcode == 0x1e000
status=IA64_FAULT;
break;
default:
- printf("unknown cause %ld, iip: %lx, ipsr: %lx\n",
cause,regs->cr_iip,regs->cr_ipsr);
- while(1);
- /* For unknown cause, let hardware to re-execute */
- status=IA64_RETRY;
- break;
-// panic("unknown cause in virtualization intercept");
+ panic_domain(regs,"unknown cause %ld, iip: %lx, ipsr: %lx\n",
cause,regs->cr_iip,regs->cr_ipsr);
+ break;
};
#if 0
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/vmx/vtlb.c
--- a/xen/arch/ia64/vmx/vtlb.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/vmx/vtlb.c Tue May 30 14:30:34 2006 -0500
@@ -274,36 +274,36 @@ static void vtlb_purge(thash_cb_t *hcb,
static void vtlb_purge(thash_cb_t *hcb, u64 va, u64 ps)
{
thash_data_t *hash_table, *prev, *next;
- u64 start, end, size, tag, rid;
+ u64 start, end, size, tag, rid, def_size;
ia64_rr vrr;
vcpu_get_rr(current, va, &vrr.rrval);
rid = vrr.rid;
size = PSIZE(ps);
start = va & (-size);
end = start + size;
+ def_size = PSIZE(vrr.ps);
while(start < end){
hash_table = vsa_thash(hcb->pta, start, vrr.rrval, &tag);
-// tag = ia64_ttag(start);
if(!INVALID_TLB(hash_table)){
- if(hash_table->etag == tag){
- __rem_hash_head(hcb, hash_table);
- }
- else{
- prev=hash_table;
- next=prev->next;
- while(next){
- if(next->etag == tag){
- prev->next=next->next;
- cch_free(hcb,next);
- hash_table->len--;
- break;
- }
- prev=next;
- next=next->next;
- }
- }
- }
- start += PAGE_SIZE;
+ if(hash_table->etag == tag){
+ __rem_hash_head(hcb, hash_table);
+ }
+ else{
+ prev=hash_table;
+ next=prev->next;
+ while(next){
+ if(next->etag == tag){
+ prev->next=next->next;
+ cch_free(hcb,next);
+ hash_table->len--;
+ break;
+ }
+ prev=next;
+ next=next->next;
+ }
+ }
+ }
+ start += def_size;
}
// machine_tlb_purge(va, ps);
}
@@ -319,26 +319,26 @@ static void vhpt_purge(thash_cb_t *hcb,
start = va & (-size);
end = start + size;
while(start < end){
- hash_table = (thash_data_t *)ia64_thash(start);
- tag = ia64_ttag(start);
- if(hash_table->etag == tag ){
+ hash_table = (thash_data_t *)ia64_thash(start);
+ tag = ia64_ttag(start);
+ if(hash_table->etag == tag ){
__rem_hash_head(hcb, hash_table);
- }
- else{
- prev=hash_table;
- next=prev->next;
- while(next){
- if(next->etag == tag){
- prev->next=next->next;
- cch_free(hcb,next);
- hash_table->len--;
- break;
- }
- prev=next;
- next=next->next;
- }
- }
- start += PAGE_SIZE;
+ }
+ else{
+ prev=hash_table;
+ next=prev->next;
+ while(next){
+ if(next->etag == tag){
+ prev->next=next->next;
+ cch_free(hcb,next);
+ hash_table->len--;
+ break;
+ }
+ prev=next;
+ next=next->next;
+ }
+ }
+ start += PAGE_SIZE;
}
machine_tlb_purge(va, ps);
}
@@ -390,9 +390,9 @@ void vtlb_insert(thash_cb_t *hcb, u64 pt
vcpu_get_rr(current, va, &vrr.rrval);
if (vrr.ps != ps) {
// machine_tlb_insert(hcb->vcpu, entry);
- panic_domain(NULL, "not preferred ps with va: 0x%lx vrr.ps=%d ps=%d\n",
- va, vrr.ps, ps);
- return;
+ panic_domain(NULL, "not preferred ps with va: 0x%lx vrr.ps=%d
ps=%ld\n",
+ va, vrr.ps, ps);
+ return;
}
hash_table = vsa_thash(hcb->pta, va, vrr.rrval, &tag);
if( INVALID_TLB(hash_table) ) {
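
[The functional change in vtlb_purge() above, beyond the reindentation: the walk now advances by the region's preferred page size (PSIZE(vrr.ps)) rather than the fixed machine PAGE_SIZE, visiting one hash bucket per preferred page. Schematically, with the bucket unlinking elided:]

    /* Purge stride sketch: one vsa_thash() lookup per preferred page. */
    u64 def_size = PSIZE(vrr.ps);     /* from the region register */
    for (start = va & (-size); start < end; start += def_size) {
        hash_table = vsa_thash(hcb->pta, start, vrr.rrval, &tag);
        /* ...unlink any chain entry whose etag matches tag... */
    }
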
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/Makefile
--- a/xen/arch/ia64/xen/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/Makefile Tue May 30 14:30:34 2006 -0500
@@ -2,6 +2,7 @@ obj-y += dom0_ops.o
obj-y += dom0_ops.o
obj-y += domain.o
obj-y += dom_fw.o
+obj-y += efi_emul.o
obj-y += hpsimserial.o
obj-y += hypercall.o
obj-y += hyperprivop.o
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/dom0_ops.c Tue May 30 14:30:34 2006 -0500
@@ -151,10 +151,7 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
put_domain(d);
}
break;
- /*
- * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 -
- * it actually allocates and maps pages.
- */
+
case DOM0_GETMEMLIST:
{
unsigned long i = 0;
@@ -198,7 +195,8 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
ret = -ENOMEM;
op->u.getmemlist.num_pfns = i - start_page;
- copy_to_guest(u_dom0_op, op, 1);
+ if (copy_to_guest(u_dom0_op, op, 1))
+ ret = -EFAULT;
put_domain(d);
}
@@ -264,10 +262,6 @@ do_dom0vp_op(unsigned long cmd,
}
ret = get_gpfn_from_mfn(arg0);
break;
- case IA64_DOM0VP_populate_physmap:
- ret = dom0vp_populate_physmap(d, arg0,
- (unsigned int)arg1, (unsigned int)arg2);
- break;
case IA64_DOM0VP_zap_physmap:
ret = dom0vp_zap_physmap(d, arg0, (unsigned int)arg1);
break;
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/dom_fw.c
--- a/xen/arch/ia64/xen/dom_fw.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/dom_fw.c Tue May 30 14:30:34 2006 -0500
@@ -462,7 +462,7 @@ static void print_md(efi_memory_desc_t *
static void print_md(efi_memory_desc_t *md)
{
#if 1
- printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx)
(%luMB)\n",
+ printk("domain mem: type=%2u, attr=0x%016lx, range=[0x%016lx-0x%016lx)
(%luMB)\n",
md->type, md->attribute, md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
md->num_pages >> (20 - EFI_PAGE_SHIFT));
@@ -541,7 +541,7 @@ struct fake_acpi_tables {
struct fadt_descriptor_rev2 fadt;
struct facs_descriptor_rev2 facs;
struct acpi_table_header dsdt;
- u8 aml[16];
+ u8 aml[8 + 11 * MAX_VIRT_CPUS];
struct acpi_table_madt madt;
struct acpi_table_lsapic lsapic[MAX_VIRT_CPUS];
u8 pm1a_evt_blk[4];
@@ -561,6 +561,7 @@ dom_fw_fake_acpi(struct domain *d, struc
struct acpi_table_madt *madt = &tables->madt;
struct acpi_table_lsapic *lsapic = tables->lsapic;
int i;
+ int aml_len;
memset(tables, 0, sizeof(struct fake_acpi_tables));
@@ -629,7 +630,6 @@ dom_fw_fake_acpi(struct domain *d, struc
/* setup DSDT with trivial namespace. */
strncpy(dsdt->signature, DSDT_SIG, 4);
dsdt->revision = 1;
- dsdt->length = sizeof(struct acpi_table_header) + sizeof(tables->aml);
strcpy(dsdt->oem_id, "XEN");
strcpy(dsdt->oem_table_id, "Xen/ia64");
strcpy(dsdt->asl_compiler_id, "XEN");
@@ -637,15 +637,33 @@ dom_fw_fake_acpi(struct domain *d, struc
/* Trivial namespace, avoids ACPI CA complaints */
tables->aml[0] = 0x10; /* Scope */
- tables->aml[1] = 0x12; /* length/offset to next object */
- strncpy((char *)&tables->aml[2], "_SB_", 4);
+ tables->aml[1] = 0x40; /* length/offset to next object (patched) */
+ tables->aml[2] = 0x00;
+ strncpy((char *)&tables->aml[3], "_SB_", 4);
/* The processor object isn't absolutely necessary, revist for SMP */
- tables->aml[6] = 0x5b; /* processor object */
- tables->aml[7] = 0x83;
- tables->aml[8] = 0x0b; /* next */
- strncpy((char *)&tables->aml[9], "CPU0", 4);
-
+ aml_len = 7;
+ for (i = 0; i < 3; i++) {
+ unsigned char *p = tables->aml + aml_len;
+ p[0] = 0x5b; /* processor object */
+ p[1] = 0x83;
+ p[2] = 0x0b; /* next */
+ p[3] = 'C';
+ p[4] = 'P';
+ snprintf ((char *)p + 5, 3, "%02x", i);
+ if (i < 16)
+ p[5] = 'U';
+ p[7] = i; /* acpi_id */
+ p[8] = 0; /* pblk_addr */
+ p[9] = 0;
+ p[10] = 0;
+ p[11] = 0;
+ p[12] = 0; /* pblk_len */
+ aml_len += 13;
+ }
+ tables->aml[1] = 0x40 + ((aml_len - 1) & 0x0f);
+ tables->aml[2] = (aml_len - 1) >> 4;
+ dsdt->length = sizeof(struct acpi_table_header) + aml_len;
dsdt->checksum = generate_acpi_checksum(dsdt, dsdt->length);
/* setup MADT */
@@ -662,6 +680,7 @@ dom_fw_fake_acpi(struct domain *d, struc
for (i = 0; i < MAX_VIRT_CPUS; i++) {
lsapic[i].header.type = ACPI_MADT_LSAPIC;
lsapic[i].header.length = sizeof(struct acpi_table_lsapic);
+ lsapic[i].acpi_id = i;
lsapic[i].id = i;
lsapic[i].eid = 0;
lsapic[i].flags.enabled = (d->vcpu[i] != NULL);
@@ -798,6 +817,9 @@ dom_fw_init (struct domain *d, const cha
pfn = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn);
cmd_line = (void *) cp;
+ /* Initialise for EFI_SET_VIRTUAL_ADDRESS_MAP emulation */
+ d->arch.efi_runtime = efi_runtime;
+
if (args) {
if (arglen >= 1024)
arglen = 1023;
@@ -959,7 +981,7 @@ dom_fw_init (struct domain *d, const cha
MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);//XXX
#endif
/* hypercall patches live here, masquerade as reserved PAL
memory */
- MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0);
+ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 0);
MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem-IA64_GRANULE_SIZE, 0);//XXX make sure this doesn't overlap on i/o, runtime area.
#ifndef CONFIG_XEN_IA64_DOM0_VP
/* hack */
MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,last_start,last_end,1);
@@ -993,7 +1015,7 @@ dom_fw_init (struct domain *d, const cha
MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 1);
#endif
/* hypercall patches live here, masquerade as reserved PAL
memory */
- MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 1);
+ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 1);
MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 1);
/* Create a dummy entry for IO ports, so that IO accesses are
trapped by Xen. */
@@ -1009,7 +1031,7 @@ dom_fw_init (struct domain *d, const cha
BUG_ON(i > NUM_MEM_DESCS);
bp->efi_memmap_size = i * sizeof(efi_memory_desc_t);
bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
- bp->efi_memdesc_version = 1;
+ bp->efi_memdesc_version = EFI_MEMDESC_VERSION;
bp->command_line = dom_pa((unsigned long) cmd_line);
bp->console_info.num_cols = 80;
bp->console_info.num_rows = 25;
@@ -1019,7 +1041,8 @@ dom_fw_init (struct domain *d, const cha
if (d == dom0) {
// XXX CONFIG_XEN_IA64_DOM0_VP
// initrd_start address is hard coded in start_kernel()
- bp->initrd_start = ia64_boot_param->initrd_start;
+ bp->initrd_start = (dom0_start+dom0_size) -
+ (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
bp->initrd_size = ia64_boot_param->initrd_size;
}
else {
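
[The aml[1]/aml[2] stores in the hunk above are AML's two-byte PkgLength encoding: bits 7:6 of the lead byte give the count of extra length bytes (0x40 = one), its low nibble carries length bits 3:0, and the following byte carries bits 11:4. A sketch of the encoder, assuming the length fits in 12 bits:]

    /* Two-byte AML PkgLength, as produced by the stores above. */
    static void aml_pkg_length2(unsigned char *p, unsigned int len)
    {
        p[0] = 0x40 | (len & 0x0f);   /* one extra byte follows; bits 3:0 */
        p[1] = len >> 4;              /* bits 11:4 */
    }
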
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/domain.c Tue May 30 14:30:34 2006 -0500
@@ -77,36 +77,19 @@ static void init_switch_stack(struct vcp
static void init_switch_stack(struct vcpu *v);
void build_physmap_table(struct domain *d);
+static void try_to_clear_PGC_allocate(struct domain* d,
+ struct page_info* page);
+
/* this belongs in include/asm, but there doesn't seem to be a suitable place */
void arch_domain_destroy(struct domain *d)
{
- struct page_info *page;
- struct list_head *ent, *prev;
-
- if (d->arch.mm->pgd != NULL)
- {
- list_for_each ( ent, &d->arch.mm->pt_list )
- {
- page = list_entry(ent, struct page_info, list);
- prev = ent->prev;
- list_del(ent);
- free_xenheap_page(page_to_virt(page));
- ent = prev;
- }
- pgd_free(d->arch.mm->pgd);
- }
- if (d->arch.mm != NULL)
- xfree(d->arch.mm);
+ BUG_ON(d->arch.mm.pgd != NULL);
if (d->shared_info != NULL)
free_xenheap_page(d->shared_info);
+ domain_flush_destroy (d);
+
deallocate_rid_range(d);
-
- /* It is really good in this? */
- flush_tlb_all();
-
- /* It is really good in this? */
- vhpt_flush_all();
}
static void default_idle(void)
@@ -179,7 +162,6 @@ struct vcpu *alloc_vcpu_struct(struct do
memset(&d->shared_info->evtchn_mask[0], 0xff,
sizeof(d->shared_info->evtchn_mask));
- v->vcpu_info = &(d->shared_info->vcpu_info[0]);
v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
@@ -239,7 +221,8 @@ int arch_domain_create(struct domain *d)
// the following will eventually need to be negotiated dynamically
d->xen_vastart = XEN_START_ADDR;
d->xen_vaend = XEN_END_ADDR;
- d->shared_info_va = SHAREDINFO_ADDR;
+ d->arch.shared_info_va = SHAREDINFO_ADDR;
+ d->arch.breakimm = 0x1000;
if (is_idle_domain(d))
return 0;
@@ -255,26 +238,20 @@ int arch_domain_create(struct domain *d)
*/
if (!allocate_rid_range(d,0))
goto fail_nomem;
- d->arch.breakimm = 0x1000;
d->arch.sys_pgnr = 0;
- if ((d->arch.mm = xmalloc(struct mm_struct)) == NULL)
- goto fail_nomem;
- memset(d->arch.mm, 0, sizeof(*d->arch.mm));
- INIT_LIST_HEAD(&d->arch.mm->pt_list);
+ memset(&d->arch.mm, 0, sizeof(d->arch.mm));
d->arch.physmap_built = 0;
- if ((d->arch.mm->pgd = pgd_alloc(d->arch.mm)) == NULL)
+ if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
goto fail_nomem;
printf ("arch_domain_create: domain=%p\n", d);
return 0;
fail_nomem:
- if (d->arch.mm->pgd != NULL)
- pgd_free(d->arch.mm->pgd);
- if (d->arch.mm != NULL)
- xfree(d->arch.mm);
+ if (d->arch.mm.pgd != NULL)
+ pgd_free(d->arch.mm.pgd);
if (d->shared_info != NULL)
free_xenheap_page(d->shared_info);
return -ENOMEM;
@@ -282,11 +259,7 @@ fail_nomem:
void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
{
- struct pt_regs *regs = vcpu_regs (v);
-
- c->regs = *regs;
- c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
-
+ c->regs = *vcpu_regs (v);
c->shared = v->domain->shared_info->arch;
}
@@ -325,11 +298,10 @@ int arch_set_info_guest(struct vcpu *v,
}
new_thread(v, regs->cr_iip, 0, 0);
- v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
- if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
- c->vcpu.privregs, sizeof(mapped_regs_t))) {
+ if ( c->privregs && copy_from_user(v->arch.privregs,
+ c->privregs, sizeof(mapped_regs_t))) {
printk("Bad ctxt address in arch_set_info_guest: %p\n",
- c->vcpu.privregs);
+ c->privregs);
return -EFAULT;
}
@@ -394,19 +366,129 @@ static void relinquish_memory(struct dom
/* Follow the list chain and /then/ potentially free the page. */
ent = ent->next;
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#if 1
+ BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY);
+#else
+ //XXX this should be done at traversing the P2M table.
+ if (page_get_owner(page) == d)
+ set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
+#endif
+#endif
put_page(page);
}
spin_unlock_recursive(&d->page_alloc_lock);
}
+static void
+relinquish_pte(struct domain* d, pte_t* pte)
+{
+ unsigned long mfn = pte_pfn(*pte);
+ struct page_info* page;
+
+ // vmx domain use bit[58:56] to distinguish io region from memory.
+ // see vmx_build_physmap_table() in vmx_init.c
+ if (((mfn << PAGE_SHIFT) & GPFN_IO_MASK) != GPFN_MEM)
+ return;
+
+ // domain might map IO space or acpi table pages. check it.
+ if (!mfn_valid(mfn))
+ return;
+ page = mfn_to_page(mfn);
+    // A struct page_info corresponding to mfn may or may not exist,
+    // depending on CONFIG_VIRTUAL_FRAME_TABLE.
+    // This check is too simplistic; the right way is to check whether
+    // this page belongs to an io area or to the acpi tables.
+ if (page_get_owner(page) == NULL) {
+ BUG_ON(page->count_info != 0);
+ return;
+ }
+
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+ if (page_get_owner(page) == d) {
+ BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY);
+ set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ }
+#endif
+ try_to_clear_PGC_allocate(d, page);
+ put_page(page);
+}
+
+static void
+relinquish_pmd(struct domain* d, pmd_t* pmd, unsigned long offset)
+{
+ unsigned long i;
+ pte_t* pte = pte_offset_map(pmd, offset);
+
+ for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ if (!pte_present(*pte))
+ continue;
+
+ relinquish_pte(d, pte);
+ }
+ pte_free_kernel(pte_offset_map(pmd, offset));
+}
+
+static void
+relinquish_pud(struct domain* d, pud_t *pud, unsigned long offset)
+{
+ unsigned long i;
+ pmd_t *pmd = pmd_offset(pud, offset);
+
+ for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+ if (!pmd_present(*pmd))
+ continue;
+
+ relinquish_pmd(d, pmd, offset + (i << PMD_SHIFT));
+ }
+ pmd_free(pmd_offset(pud, offset));
+}
+
+static void
+relinquish_pgd(struct domain* d, pgd_t *pgd, unsigned long offset)
+{
+ unsigned long i;
+ pud_t *pud = pud_offset(pgd, offset);
+
+ for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ if (!pud_present(*pud))
+ continue;
+
+ relinquish_pud(d, pud, offset + (i << PUD_SHIFT));
+ }
+ pud_free(pud_offset(pgd, offset));
+}
+
+static void
+relinquish_mm(struct domain* d)
+{
+ struct mm_struct* mm = &d->arch.mm;
+ unsigned long i;
+ pgd_t* pgd;
+
+ if (mm->pgd == NULL)
+ return;
+
+ pgd = pgd_offset(mm, 0);
+ for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+ if (!pgd_present(*pgd))
+ continue;
+
+ relinquish_pgd(d, pgd, i << PGDIR_SHIFT);
+ }
+ pgd_free(mm->pgd);
+ mm->pgd = NULL;
+}
+
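
The relinquish_pte/pmd/pud/pgd helpers above do a depth-first teardown of the domain's four-level page table: each level visits its present entries, recurses, and frees its own table page on the way back up. A minimal standalone sketch of that bottom-up pattern, using a toy two-level table instead of the real ia64 accessors (every name below is illustrative, not from the patch):

    #include <stdlib.h>

    #define ENTRIES 4

    /* Toy analogue of relinquish_pmd(): drop each present leaf,
     * then free the table itself (the pte_free_kernel() step). */
    static void teardown_leaf(void **table)
    {
        for (int i = 0; i < ENTRIES; i++)
            free(table[i]);              /* put_page() analogue */
        free(table);
    }

    /* Toy analogue of relinquish_mm(): recurse into present
     * entries first, then free the root (the pgd_free() step). */
    static void teardown_root(void ***root)
    {
        for (int i = 0; i < ENTRIES; i++)
            if (root[i])                 /* pmd_present() analogue */
                teardown_leaf(root[i]);
        free(root);
    }
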
void domain_relinquish_resources(struct domain *d)
{
/* Relinquish every page of memory. */
- /* xenheap_list is not used in ia64. */
- BUG_ON(!list_empty(&d->xenpage_list));
-
+    // release pages by traversing d->arch.mm.
+ relinquish_mm(d);
+
+ relinquish_memory(d, &d->xenpage_list);
relinquish_memory(d, &d->page_list);
}
@@ -483,11 +565,58 @@ void new_thread(struct vcpu *v,
}
}
+// stolen from share_xen_page_with_guest() in xen/arch/x86/mm.c
+void
+share_xen_page_with_guest(struct page_info *page,
+ struct domain *d, int readonly)
+{
+ if ( page_get_owner(page) == d )
+ return;
+
+#if 1
+ if (readonly) {
+ printk("%s:%d readonly is not supported yet\n", __func__, __LINE__);
+ }
+#endif
+
+ // alloc_xenheap_pages() doesn't initialize page owner.
+ //BUG_ON(page_get_owner(page) != NULL);
+#if 0
+ if (get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY) {
+ printk("%s:%d page 0x%p mfn 0x%lx gpfn 0x%lx\n", __func__, __LINE__,
+ page, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)));
+ }
+#endif
+    // grant_table_destroy() releases these pages,
+    // but it doesn't clear the m2p entry, so a stale entry may remain.
+    // We clear such a stale entry here.
+ set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
+
+ spin_lock(&d->page_alloc_lock);
+
+#ifndef __ia64__
+ /* The incremented type count pins as writable or read-only. */
+ page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
+ page->u.inuse.type_info |= PGT_validated | 1;
+#endif
+
+ page_set_owner(page, d);
+ wmb(); /* install valid domain ptr before updating refcnt. */
+ ASSERT(page->count_info == 0);
+ page->count_info |= PGC_allocated | 1;
+
+ if ( unlikely(d->xenheap_pages++ == 0) )
+ get_knownalive_domain(d);
+ list_add_tail(&page->list, &d->xenpage_list);
+
+ spin_unlock(&d->page_alloc_lock);
+}
+
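
share_xen_page_with_guest() accounts a xenheap page to the domain: scrub any stale m2p entry left behind by grant_table_destroy(), set the owner, take the first reference, and link the page onto d->xenpage_list. A compressed restatement of that ordering (a reading aid only, not replacement code):

    /* 1. set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY): scrub stale m2p.
     * 2. page_set_owner(page, d); wmb(): owner visible before refcnt.
     * 3. count_info |= PGC_allocated | 1: the pinning reference.
     * 4. d->xenheap_pages++ (the 0 -> 1 transition also takes a
     *    domain reference) and list_add_tail() onto d->xenpage_list.
     */
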
+//XXX should !xxx_present() be used instead of !xxx_none()?
static pte_t*
lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
{
- struct page_info *pt;
- struct mm_struct *mm = d->arch.mm;
+ struct mm_struct *mm = &d->arch.mm;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -496,22 +625,16 @@ lookup_alloc_domain_pte(struct domain* d
pgd = pgd_offset(mm, mpaddr);
if (pgd_none(*pgd)) {
pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
- pt = maddr_to_page(pgd_val(*pgd));
- list_add_tail(&pt->list, &d->arch.mm->pt_list);
}
pud = pud_offset(pgd, mpaddr);
if (pud_none(*pud)) {
pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
- pt = maddr_to_page(pud_val(*pud));
- list_add_tail(&pt->list, &d->arch.mm->pt_list);
}
pmd = pmd_offset(pud, mpaddr);
if (pmd_none(*pmd)) {
pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
- pt = maddr_to_page(pmd_val(*pmd));
- list_add_tail(&pt->list, &d->arch.mm->pt_list);
}
return pte_offset_map(pmd, mpaddr);
@@ -521,7 +644,7 @@ static pte_t*
static pte_t*
lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
{
- struct mm_struct *mm = d->arch.mm;
+ struct mm_struct *mm = &d->arch.mm;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -549,7 +672,7 @@ static pte_t*
static pte_t*
lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
{
- struct mm_struct *mm = d->arch.mm;
+ struct mm_struct *mm = &d->arch.mm;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -581,6 +704,7 @@ __assign_new_domain_page(struct domain *
{
struct page_info *p = NULL;
unsigned long maddr;
+ int ret;
BUG_ON(!pte_none(*pte));
@@ -601,14 +725,13 @@ __assign_new_domain_page(struct domain *
#endif
p = alloc_domheap_page(d);
- // zero out pages for security reasons
- if (p)
- clear_page(page_to_virt(p));
-
if (unlikely(!p)) {
printf("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
return(p);
}
+
+ // zero out pages for security reasons
+ clear_page(page_to_virt(p));
maddr = page_to_maddr (p);
if (unlikely(maddr > __get_cpu_var(vhpt_paddr)
&& maddr < __get_cpu_var(vhpt_pend))) {
@@ -618,13 +741,15 @@ __assign_new_domain_page(struct domain *
maddr);
}
+ ret = get_page(p, d);
+ BUG_ON(ret == 0);
set_pte(pte, pfn_pte(maddr >> PAGE_SHIFT,
__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+ mb ();
//XXX CONFIG_XEN_IA64_DOM0_VP
// TODO racy
- if ((mpaddr & GPFN_IO_MASK) == GPFN_MEM)
- set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
+ set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
return p;
}
@@ -668,21 +793,38 @@ assign_new_domain0_page(struct domain *d
}
/* map a physical address to the specified metaphysical addr */
-void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr)
-{
- pte_t *pte;
-
- pte = lookup_alloc_domain_pte(d, mpaddr);
- if (pte_none(*pte)) {
- set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
- __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
-
- //XXX CONFIG_XEN_IA64_DOM0_VP
- // TODO racy
- if ((physaddr & GPFN_IO_MASK) == GPFN_MEM)
- set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
- }
- else printk("assign_domain_page: mpaddr %lx already mapped!\n",mpaddr);
+void
+__assign_domain_page(struct domain *d,
+ unsigned long mpaddr, unsigned long physaddr)
+{
+ pte_t *pte;
+
+ pte = lookup_alloc_domain_pte(d, mpaddr);
+ if (pte_none(*pte)) {
+ set_pte(pte,
+ pfn_pte(physaddr >> PAGE_SHIFT,
+ __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+ mb ();
+ } else
+ printk("%s: mpaddr %lx already mapped!\n", __func__, mpaddr);
+}
+
+/* get_page() and map a physical address to the specified metaphysical addr */
+void
+assign_domain_page(struct domain *d,
+ unsigned long mpaddr, unsigned long physaddr)
+{
+ struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT);
+ int ret;
+
+ BUG_ON((physaddr & GPFN_IO_MASK) != GPFN_MEM);
+ ret = get_page(page, d);
+ BUG_ON(ret == 0);
+ __assign_domain_page(d, mpaddr, physaddr);
+
+ //XXX CONFIG_XEN_IA64_DOM0_VP
+ // TODO racy
+ set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
}
#ifdef CONFIG_XEN_IA64_DOM0_VP
@@ -693,8 +835,58 @@ assign_domain_same_page(struct domain *d
//XXX optimization
unsigned long end = mpaddr + size;
for (; mpaddr < end; mpaddr += PAGE_SIZE) {
- assign_domain_page(d, mpaddr, mpaddr);
- }
+ __assign_domain_page(d, mpaddr, mpaddr);
+ }
+}
+
+static int
+efi_mmio(unsigned long physaddr, unsigned long size)
+{
+ void *efi_map_start, *efi_map_end;
+ u64 efi_desc_size;
+ void* p;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ efi_memory_desc_t* md = (efi_memory_desc_t *)p;
+ unsigned long start = md->phys_addr;
+ unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (start <= physaddr && physaddr < end) {
+ if ((physaddr + size) > end) {
+ DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
+ __func__, __LINE__, physaddr, size);
+ return 0;
+ }
+
+ // for io space
+ if (md->type == EFI_MEMORY_MAPPED_IO ||
+ md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
+ return 1;
+ }
+
+ // for runtime
+ // see efi_enter_virtual_mode(void)
+ // in linux/arch/ia64/kernel/efi.c
+ if ((md->attribute & EFI_MEMORY_RUNTIME) &&
+ !(md->attribute & EFI_MEMORY_WB)) {
+ return 1;
+ }
+
+ DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
+ __func__, __LINE__, physaddr, size);
+ return 0;
+ }
+
+ if (physaddr < start) {
+ break;
+ }
+ }
+
+ return 1;
}
unsigned long
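
efi_mmio() scans the boot-time EFI memory map linearly: a request is accepted only if it lies entirely within a memory-mapped-I/O descriptor or an uncacheable runtime-services descriptor, and ranges the map does not describe at all are permitted. A simplified standalone model of that decision, with a stand-in struct instead of the real efi_memory_desc_t (illustrative only):

    struct desc { unsigned long start, end; int mmio_ok; };

    /* Mirrors efi_mmio()'s rules: fully inside a qualifying
     * descriptor, or not covered by the (sorted) map at all. */
    static int range_is_mmio(const struct desc *map, int n,
                             unsigned long pa, unsigned long size)
    {
        for (int i = 0; i < n; i++) {
            if (map[i].start <= pa && pa < map[i].end)
                return (pa + size <= map[i].end) && map[i].mmio_ok;
            if (pa < map[i].start)
                break;      /* sorted map: nothing covers pa */
        }
        return 1;           /* undescribed ranges are allowed */
    }
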
@@ -704,6 +896,11 @@ assign_domain_mmio_page(struct domain *d
if (size == 0) {
DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
__func__, d, mpaddr, size);
+ }
+ if (!efi_mmio(mpaddr, size)) {
+ DPRINTK("%s:%d domain %p mpaddr 0x%lx size = 0x%lx\n",
+ __func__, __LINE__, d, mpaddr, size);
+ return -EINVAL;
}
assign_domain_same_page(d, mpaddr, size);
return mpaddr;
@@ -723,23 +920,55 @@ domain_page_flush(struct domain* d, unsi
domain_page_flush(struct domain* d, unsigned long mpaddr,
unsigned long old_mfn, unsigned long new_mfn)
{
- struct vcpu* v;
- //XXX SMP
- for_each_vcpu(d, v) {
- vcpu_purge_tr_entry(&v->arch.dtlb);
- vcpu_purge_tr_entry(&v->arch.itlb);
- }
-
- // flush vhpt
- vhpt_flush();
- // flush tlb
- flush_tlb_all();
-}
-
+ domain_flush_vtlb_all();
+}
+#endif
+
+//XXX heavily depends on the struct page_info layout.
+//
+// if (page_get_owner(page) == d &&
+// test_and_clear_bit(_PGC_allocated, &page->count_info)) {
+// put_page(page);
+// }
static void
-zap_domain_page_one(struct domain *d, unsigned long mpaddr)
-{
- struct mm_struct *mm = d->arch.mm;
+try_to_clear_PGC_allocate(struct domain* d, struct page_info* page)
+{
+ u32 _d, _nd;
+ u64 x, nx, y;
+
+ _d = pickle_domptr(d);
+ y = *((u64*)&page->count_info);
+ do {
+ x = y;
+ _nd = x >> 32;
+ nx = x - 1;
+ __clear_bit(_PGC_allocated, &nx);
+
+ if (unlikely(!(x & PGC_allocated)) || unlikely(_nd != _d)) {
+ struct domain* nd = unpickle_domptr(_nd);
+ if (nd == NULL) {
+ DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+ "sd=%p 0x%x,"
+ " caf=%016lx, taf=%" PRtype_info "\n",
+ (void *) page_to_mfn(page),
+ d, d->domain_id, _d,
+ nd, _nd,
+ x,
+ page->u.inuse.type_info);
+ }
+ break;
+ }
+
+ BUG_ON((nx & PGC_count_mask) < 1);
+ y = cmpxchg((u64*)&page->count_info, x, nx);
+ } while (unlikely(y != x));
+}
+
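
try_to_clear_PGC_allocate() works only because of the layout the XXX comment warns about: on little-endian ia64, count_info and the pickled owner pointer occupy the two halves of one aligned 64-bit word, so a single cmpxchg can check the owner and drop PGC_allocated together with one reference atomically. A sketch of that layout assumption (paraphrased field names, not the real struct page_info):

    #include <stdint.h>

    /* Assumed little-endian packing; illustrative only. */
    union pickled_page_word {
        uint64_t raw;                 /* the cmpxchg target       */
        struct {
            uint32_t count_info;      /* PGC_* flags + refcount   */
            uint32_t pickled_domain;  /* compressed owner pointer */
        } f;
    };
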
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+static void
+zap_domain_page_one(struct domain *d, unsigned long mpaddr, int do_put_page)
+{
+ struct mm_struct *mm = &d->arch.mm;
pte_t *pte;
pte_t old_pte;
unsigned long mfn;
@@ -755,6 +984,7 @@ zap_domain_page_one(struct domain *d, un
old_pte = ptep_get_and_clear(mm, mpaddr, pte);
mfn = pte_pfn(old_pte);
page = mfn_to_page(mfn);
+ BUG_ON((page->count_info & PGC_count_mask) == 0);
if (page_get_owner(page) == d) {
BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
@@ -763,7 +993,10 @@ zap_domain_page_one(struct domain *d, un
domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
- put_page(page);
+ if (do_put_page) {
+ try_to_clear_PGC_allocate(d, page);
+ put_page(page);
+ }
}
#endif
@@ -867,66 +1100,6 @@ unsigned long lookup_domain_mpa(struct d
#ifdef CONFIG_XEN_IA64_DOM0_VP
//XXX SMP
unsigned long
-dom0vp_populate_physmap(struct domain *d, unsigned long gpfn,
- unsigned int extent_order, unsigned int address_bits)
-{
- unsigned long ret = 0;
- int flags = 0;
- unsigned long mpaddr = gpfn << PAGE_SHIFT;
- unsigned long extent_size = 1UL << extent_order;
- unsigned long offset;
- struct page_info* page;
- unsigned long physaddr;
-
- if (extent_order > 0 && !multipage_allocation_permitted(d)) {
- ret = -EINVAL;
- goto out;
- }
-
- if (gpfn + (1 << extent_order) < gpfn) {
- ret = -EINVAL;
- goto out;
- }
- if (gpfn > d->max_pages || gpfn + (1 << extent_order) > d->max_pages) {
- ret = -EINVAL;
- goto out;
- }
- if ((extent_size << PAGE_SHIFT) < extent_size) {
- ret = -EINVAL;
- goto out;
- }
-
- //XXX check address_bits and set flags = ALLOC_DOM_DMA if needed
-
- // check the rage is not populated yet.
- //XXX loop optimization
- for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) {
- if (____lookup_domain_mpa(d, mpaddr + offset) != INVALID_MFN) {
- ret = -EBUSY;
- goto out;
- }
- }
-
- page = alloc_domheap_pages(d, extent_order, flags);
- if (page == NULL) {
- ret = -ENOMEM;
- DPRINTK("Could not allocate order=%d extent: id=%d flags=%x\n",
- extent_order, d->domain_id, flags);
- goto out;
- }
-
- //XXX loop optimization
- physaddr = page_to_maddr(page);
- for (offset = 0; offset < extent_size << PAGE_SHIFT; offset += PAGE_SIZE) {
- assign_domain_page(d, mpaddr + offset, physaddr + offset);
- }
-
-out:
- return ret;
-}
-
-//XXX SMP
-unsigned long
dom0vp_zap_physmap(struct domain *d, unsigned long gpfn,
unsigned int extent_order)
{
@@ -937,26 +1110,28 @@ dom0vp_zap_physmap(struct domain *d, uns
goto out;
}
- zap_domain_page_one(d, gpfn << PAGE_SHIFT);
+ zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1);
out:
return ret;
}
+// The caller must get_page(mfn_to_page(mfn)) beforehand and
+// must call set_gpfn_from_mfn() itself.
static void
assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
unsigned long mfn, unsigned int flags)
{
- struct mm_struct *mm = d->arch.mm;
+ struct mm_struct *mm = &d->arch.mm;
pte_t* pte;
pte_t old_pte;
+ pte_t npte;
pte = lookup_alloc_domain_pte(d, mpaddr);
// update pte
- old_pte = ptep_get_and_clear(mm, mpaddr, pte);
- set_pte(pte, pfn_pte(mfn,
- __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+ npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
+ old_pte = ptep_xchg(mm, mpaddr, pte, npte);
if (!pte_none(old_pte)) {
unsigned long old_mfn;
struct page_info* old_page;
@@ -973,8 +1148,10 @@ assign_domain_page_replace(struct domain
domain_page_flush(d, mpaddr, old_mfn, mfn);
+ try_to_clear_PGC_allocate(d, old_page);
put_page(old_page);
} else {
+ BUG_ON(!mfn_valid(mfn));
BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
}
@@ -1002,17 +1179,195 @@ dom0vp_add_physmap(struct domain* d, uns
}
assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* flags:XXX */);
+    // don't update the p2m table because this page belongs to rd, not d.
out1:
put_domain(rd);
out0:
return error;
}
+
+// grant table host mapping
+// mpaddr: host_addr: pseudo physical address
+// mfn: frame: machine page frame
+// flags: GNTMAP_readonly | GNTMAP_application_map | GNTMAP_contains_pte
+int
+create_grant_host_mapping(unsigned long gpaddr,
+ unsigned long mfn, unsigned int flags)
+{
+ struct domain* d = current->domain;
+ struct page_info* page;
+ int ret;
+
+ if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
+ DPRINTK("%s: flags 0x%x\n", __func__, flags);
+ return GNTST_general_error;
+ }
+ if (flags & GNTMAP_readonly) {
+#if 0
+ DPRINTK("%s: GNTMAP_readonly is not implemented yet. flags %x\n",
+ __func__, flags);
+#endif
+ flags &= ~GNTMAP_readonly;
+ }
+
+ page = mfn_to_page(mfn);
+ ret = get_page(page, page_get_owner(page));
+ BUG_ON(ret == 0);
+ assign_domain_page_replace(d, gpaddr, mfn, flags);
+
+ return GNTST_okay;
+}
+
+// grant table host unmapping
+int
+destroy_grant_host_mapping(unsigned long gpaddr,
+ unsigned long mfn, unsigned int flags)
+{
+ struct domain* d = current->domain;
+ pte_t* pte;
+ pte_t old_pte;
+ unsigned long old_mfn = INVALID_MFN;
+ struct page_info* old_page;
+
+ if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
+ DPRINTK("%s: flags 0x%x\n", __func__, flags);
+ return GNTST_general_error;
+ }
+ if (flags & GNTMAP_readonly) {
+#if 0
+ DPRINTK("%s: GNTMAP_readonly is not implemented yet. flags %x\n",
+ __func__, flags);
+#endif
+ flags &= ~GNTMAP_readonly;
+ }
+
+ pte = lookup_noalloc_domain_pte(d, gpaddr);
+ if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn)
+ return GNTST_general_error;//XXX GNTST_bad_pseudo_phys_addr
+
+ // update pte
+ old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte);
+ if (pte_present(old_pte)) {
+ old_mfn = pte_pfn(old_pte);//XXX
+ }
+ domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN);
+
+ old_page = mfn_to_page(old_mfn);
+    BUG_ON(page_get_owner(old_page) == d);// try_to_clear_PGC_allocate(d, page) is not needed.
+ put_page(old_page);
+
+ return GNTST_okay;
+}
+
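
create_grant_host_mapping() and destroy_grant_host_mapping() are the ia64 backend hooks for the generic grant-table code, which is expected to call them as a matched pair around each guest mapping. A hedged sketch of that pairing (the real caller lives in xen/common/grant_table.c; this wrapper exists only for illustration):

    /* Illustrative pairing, not actual grant-table code. */
    static int map_then_unmap(unsigned long host_addr,
                              unsigned long frame, unsigned int flags)
    {
        int rc = create_grant_host_mapping(host_addr, frame, flags);
        if (rc != GNTST_okay)
            return rc;
        /* ... the granted frame is reachable at host_addr ... */
        return destroy_grant_host_mapping(host_addr, frame, flags);
    }
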
+//XXX needs refcount patch
+//XXX heavily depends on the struct page layout.
+//XXX SMP
+int
+steal_page_for_grant_transfer(struct domain *d, struct page_info *page)
+{
+#if 0 /* if big endian */
+# error "implement big endian version of steal_page_for_grant_transfer()"
+#endif
+ u32 _d, _nd;
+ u64 x, nx, y;
+ unsigned long mpaddr = get_gpfn_from_mfn(page_to_mfn(page)) << PAGE_SHIFT;
+ struct page_info *new;
+
+ zap_domain_page_one(d, mpaddr, 0);
+ put_page(page);
+
+ spin_lock(&d->page_alloc_lock);
+
+ /*
+ * The tricky bit: atomically release ownership while there is just one
+ * benign reference to the page (PGC_allocated). If that reference
+ * disappears then the deallocation routine will safely spin.
+ */
+ _d = pickle_domptr(d);
+ y = *((u64*)&page->count_info);
+ do {
+ x = y;
+ nx = x & 0xffffffff;
+ // page->count_info: untouched
+        // page->u.inuse._domain = 0;
+ _nd = x >> 32;
+
+ if (unlikely((x & (PGC_count_mask | PGC_allocated)) !=
+ (1 | PGC_allocated)) ||
+ unlikely(_nd != _d)) {
+ struct domain* nd = unpickle_domptr(_nd);
+ if (nd == NULL) {
+ DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+ "sd=%p 0x%x,"
+ " caf=%016lx, taf=%" PRtype_info "\n",
+ (void *) page_to_mfn(page),
+ d, d->domain_id, _d,
+ nd, _nd,
+ x,
+ page->u.inuse.type_info);
+ } else {
+ DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
+ "sd=%p(%u) 0x%x,"
+ " caf=%016lx, taf=%" PRtype_info "\n",
+ (void *) page_to_mfn(page),
+ d, d->domain_id, _d,
+ nd, nd->domain_id, _nd,
+ x,
+ page->u.inuse.type_info);
+ }
+ spin_unlock(&d->page_alloc_lock);
+ return -1;
+ }
+
+ y = cmpxchg((u64*)&page->count_info, x, nx);
+ } while (unlikely(y != x));
+
+ /*
+ * Unlink from 'd'. At least one reference remains (now anonymous), so
+     * no one else is spinning to try to delete this page from 'd'.
+ */
+ d->tot_pages--;
+ list_del(&page->list);
+
+ spin_unlock(&d->page_alloc_lock);
+
+#if 1
+    //XXX Until net_rx_action() is fixed
+    // assign a new page for this mpaddr
+ new = assign_new_domain_page(d, mpaddr);
+ BUG_ON(new == NULL);//XXX
+#endif
+
+ return 0;
+}
+
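
steal_page_for_grant_transfer() strings together several of the primitives above; restated as straight-line steps it reads as follows (a reading aid only, error paths omitted):

    /* 1. zap_domain_page_one(d, mpaddr, 0): clear the p2m entry
     *    without dropping the PGC_allocated reference.
     * 2. put_page(page): drop the reference held for the mapping.
     * 3. cmpxchg on the combined count_info/_domain word: succeed
     *    only if exactly (1 | PGC_allocated) remains and d still
     *    owns the page; clear the owner, keep one anonymous ref.
     * 4. Under d->page_alloc_lock: d->tot_pages--, list_del().
     * 5. assign_new_domain_page(): re-populate mpaddr (the
     *    net_rx_action() workaround noted above).
     */
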
+void
+guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn)
+{
+ int ret;
+
+ ret = get_page(mfn_to_page(mfn), d);
+ BUG_ON(ret == 0);
+ assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, 0/* XXX */);
+ set_gpfn_from_mfn(mfn, gpfn);//XXX SMP
+
+    //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT));
+}
+
+void
+guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn)
+{
+ BUG_ON(mfn == 0);//XXX
+ zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1);
+}
#endif
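
guest_physmap_add_page() must leave the two translation directions consistent: the p2m entry installed via assign_domain_page_replace() and the m2p entry written by set_gpfn_from_mfn(). A minimal sketch of that invariant, modeled on the commented-out BUG_ON above (the _PFN_MASK masking follows that comment; the helper itself is hypothetical):

    /* Illustrative check after guest_physmap_add_page(d, gpfn, mfn). */
    static void check_p2m_m2p(struct domain *d, unsigned long gpfn,
                              unsigned long mfn)
    {
        BUG_ON(get_gpfn_from_mfn(mfn) != gpfn);            /* m2p */
        BUG_ON(((lookup_domain_mpa(d, gpfn << PAGE_SHIFT)
                 & _PFN_MASK) >> PAGE_SHIFT) != mfn);      /* p2m */
    }
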
/* Flush cache of domain d. */
void domain_cache_flush (struct domain *d, int sync_only)
{
- struct mm_struct *mm = d->arch.mm;
+ struct mm_struct *mm = &d->arch.mm;
pgd_t *pgd = mm->pgd;
unsigned long maddr;
int i,j,k, l;
@@ -1478,9 +1833,9 @@ void domain_pend_keyboard_interrupt(int
void sync_vcpu_execstate(struct vcpu *v)
{
- __ia64_save_fpu(v->arch._thread.fph);
- if (VMX_DOMAIN(v))
- vmx_save_state(v);
+// __ia64_save_fpu(v->arch._thread.fph);
+// if (VMX_DOMAIN(v))
+// vmx_save_state(v);
// FIXME SMP: Anything else needed here for SMP?
}
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/hypercall.c Tue May 30 14:30:34 2006 -0500
@@ -26,7 +26,6 @@
#include <public/physdev.h>
#include <xen/domain.h>
-extern unsigned long translate_domain_mpaddr(unsigned long);
static long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop);
static long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg);
/* FIXME: where these declarations should be there ? */
@@ -71,13 +70,39 @@ hypercall_t ia64_hypercall_table[] =
(hypercall_t)do_ni_hypercall, /* */
/* 30 */
(hypercall_t)do_ni_hypercall, /* */
(hypercall_t)do_event_channel_op,
- (hypercall_t)do_physdev_op
+ (hypercall_t)do_physdev_op,
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */ /* 35 */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */ /* 40 */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */ /* 45 */
+ (hypercall_t)do_ni_hypercall, /* */
+ (hypercall_t)do_ni_hypercall, /* */
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+ (hypercall_t)do_dom0vp_op, /* dom0vp_op */
+#else
+ (hypercall_t)do_ni_hypercall, /* arch_0 */
+#endif
+ (hypercall_t)do_ni_hypercall, /* arch_1 */
+ (hypercall_t)do_ni_hypercall, /* arch_2 */ /* 50 */
+ (hypercall_t)do_ni_hypercall, /* arch_3 */
+ (hypercall_t)do_ni_hypercall, /* arch_4 */
+ (hypercall_t)do_ni_hypercall, /* arch_5 */
+ (hypercall_t)do_ni_hypercall, /* arch_6 */
+ (hypercall_t)do_ni_hypercall /* arch_7 */ /* 55 */
};
uint32_t nr_hypercalls =
sizeof(ia64_hypercall_table) / sizeof(hypercall_t);
-static int
+static IA64FAULT
xen_hypercall (struct pt_regs *regs)
{
uint32_t cmd = (uint32_t)regs->r2;
@@ -91,15 +116,9 @@ xen_hypercall (struct pt_regs *regs)
regs->r18,
regs->r19);
else
-#ifdef CONFIG_XEN_IA64_DOM0_VP
- if (cmd == __HYPERVISOR_ia64_dom0vp_op)
- regs->r8 = do_dom0vp_op(regs->r14, regs->r15, regs->r16,
- regs->r17, regs->r18);
- else
-#endif
regs->r8 = -ENOSYS;
- return 1;
+ return IA64_NO_FAULT;
}
@@ -134,9 +153,6 @@ fw_hypercall_ipi (struct pt_regs *regs)
c.regs.cr_iip = targ_regs->cr_iip;
c.regs.r1 = targ_regs->r1;
- /* Copy from vcpu 0. */
- c.vcpu.evtchn_vector =
- current->domain->vcpu[0]->vcpu_info->arch.evtchn_vector;
if (arch_set_info_guest (targ, &c) != 0) {
printf ("arch_boot_vcpu: failure\n");
return;
@@ -162,14 +178,16 @@ fw_hypercall_ipi (struct pt_regs *regs)
return;
}
-static int
+static IA64FAULT
fw_hypercall (struct pt_regs *regs)
{
struct vcpu *v = current;
struct sal_ret_values x;
- unsigned long *tv, *tc;
-
- switch (regs->r2) {
+ efi_status_t efi_ret_value;
+ IA64FAULT fault;
+ unsigned long index = regs->r2 & FW_HYPERCALL_NUM_MASK_HIGH;
+
+ switch (index) {
case FW_HYPERCALL_PAL_CALL:
//printf("*** PAL hypercall: index=%d\n",regs->r28);
//FIXME: This should call a C routine
@@ -227,40 +245,10 @@ fw_hypercall (struct pt_regs *regs)
regs->r8 = x.r8; regs->r9 = x.r9;
regs->r10 = x.r10; regs->r11 = x.r11;
break;
- case FW_HYPERCALL_EFI_RESET_SYSTEM:
- printf("efi.reset_system called ");
- if (current->domain == dom0) {
- printf("(by dom0)\n ");
- (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
- }
- else
- domain_shutdown (current->domain, SHUTDOWN_reboot);
- regs->r8 = EFI_UNSUPPORTED;
- break;
- case FW_HYPERCALL_EFI_GET_TIME:
- tv = (unsigned long *) vcpu_get_gr(v,32);
- tc = (unsigned long *) vcpu_get_gr(v,33);
- //printf("efi_get_time(%p,%p) called...",tv,tc);
-		tv = (unsigned long *) __va(translate_domain_mpaddr((unsigned long) tv));
-		if (tc) tc = (unsigned long *) __va(translate_domain_mpaddr((unsigned long) tc));
-		regs->r8 = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t *) tc);
- //printf("and returns %lx\n",regs->r8);
- break;
- case FW_HYPERCALL_EFI_SET_TIME:
- case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
- case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
- // FIXME: need fixes in efi.h from 2.6.9
- case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
- // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
- // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
- // POINTER ARGUMENTS WILL BE VIRTUAL!!
- case FW_HYPERCALL_EFI_GET_VARIABLE:
- // FIXME: need fixes in efi.h from 2.6.9
- case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
- case FW_HYPERCALL_EFI_SET_VARIABLE:
- case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
- // FIXME: need fixes in efi.h from 2.6.9
- regs->r8 = EFI_UNSUPPORTED;
+ case FW_HYPERCALL_EFI_CALL:
+ efi_ret_value = efi_emulator (regs, &fault);
+ if (fault != IA64_NO_FAULT) return fault;
+ regs->r8 = efi_ret_value;
break;
case FW_HYPERCALL_IPI:
fw_hypercall_ipi (regs);
@@ -269,7 +257,7 @@ fw_hypercall (struct pt_regs *regs)
printf("unknown ia64 fw hypercall %lx\n", regs->r2);
regs->r8 = do_ni_hypercall();
}
- return 1;
+ return IA64_NO_FAULT;
}
/* opt_unsafe_hypercall: If true, unsafe debugging hypercalls are allowed.
@@ -277,7 +265,7 @@ static int opt_unsafe_hypercall = 0;
static int opt_unsafe_hypercall = 0;
boolean_param("unsafe_hypercall", opt_unsafe_hypercall);
-int
+IA64FAULT
ia64_hypercall (struct pt_regs *regs)
{
struct vcpu *v = current;
@@ -307,7 +295,7 @@ ia64_hypercall (struct pt_regs *regs)
printf("unknown user xen/ia64 hypercall %lx\n", index);
regs->r8 = do_ni_hypercall();
}
- return 1;
+ return IA64_NO_FAULT;
}
/* Hypercalls are only allowed by kernel.
@@ -316,7 +304,7 @@ ia64_hypercall (struct pt_regs *regs)
/* FIXME: Return a better error value ?
Reflection ? Illegal operation ? */
regs->r8 = -1;
- return 1;
+ return IA64_NO_FAULT;
}
if (index >= FW_HYPERCALL_FIRST_ARCH)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/hyperprivop.S Tue May 30 14:30:34 2006 -0500
@@ -30,7 +30,7 @@
#undef FAST_ITC //XXX CONFIG_XEN_IA64_DOM0_VP
// TODO fast_itc doesn't suport dom0 vp yet.
#else
-//#define FAST_ITC // working but default off for now
+//#define FAST_ITC // to be reviewed
#endif
#define FAST_BREAK
#ifndef CONFIG_XEN_IA64_DOM0_VP
@@ -46,27 +46,8 @@
#undef RFI_TO_INTERRUPT // not working yet
#endif
-#define XEN_HYPER_RFI 0x1
-#define XEN_HYPER_RSM_DT 0x2
-#define XEN_HYPER_SSM_DT 0x3
-#define XEN_HYPER_COVER 0x4
-#define XEN_HYPER_ITC_D 0x5
-#define XEN_HYPER_ITC_I 0x6
-#define XEN_HYPER_SSM_I 0x7
-#define XEN_HYPER_GET_IVR 0x8
-#define XEN_HYPER_GET_TPR 0x9
-#define XEN_HYPER_SET_TPR 0xa
-#define XEN_HYPER_EOI 0xb
-#define XEN_HYPER_SET_ITM 0xc
-#define XEN_HYPER_THASH 0xd
-#define XEN_HYPER_PTC_GA 0xe
-#define XEN_HYPER_ITR_D 0xf
-#define XEN_HYPER_GET_RR 0x10
-#define XEN_HYPER_SET_RR 0x11
-#define XEN_HYPER_SET_KR 0x12
-
#ifdef CONFIG_SMP
-#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
+//#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
#undef FAST_PTC_GA
#endif
@@ -106,7 +87,7 @@ GLOBAL_ENTRY(fast_hyperprivop)
#endif
// HYPERPRIVOP_SSM_I?
// assumes domain interrupts pending, so just do it
- cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
+ cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
(p7) br.sptk.many hyper_ssm_i;;
// FIXME. This algorithm gives up (goes to the slow path) if there
@@ -127,75 +108,75 @@ 1: // when we get to here r20=~=interrup
1: // when we get to here r20=~=interrupts pending
// HYPERPRIVOP_RFI?
- cmp.eq p7,p6=XEN_HYPER_RFI,r17
+ cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
(p7) br.sptk.many hyper_rfi;;
// HYPERPRIVOP_GET_IVR?
- cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17
+ cmp.eq p7,p6=HYPERPRIVOP_GET_IVR,r17
(p7) br.sptk.many hyper_get_ivr;;
cmp.ne p7,p0=r20,r0
(p7) br.spnt.many dispatch_break_fault ;;
// HYPERPRIVOP_COVER?
- cmp.eq p7,p6=XEN_HYPER_COVER,r17
+ cmp.eq p7,p6=HYPERPRIVOP_COVER,r17
(p7) br.sptk.many hyper_cover;;
// HYPERPRIVOP_SSM_DT?
- cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
+ cmp.eq p7,p6=HYPERPRIVOP_SSM_DT,r17
(p7) br.sptk.many hyper_ssm_dt;;
// HYPERPRIVOP_RSM_DT?
- cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
+ cmp.eq p7,p6=HYPERPRIVOP_RSM_DT,r17
(p7) br.sptk.many hyper_rsm_dt;;
// HYPERPRIVOP_GET_TPR?
- cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17
+ cmp.eq p7,p6=HYPERPRIVOP_GET_TPR,r17
(p7) br.sptk.many hyper_get_tpr;;
// HYPERPRIVOP_SET_TPR?
- cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17
+ cmp.eq p7,p6=HYPERPRIVOP_SET_TPR,r17
(p7) br.sptk.many hyper_set_tpr;;
// HYPERPRIVOP_EOI?
- cmp.eq p7,p6=XEN_HYPER_EOI,r17
+ cmp.eq p7,p6=HYPERPRIVOP_EOI,r17
(p7) br.sptk.many hyper_eoi;;
// HYPERPRIVOP_SET_ITM?
- cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17
+ cmp.eq p7,p6=HYPERPRIVOP_SET_ITM,r17
(p7) br.sptk.many hyper_set_itm;;
// HYPERPRIVOP_SET_RR?
- cmp.eq p7,p6=XEN_HYPER_SET_RR,r17
+ cmp.eq p7,p6=HYPERPRIVOP_SET_RR,r17
(p7) br.sptk.many hyper_set_rr;;
// HYPERPRIVOP_GET_RR?
- cmp.eq p7,p6=XEN_HYPER_GET_RR,r17
+ cmp.eq p7,p6=HYPERPRIVOP_GET_RR,r17
(p7) br.sptk.many hyper_get_rr;;
// HYPERPRIVOP_PTC_GA?
- cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17
+ cmp.eq p7,p6=HYPERPRIVOP_PTC_GA,r17
(p7) br.sptk.many hyper_ptc_ga;;
// HYPERPRIVOP_ITC_D?
- cmp.eq p7,p6=XEN_HYPER_ITC_D,r17
+ cmp.eq p7,p6=HYPERPRIVOP_ITC_D,r17
(p7) br.sptk.many hyper_itc_d;;
// HYPERPRIVOP_ITC_I?
- cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
+ cmp.eq p7,p6=HYPERPRIVOP_ITC_I,r17
(p7) br.sptk.many hyper_itc_i;;
// HYPERPRIVOP_THASH?
- cmp.eq p7,p6=XEN_HYPER_THASH,r17
+ cmp.eq p7,p6=HYPERPRIVOP_THASH,r17
(p7) br.sptk.many hyper_thash;;
// HYPERPRIVOP_SET_KR?
- cmp.eq p7,p6=XEN_HYPER_SET_KR,r17
+ cmp.eq p7,p6=HYPERPRIVOP_SET_KR,r17
(p7) br.sptk.many hyper_set_kr;;
// if not one of the above, give up for now and do it the slow way
br.sptk.many dispatch_break_fault ;;
-
+END(fast_hyperprivop)
// give up for now if: ipsr.be==1, ipsr.pp==1
// from reflect_interruption, don't need to:
@@ -250,7 +231,7 @@ ENTRY(hyper_ssm_i)
cmp.ne p7,p0=r21,r0
(p7) br.sptk.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_I);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -348,6 +329,7 @@ ENTRY(hyper_ssm_i)
mov pr=r31,-1 ;;
rfi
;;
+END(hyper_ssm_i)
// reflect domain clock interrupt
// r31 == pr
@@ -594,7 +576,7 @@ 1:
adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
st8 [r21]=r17;;
// fall through
-
+END(fast_break_reflect)
// reflect to domain ivt+r20
// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
@@ -723,6 +705,7 @@ ENTRY(fast_reflect)
mov pr=r31,-1 ;;
rfi
;;
+END(fast_reflect)
// reflect access faults (0x2400,0x2800,0x5300) directly to domain
// r16 == isr
@@ -762,6 +745,7 @@ GLOBAL_ENTRY(fast_access_reflect)
and r22=~3,r22;;
st8 [r23]=r22;;
br.cond.sptk.many fast_reflect;;
+END(fast_access_reflect)
// when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
// is as it was at the time of original miss. We want to preserve that
@@ -769,7 +753,7 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
GLOBAL_ENTRY(fast_tlb_miss_reflect)
#ifndef FAST_TLB_MISS_REFLECT // see beginning of file
br.spnt.few page_fault ;;
-#endif
+#else
mov r31=pr
mov r30=cr.ipsr
mov r29=cr.iip
@@ -957,6 +941,7 @@ 1: // check the guest VHPT
extr.u r24=r24,2,6;;
// IFA already in PSCB
br.cond.sptk.many fast_insert;;
+END(fast_tlb_miss_reflect)
// we get here if fast_insert fails (e.g. due to metaphysical lookup)
ENTRY(recover_and_page_fault)
@@ -1007,6 +992,7 @@ 1: extr.u r25=r17,61,3;;
mov r29=cr.iip
mov r30=cr.ipsr
br.sptk.many fast_reflect;;
+#endif
END(fast_tlb_miss_reflect)
// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
@@ -1065,7 +1051,7 @@ 1:
1: // OK now, let's do an rfi.
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RFI);;
ld8 r23=[r20];;
adds r23=1,r23;;
st8 [r20]=r23;;
@@ -1145,9 +1131,10 @@ 1: mov pr=r31,-1
;;
rfi
;;
-
+END(hyper_rfi)
+
#ifdef RFI_TO_INTERRUPT
-GLOBAL_ENTRY(rfi_check_extint)
+ENTRY(rfi_check_extint)
//br.sptk.many dispatch_break_fault ;;
// r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
@@ -1214,11 +1201,12 @@ GLOBAL_ENTRY(rfi_check_extint)
adds r29=15,r29;;
cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi
(p6) br.cond.spnt.few just_do_rfi;;
+END(rfi_check_extint)
// this doesn't work yet (dies early after getting to user mode)
// but happens relatively infrequently, so fix it later.
// NOTE that these will be counted incorrectly for now (for privcnt output)
-GLOBAL_ENTRY(rfi_with_interrupt)
+ENTRY(rfi_with_interrupt)
#if 1
br.sptk.many dispatch_break_fault ;;
#endif
@@ -1313,11 +1301,12 @@ GLOBAL_ENTRY(rfi_with_interrupt)
st4 [r20]=r0 ;;
mov pr=r31,-1 ;;
rfi
+END(rfi_with_interrupt)
#endif // RFI_TO_INTERRUPT
ENTRY(hyper_cover)
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_COVER);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1348,11 +1337,12 @@ ENTRY(hyper_cover)
mov pr=r31,-1 ;;
rfi
;;
+END(hyper_cover)
// return from metaphysical mode (meta=1) to virtual mode (meta=0)
ENTRY(hyper_ssm_dt)
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_DT);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1384,11 +1374,12 @@ 1: extr.u r26=r24,41,2 ;;
mov pr=r31,-1 ;;
rfi
;;
+END(hyper_ssm_dt)
// go to metaphysical mode (meta=1) from virtual mode (meta=0)
ENTRY(hyper_rsm_dt)
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RSM_DT);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1421,10 +1412,11 @@ 1: extr.u r26=r24,41,2 ;;
mov pr=r31,-1 ;;
rfi
;;
+END(hyper_rsm_dt)
ENTRY(hyper_get_tpr)
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_TPR);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1453,7 +1445,7 @@ END(hyper_get_tpr)
// (or accidentally missing) delivering an interrupt
ENTRY(hyper_set_tpr)
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_TPR);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1481,7 +1473,7 @@ END(hyper_set_tpr)
ENTRY(hyper_get_ivr)
#ifdef FAST_HYPERPRIVOP_CNT
- movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);;
+ movl r22=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_IVR);;
ld8 r21=[r22];;
adds r21=1,r21;;
st8 [r22]=r21;;
@@ -1593,7 +1585,7 @@ ENTRY(hyper_eoi)
cmp.ne p7,p0=r20,r0
(p7) br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_EOI);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1657,7 +1649,7 @@ ENTRY(hyper_set_itm)
cmp.ne p7,p0=r20,r0
(p7) br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_ITM);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1698,7 +1690,7 @@ END(hyper_set_itm)
ENTRY(hyper_get_rr)
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_RR);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1730,7 +1722,7 @@ ENTRY(hyper_set_rr)
cmp.leu p7,p0=7,r25 // punt on setting rr7
(p7) br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_RR);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1788,7 +1780,7 @@ ENTRY(hyper_set_kr)
cmp.ne p7,p0=r0,r25 // if kr# > 7, go slow way
(p7) br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_KR);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_KR);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1844,9 +1836,9 @@ END(hyper_set_kr)
// On entry:
// r18 == XSI_PSR_IC
// r31 == pr
-GLOBAL_ENTRY(hyper_thash)
+ENTRY(hyper_thash)
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_THASH);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1915,7 +1907,7 @@ ENTRY(hyper_ptc_ga)
#endif
// FIXME: validate not flushing Xen addresses
#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+ movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_PTC_GA);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -1992,18 +1984,19 @@ ENTRY(recover_and_dispatch_break_fault)
#endif
mov b0=r29 ;;
br.sptk.many dispatch_break_fault;;
+END(recover_and_dispatch_break_fault)
// Registers at entry
-// r17 = break immediate (XEN_HYPER_ITC_D or I)
+// r17 = break immediate (HYPERPRIVOP_ITC_D or I)
// r18 == XSI_PSR_IC_OFS
// r31 == pr
-GLOBAL_ENTRY(hyper_itc)
-ENTRY(hyper_itc_i)
+ENTRY(hyper_itc)
+hyper_itc_i:
// fall through, hyper_itc_d handles both i and d
-ENTRY(hyper_itc_d)
+hyper_itc_d:
#ifndef FAST_ITC
br.sptk.many dispatch_break_fault ;;
-#endif
+#else
// ensure itir.ps >= xen's pagesize
adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
ld8 r23=[r23];;
@@ -2027,9 +2020,9 @@ ENTRY(hyper_itc_d)
cmp.ne p7,p0=r27,r28
(p7) br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
- cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;;
-(p6) movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);;
-(p7) movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);;
+ cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;;
+(p6) movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_D);;
+(p7) movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_I);;
ld8 r21=[r20];;
adds r21=1,r21;;
st8 [r20]=r21;;
@@ -2040,7 +2033,10 @@ ENTRY(hyper_itc_d)
movl r30=recover_and_dispatch_break_fault ;;
mov r16=r8;;
// fall through
-
+#endif
+END(hyper_itc)
+
+#if defined(FAST_ITC) || defined (FAST_TLB_MISS_REFLECT)
// fast_insert(PSCB(ifa),r24=ps,r16=pte)
// r16 == pte
@@ -2050,7 +2046,7 @@ ENTRY(hyper_itc_d)
// r29 == saved value of b0 in case of recovery
// r30 == recovery ip if failure occurs
// r31 == pr
-GLOBAL_ENTRY(fast_insert)
+ENTRY(fast_insert)
// translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
mov r19=1;;
shl r20=r19,r24;;
@@ -2175,4 +2171,4 @@ no_inc_iip:
rfi
;;
END(fast_insert)
-
+#endif
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/ivt.S Tue May 30 14:30:34 2006 -0500
@@ -100,6 +100,15 @@
mov r19=n;; /* prepare to save predicates */
\
br.sptk.many dispatch_to_fault_handler
+#define FAULT_OR_REFLECT(n)				\
+	mov r31=pr;					\
+	mov r20=cr.ipsr;;				\
+	mov r19=n;	/* prepare to save predicates */ \
+	extr.u r20=r20,IA64_PSR_CPL0_BIT,2;;		\
+	cmp.ne p6,p0=r0,r20;	/* cpl != 0? */		\
+(p6)	br.dptk.many dispatch_reflection;		\
+	br.sptk.few dispatch_to_fault_handler
+
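
FAULT_OR_REFLECT(n) extends the REFLECT(n) pattern below with a privilege check: a fault raised while the guest was running (cpl != 0) is reflected into the domain, while one taken at cpl == 0, i.e. inside Xen itself, goes to the hypervisor's own fault handler. In C-like pseudocode (a reading aid; the macro itself is ia64 assembly):

    /* if (ipsr.cpl != 0) dispatch_reflection(n);        guest fault */
    /* else               dispatch_to_fault_handler(n);  Xen fault   */
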
#ifdef XEN
 #define REFLECT(n)					\
 	mov r31=pr;					\
@@ -697,7 +706,7 @@ ENTRY(ikey_miss)
ENTRY(ikey_miss)
DBG_FAULT(6)
#ifdef XEN
- REFLECT(6)
+ FAULT_OR_REFLECT(6)
#endif
FAULT(6)
END(ikey_miss)
@@ -746,7 +755,7 @@ ENTRY(dkey_miss)
ENTRY(dkey_miss)
DBG_FAULT(7)
#ifdef XEN
- REFLECT(7)
+ FAULT_OR_REFLECT(7)
#endif
FAULT(7)
END(dkey_miss)
@@ -757,7 +766,7 @@ ENTRY(dirty_bit)
ENTRY(dirty_bit)
DBG_FAULT(8)
#ifdef XEN
- REFLECT(8)
+ FAULT_OR_REFLECT(8)
#endif
/*
 	 * What we do here is to simply turn on the dirty bit in the PTE. We need to
@@ -1523,7 +1532,7 @@ ENTRY(page_not_present)
ENTRY(page_not_present)
DBG_FAULT(20)
#ifdef XEN
- REFLECT(20)
+ FAULT_OR_REFLECT(20)
#endif
mov r16=cr.ifa
rsm psr.dt
@@ -1546,7 +1555,7 @@ ENTRY(key_permission)
ENTRY(key_permission)
DBG_FAULT(21)
#ifdef XEN
- REFLECT(21)
+ FAULT_OR_REFLECT(21)
#endif
mov r16=cr.ifa
rsm psr.dt
@@ -1562,7 +1571,7 @@ ENTRY(iaccess_rights)
ENTRY(iaccess_rights)
DBG_FAULT(22)
#ifdef XEN
- REFLECT(22)
+ FAULT_OR_REFLECT(22)
#endif
mov r16=cr.ifa
rsm psr.dt
@@ -1637,7 +1646,7 @@ ENTRY(disabled_fp_reg)
mov pr=r20,-1
;;
#endif
- REFLECT(25)
+ FAULT_OR_REFLECT(25)
//floating_panic:
// br.sptk.many floating_panic
;;
@@ -1656,7 +1665,7 @@ ENTRY(nat_consumption)
ENTRY(nat_consumption)
DBG_FAULT(26)
#ifdef XEN
- REFLECT(26)
+ FAULT_OR_REFLECT(26)
#endif
FAULT(26)
END(nat_consumption)
@@ -1668,7 +1677,7 @@ ENTRY(speculation_vector)
DBG_FAULT(27)
#ifdef XEN
// this probably need not reflect...
- REFLECT(27)
+ FAULT_OR_REFLECT(27)
#endif
/*
 	 * A [f]chk.[as] instruction needs to take the branch to the recovery code but
@@ -1714,7 +1723,7 @@ ENTRY(debug_vector)
ENTRY(debug_vector)
DBG_FAULT(29)
#ifdef XEN
- REFLECT(29)
+ FAULT_OR_REFLECT(29)
#endif
FAULT(29)
END(debug_vector)
@@ -1725,7 +1734,7 @@ ENTRY(unaligned_access)
ENTRY(unaligned_access)
DBG_FAULT(30)
#ifdef XEN
- REFLECT(30)
+ FAULT_OR_REFLECT(30)
#endif
mov r16=cr.ipsr
mov r31=pr // prepare to save predicates
@@ -1739,7 +1748,7 @@ ENTRY(unsupported_data_reference)
ENTRY(unsupported_data_reference)
DBG_FAULT(31)
#ifdef XEN
- REFLECT(31)
+ FAULT_OR_REFLECT(31)
#endif
FAULT(31)
END(unsupported_data_reference)
@@ -1750,7 +1759,7 @@ ENTRY(floating_point_fault)
ENTRY(floating_point_fault)
DBG_FAULT(32)
#ifdef XEN
- REFLECT(32)
+ FAULT_OR_REFLECT(32)
#endif
FAULT(32)
END(floating_point_fault)
@@ -1761,7 +1770,7 @@ ENTRY(floating_point_trap)
ENTRY(floating_point_trap)
DBG_FAULT(33)
#ifdef XEN
- REFLECT(33)
+ FAULT_OR_REFLECT(33)
#endif
FAULT(33)
END(floating_point_trap)
@@ -1772,7 +1781,7 @@ ENTRY(lower_privilege_trap)
ENTRY(lower_privilege_trap)
DBG_FAULT(34)
#ifdef XEN
- REFLECT(34)
+ FAULT_OR_REFLECT(34)
#endif
FAULT(34)
END(lower_privilege_trap)
@@ -1783,7 +1792,7 @@ ENTRY(taken_branch_trap)
ENTRY(taken_branch_trap)
DBG_FAULT(35)
#ifdef XEN
- REFLECT(35)
+ FAULT_OR_REFLECT(35)
#endif
FAULT(35)
END(taken_branch_trap)
@@ -1794,7 +1803,7 @@ ENTRY(single_step_trap)
ENTRY(single_step_trap)
DBG_FAULT(36)
#ifdef XEN
- REFLECT(36)
+ FAULT_OR_REFLECT(36)
#endif
FAULT(36)
END(single_step_trap)
@@ -1853,7 +1862,7 @@ ENTRY(ia32_exception)
ENTRY(ia32_exception)
DBG_FAULT(45)
#ifdef XEN
- REFLECT(45)
+ FAULT_OR_REFLECT(45)
#endif
FAULT(45)
END(ia32_exception)
@@ -1864,7 +1873,7 @@ ENTRY(ia32_intercept)
ENTRY(ia32_intercept)
DBG_FAULT(46)
#ifdef XEN
- REFLECT(46)
+ FAULT_OR_REFLECT(46)
#endif
#ifdef CONFIG_IA32_SUPPORT
mov r31=pr
@@ -1897,7 +1906,7 @@ ENTRY(ia32_interrupt)
ENTRY(ia32_interrupt)
DBG_FAULT(47)
#ifdef XEN
- REFLECT(47)
+ FAULT_OR_REFLECT(47)
#endif
#ifdef CONFIG_IA32_SUPPORT
mov r31=pr
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/privop.c
--- a/xen/arch/ia64/xen/privop.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/privop.c Tue May 30 14:30:34 2006 -0500
@@ -793,33 +793,6 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN
printf("priv_emulate: priv_handle_op fails, isr=0x%lx\n",isr);
return fault;
}
-
-
-// FIXME: Move these to include/public/arch-ia64?
-#define HYPERPRIVOP_RFI 0x1
-#define HYPERPRIVOP_RSM_DT 0x2
-#define HYPERPRIVOP_SSM_DT 0x3
-#define HYPERPRIVOP_COVER 0x4
-#define HYPERPRIVOP_ITC_D 0x5
-#define HYPERPRIVOP_ITC_I 0x6
-#define HYPERPRIVOP_SSM_I 0x7
-#define HYPERPRIVOP_GET_IVR 0x8
-#define HYPERPRIVOP_GET_TPR 0x9
-#define HYPERPRIVOP_SET_TPR 0xa
-#define HYPERPRIVOP_EOI 0xb
-#define HYPERPRIVOP_SET_ITM 0xc
-#define HYPERPRIVOP_THASH 0xd
-#define HYPERPRIVOP_PTC_GA 0xe
-#define HYPERPRIVOP_ITR_D 0xf
-#define HYPERPRIVOP_GET_RR 0x10
-#define HYPERPRIVOP_SET_RR 0x11
-#define HYPERPRIVOP_SET_KR 0x12
-#define HYPERPRIVOP_FC 0x13
-#define HYPERPRIVOP_GET_CPUID 0x14
-#define HYPERPRIVOP_GET_PMD 0x15
-#define HYPERPRIVOP_GET_EFLAG 0x16
-#define HYPERPRIVOP_SET_EFLAG 0x17
-#define HYPERPRIVOP_MAX 0x17
static const char * const hyperpriv_str[HYPERPRIVOP_MAX+1] = {
0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i",
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/process.c
--- a/xen/arch/ia64/xen/process.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/process.c Tue May 30 14:30:34 2006 -0500
@@ -15,7 +15,6 @@
#include <asm/ptrace.h>
#include <xen/delay.h>
-#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
#include <asm/sal.h> /* FOR struct ia64_sal_retval */
#include <asm/system.h>
@@ -40,7 +39,7 @@ extern void panic_domain(struct pt_regs
extern void panic_domain(struct pt_regs *, const char *, ...);
extern long platform_is_hp_ski(void);
extern int ia64_hyperprivop(unsigned long, REGS *);
-extern int ia64_hypercall(struct pt_regs *regs);
+extern IA64FAULT ia64_hypercall(struct pt_regs *regs);
extern void vmx_do_launch(struct vcpu *);
extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
@@ -195,10 +194,10 @@ void check_bad_nested_interruption(unsig
}
vector &= ~0xf;
if (vector != IA64_DATA_TLB_VECTOR &&
- vector != IA64_ALT_DATA_TLB_VECTOR &&
- vector != IA64_VHPT_TRANS_VECTOR) {
-panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n",
-	vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip));
+	    vector != IA64_ALT_DATA_TLB_VECTOR &&
+	    vector != IA64_VHPT_TRANS_VECTOR) {
+		panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%lx,iip=%lx,ifa=%lx,isr=%lx,PSCB.iip=%lx\n",
+			vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip));
}
}
@@ -265,7 +264,8 @@ void deliver_pending_interrupt(struct pt
}
unsigned long lazy_cover_count = 0;
-int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs)
+static int
+handle_lazy_cover(struct vcpu *v, struct pt_regs *regs)
{
if (!PSCB(v,interrupt_collection_enabled)) {
PSCB(v,ifs) = regs->cr_ifs;
@@ -285,7 +285,7 @@ void ia64_do_page_fault (unsigned long a
unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
IA64FAULT fault;
-	if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) return;
+ if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, regs)) return;
if ((isr & IA64_ISR_SP)
 	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
{
@@ -299,7 +299,7 @@ void ia64_do_page_fault (unsigned long a
}
again:
- fault = vcpu_translate(current,address,is_data,0,&pteval,&itir,&iha);
+ fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha);
if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
u64 logps;
pteval = translate_domain_pte(pteval, address, itir, &logps);
@@ -307,11 +307,7 @@ void ia64_do_page_fault (unsigned long a
if (fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) {
/* dtlb has been purged in-between. This dtlb was
matching. Undo the work. */
-#ifdef VHPT_GLOBAL
- vhpt_flush_address (address, 1);
-#endif
- ia64_ptcl(address, 1<<2);
- ia64_srlz_i();
+ vcpu_flush_tlb_vhpt_range (address, 1);
goto again;
}
return;
@@ -357,7 +353,7 @@ ia64_fault (unsigned long vector, unsign
struct pt_regs *regs = (struct pt_regs *) &stack;
unsigned long code;
char buf[128];
- static const char * const reason[] = {
+ static const char *reason[] = {
"IA-64 Illegal Operation fault",
"IA-64 Privileged Operation fault",
"IA-64 Privileged Register fault",
@@ -367,10 +363,10 @@ ia64_fault (unsigned long vector, unsign
"Unknown fault 9", "Unknown fault 10", "Unknown fault 11",
"Unknown fault 12",
"Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
};
-#if 0
-printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n",
- vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
-#endif
+
+	printf("ia64_fault, vector=0x%lx, ifa=0x%016lx, iip=0x%016lx, ipsr=0x%016lx, isr=0x%016lx\n",
+ vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
+
 	if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
/*
@@ -383,15 +379,48 @@ printf("ia64_fault, vector=0x%p, ifa=%p,
}
switch (vector) {
- case 24: /* General Exception */
+ case 0:
+ printk("VHPT Translation.\n");
+ break;
+
+ case 4:
+ printk("Alt DTLB.\n");
+ break;
+
+ case 6:
+ printk("Instruction Key Miss.\n");
+ break;
+
+ case 7:
+ printk("Data Key Miss.\n");
+ break;
+
+ case 8:
+ printk("Dirty-bit.\n");
+ break;
+
+ case 20:
+ printk("Page Not Found.\n");
+ break;
+
+ case 21:
+ printk("Key Permission.\n");
+ break;
+
+ case 22:
+ printk("Instruction Access Rights.\n");
+ break;
+
+ case 24: /* General Exception */
code = (isr >> 4) & 0xf;
sprintf(buf, "General Exception: %s%s", reason[code],
- (code == 3) ? ((isr & (1UL << 37))
-				? " (RSE access)" : " (data access)") : "");
+ (code == 3) ? ((isr & (1UL << 37)) ? " (RSE access)" :
+ " (data access)") : "");
if (code == 8) {
# ifdef CONFIG_IA64_PRINT_HAZARDS
printk("%s[%d]: possible hazard @ ip=%016lx (pr =
%016lx)\n",
- current->comm, current->pid, regs->cr_iip +
ia64_psr(regs)->ri,
+ current->comm, current->pid,
+ regs->cr_iip + ia64_psr(regs)->ri,
regs->pr);
# endif
printf("ia64_fault: returning on hazard\n");
@@ -399,162 +428,65 @@ printf("ia64_fault, vector=0x%p, ifa=%p,
}
break;
- case 25: /* Disabled FP-Register */
- if (isr & 2) {
- //disabled_fph_fault(regs);
- //return;
- }
- sprintf(buf, "Disabled FPL fault---not supposed to happen!");
- break;
-
- case 26: /* NaT Consumption */
- if (user_mode(regs)) {
- void *addr;
-
- if (((isr >> 4) & 0xf) == 2) {
- /* NaT page consumption */
- //sig = SIGSEGV;
- //code = SEGV_ACCERR;
- addr = (void *) ifa;
- } else {
- /* register NaT consumption */
- //sig = SIGILL;
- //code = ILL_ILLOPN;
-				addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- }
- //siginfo.si_signo = sig;
- //siginfo.si_code = code;
- //siginfo.si_errno = 0;
- //siginfo.si_addr = addr;
- //siginfo.si_imm = vector;
- //siginfo.si_flags = __ISR_VALID;
- //siginfo.si_isr = isr;
- //force_sig_info(sig, &siginfo, current);
- //return;
- } //else if (ia64_done_with_exception(regs))
- //return;
- sprintf(buf, "NaT consumption");
- break;
-
- case 31: /* Unsupported Data Reference */
- if (user_mode(regs)) {
- //siginfo.si_signo = SIGILL;
- //siginfo.si_code = ILL_ILLOPN;
- //siginfo.si_errno = 0;
-			//siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- //siginfo.si_imm = vector;
- //siginfo.si_flags = __ISR_VALID;
- //siginfo.si_isr = isr;
- //force_sig_info(SIGILL, &siginfo, current);
- //return;
- }
- sprintf(buf, "Unsupported data reference");
- break;
-
- case 29: /* Debug */
- case 35: /* Taken Branch Trap */
- case 36: /* Single Step Trap */
- //if (fsys_mode(current, regs)) {}
- switch (vector) {
- case 29:
- //siginfo.si_code = TRAP_HWBKPT;
-#ifdef CONFIG_ITANIUM
- /*
-			 * Erratum 10 (IFA may contain incorrect address) now has
- * "NoFix" status. There are no plans for fixing this.
- */
- if (ia64_psr(regs)->is == 0)
- ifa = regs->cr_iip;
-#endif
- break;
- case 35: ifa = 0; break;
- case 36: ifa = 0; break;
- //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
- //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
- }
- //siginfo.si_signo = SIGTRAP;
- //siginfo.si_errno = 0;
- //siginfo.si_addr = (void *) ifa;
- //siginfo.si_imm = 0;
- //siginfo.si_flags = __ISR_VALID;
- //siginfo.si_isr = isr;
- //force_sig_info(SIGTRAP, &siginfo, current);
- //return;
-
- case 32: /* fp fault */
- case 33: /* fp trap */
- //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
-		//if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
- //siginfo.si_signo = SIGFPE;
- //siginfo.si_errno = 0;
- //siginfo.si_code = FPE_FLTINV;
-			//siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- //siginfo.si_flags = __ISR_VALID;
- //siginfo.si_isr = isr;
- //siginfo.si_imm = 0;
- //force_sig_info(SIGFPE, &siginfo, current);
- //}
- //return;
- sprintf(buf, "FP fault/trap");
- break;
-
- case 34:
- if (isr & 0x2) {
- /* Lower-Privilege Transfer Trap */
- /*
-			 * Just clear PSR.lp and then return immediately: all the
-			 * interesting work (e.g., signal delivery is done in the kernel
-			 * exit path).
- */
- //ia64_psr(regs)->lp = 0;
- //return;
- sprintf(buf, "Lower-Privilege Transfer trap");
- } else {
- /* Unimplemented Instr. Address Trap */
- if (user_mode(regs)) {
- //siginfo.si_signo = SIGILL;
- //siginfo.si_code = ILL_BADIADDR;
- //siginfo.si_errno = 0;
- //siginfo.si_flags = 0;
- //siginfo.si_isr = 0;
- //siginfo.si_imm = 0;
-				//siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- //force_sig_info(SIGILL, &siginfo, current);
- //return;
- }
- sprintf(buf, "Unimplemented Instruction Address fault");
- }
- break;
-
- case 45:
- printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
- printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
- regs->cr_iip, ifa, isr);
- //force_sig(SIGSEGV, current);
- break;
-
- case 46:
- printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
-		printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
- regs->cr_iip, ifa, isr, iim);
- //force_sig(SIGSEGV, current);
- return;
-
- case 47:
- sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
- break;
-
- default:
- sprintf(buf, "Fault %lu", vector);
- break;
- }
- //die_if_kernel(buf, regs, error);
-printk("ia64_fault: %s: reflecting\n",buf);
-PSCB(current,itir) = vcpu_get_itir_on_fault(current,ifa);
-PSCB(current,ifa) = ifa;
-reflect_interruption(isr,regs,IA64_GENEX_VECTOR);
-//while(1);
- //force_sig(SIGILL, current);
+ case 25:
+ printk("Disabled FP-Register.\n");
+ break;
+
+ case 26:
+ printk("NaT consumption.\n");
+ break;
+
+ case 29:
+ printk("Debug.\n");
+ break;
+
+ case 30:
+ printk("Unaligned Reference.\n");
+ break;
+
+ case 31:
+ printk("Unsupported data reference.\n");
+ break;
+
+ case 32:
+ printk("Floating-Point Fault.\n");
+ break;
+
+ case 33:
+ printk("Floating-Point Trap.\n");
+ break;
+
+ case 34:
+ printk("Lower Privilege Transfer Trap.\n");
+ break;
+
+ case 35:
+ printk("Taken Branch Trap.\n");
+ break;
+
+ case 36:
+ printk("Single Step Trap.\n");
+ break;
+
+ case 45:
+ printk("IA-32 Exception.\n");
+ break;
+
+ case 46:
+ printk("IA-32 Intercept.\n");
+ break;
+
+ case 47:
+ printk("IA-32 Interrupt.\n");
+ break;
+
+ default:
+ printk("Fault %lu\n", vector);
+ break;
+ }
+
+ show_registers(regs);
+ panic("Fault in Xen.\n");
}
unsigned long running_on_sim = 0;
@@ -679,6 +611,7 @@ ia64_handle_break (unsigned long ifa, st
{
struct domain *d = current->domain;
struct vcpu *v = current;
+ IA64FAULT vector;
if (first_break) {
if (platform_is_hp_ski()) running_on_sim = 1;
@@ -699,9 +632,11 @@ ia64_handle_break (unsigned long ifa, st
/* by default, do not continue */
v->arch.hypercall_continuation = 0;
- if (ia64_hypercall(regs) &&
- !PSCBX(v, hypercall_continuation))
- vcpu_increment_iip(current);
+ if ((vector = ia64_hypercall(regs)) == IA64_NO_FAULT) {
+ if (!PSCBX(v, hypercall_continuation))
+ vcpu_increment_iip(current);
+ }
+ else reflect_interruption(isr, regs, vector);
}
else if (!PSCB(v,interrupt_collection_enabled)) {
if (ia64_hyperprivop(iim,regs))
@@ -813,7 +748,7 @@ printf("*** Handled privop masquerading
while(vector);
return;
}
-	if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, isr, regs)) return;
+	if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, regs)) return;
PSCB(current,ifa) = ifa;
PSCB(current,itir) = vcpu_get_itir_on_fault(v,ifa);
reflect_interruption(isr,regs,vector);
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/regionreg.c Tue May 30 14:30:34 2006 -0500
@@ -17,9 +17,7 @@
#include <asm/vcpu.h>
/* Defined in xemasm.S */
-extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info, unsigned long p_vhpt, unsigned long v_pal);
-
-extern void *pal_vaddr;
+extern void ia64_new_rr7(unsigned long rid, void *shared_info, void *shared_arch_info, unsigned long shared_info_va, unsigned long p_vhpt);
/* RID virtualization mechanism is really simple: domains have less rid bits
than the host and the host rid space is shared among the domains. (Values
@@ -260,9 +258,9 @@ int set_one_rr(unsigned long rr, unsigne
if (!PSCB(v,metaphysical_mode))
set_rr(rr,newrrv.rrval);
} else if (rreg == 7) {
- ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
- v->arch.privregs, __get_cpu_var(vhpt_paddr),
- (unsigned long) pal_vaddr);
+ ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info,
+ v->arch.privregs, v->domain->arch.shared_info_va,
+ __get_cpu_var(vhpt_paddr));
} else {
set_rr(rr,newrrv.rrval);
}
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/vcpu.c Tue May 30 14:30:34 2006 -0500
@@ -28,8 +28,6 @@ extern void setfpreg (unsigned long regn
extern void panic_domain(struct pt_regs *, const char *, ...);
extern unsigned long translate_domain_mpaddr(unsigned long);
-extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
-
typedef union {
struct ia64_psr ia64_psr;
@@ -682,9 +680,9 @@ UINT64 vcpu_check_pending_interrupts(VCP
*/
check_start:
if (event_pending(vcpu) &&
- !test_bit(vcpu->vcpu_info->arch.evtchn_vector,
+ !test_bit(vcpu->domain->shared_info->arch.evtchn_vector,
&PSCBX(vcpu, insvc[0])))
- vcpu_pend_interrupt(vcpu, vcpu->vcpu_info->arch.evtchn_vector);
+		vcpu_pend_interrupt(vcpu, vcpu->domain->shared_info->arch.evtchn_vector);
p = &PSCBX(vcpu,irr[3]);
r = &PSCBX(vcpu,insvc[3]);
@@ -1290,8 +1288,7 @@ static inline int vcpu_match_tr_entry(TR
return trp->pte.p && vcpu_match_tr_entry_no_p(trp, ifa, rid);
}
-// in_tpa is not used when CONFIG_XEN_IA64_DOM0_VP
-IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, BOOLEAN in_tpa, UINT64 *pteval, UINT64 *itir, UINT64 *iha)
+IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir, UINT64 *iha)
{
unsigned long region = address >> 61;
unsigned long pta, rid, rr;
@@ -1368,12 +1365,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN
pte = trp->pte;
if (/* is_data && */ pte.p
&& vcpu_match_tr_entry_no_p(trp,address,rid)) {
-#ifndef CONFIG_XEN_IA64_DOM0_VP
- if (vcpu->domain==dom0 && !in_tpa)
- *pteval = pte.val;
- else
-#endif
- *pteval = vcpu->arch.dtlb_pte;
+ *pteval = pte.val;
*itir = trp->itir;
dtlb_translate_count++;
return IA64_USE_TLB;
@@ -1422,7 +1414,7 @@ IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 va
UINT64 pteval, itir, mask, iha;
IA64FAULT fault;
- fault = vcpu_translate(vcpu, vadr, TRUE, TRUE, &pteval, &itir, &iha);
+ fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB)
{
mask = itir_mask(itir);
@@ -1708,11 +1700,6 @@ IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT6
VCPU translation register access routines
**************************************************************************/
-void vcpu_purge_tr_entry(TR_ENTRY *trp)
-{
- trp->pte.val = 0;
-}
-
static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 ifa)
{
UINT64 ps;
@@ -1800,12 +1787,10 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64
if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB
return;
if (IorD & 0x1) {
- vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr);
- PSCBX(vcpu,itlb_pte) = mp_pte;
+ vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr);
}
if (IorD & 0x2) {
- vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr);
- PSCBX(vcpu,dtlb_pte) = mp_pte;
+ vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),mp_pte,ps<<2,vaddr);
}
}
@@ -1875,20 +1860,14 @@ IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vad
return fault;
}
-int ptce_count = 0;
IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
{
// Note that this only needs to be called once, i.e. the
// architected loop to purge the entire TLB should use
// base = stride1 = stride2 = 0, count0 = count1 = 1
-#ifdef VHPT_GLOBAL
- vhpt_flush(); // FIXME: This is overdoing it
-#endif
- local_flush_tlb_all();
- // just invalidate the "whole" tlb
- vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
- vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+ vcpu_flush_vtlb_all ();
+
return IA64_NO_FAULT;
}
@@ -1905,33 +1884,8 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64
// FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE
//printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range);
-#ifdef CONFIG_XEN_SMP
- struct domain *d = vcpu->domain;
- struct vcpu *v;
-
- for_each_vcpu (d, v) {
- if (v == vcpu)
- continue;
-
- /* Purge TC entries.
- FIXME: clear only if match. */
- vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
- vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
-
-#ifdef VHPT_GLOBAL
- /* Invalidate VHPT entries. */
- vhpt_flush_address_remote (v->processor, vadr, addr_range);
-#endif
- }
-#endif
-
-#ifdef VHPT_GLOBAL
- vhpt_flush_address(vadr,addr_range);
-#endif
- ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
- /* Purge tc. */
- vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
- vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+ domain_flush_vtlb_range (vcpu->domain, vadr, addr_range);
+
return IA64_NO_FAULT;
}
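
The comment in vcpu_ptc_e() above says the architected purge loop should run with base = stride1 = stride2 = 0 and count0 = count1 = 1, which degenerates to a single ptc.e. A compilable sketch of that architected loop, under the assumption that the parameters come from PAL_PTCE_INFO as in Linux's flush_tlb_all(); ia64_ptce() stands in for the ptc.e instruction:

    #include <stdio.h>

    static unsigned long ptce_issued;

    static void ia64_ptce(unsigned long addr)   /* stand-in for ptc.e */
    {
        (void)addr;
        ptce_issued++;
    }

    static void ptce_loop(unsigned long base,
                          unsigned long count0, unsigned long count1,
                          unsigned long stride1, unsigned long stride2)
    {
        unsigned long i, j, addr = base;

        for (i = 0; i < count0; i++) {
            for (j = 0; j < count1; j++) {
                ia64_ptce(addr);
                addr += stride2;
            }
            addr += stride1;
        }
    }

    int main(void)
    {
        /* The degenerate parameters from the vcpu_ptc_e() comment: */
        ptce_loop(0, 1, 1, 0, 0);
        printf("ptc.e issued %lu time(s)\n", ptce_issued);   /* 1 */
        return 0;
    }
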
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/vhpt.c Tue May 30 14:30:34 2006 -0500
@@ -12,32 +12,31 @@
#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
-#include <asm/dma.h>
#include <asm/vhpt.h>
+#include <asm/vcpu.h>
+
+/* Defined in tlb.c */
+extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
extern long running_on_sim;
DEFINE_PER_CPU (unsigned long, vhpt_paddr);
DEFINE_PER_CPU (unsigned long, vhpt_pend);
-void vhpt_flush(void)
-{
- struct vhpt_lf_entry *v =__va(__ia64_per_cpu_var(vhpt_paddr));
- int i;
-#if 0
-static int firsttime = 2;
-
-if (firsttime) firsttime--;
-else {
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-}
-#endif
+static void vhpt_flush(void)
+{
+ struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr));
+ int i;
+
+ for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
+ v->ti_tag = INVALID_TI_TAG;
+}
+
+static void vhpt_erase(void)
+{
+ struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR;
+ int i;
+
for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
v->itir = 0;
v->CChain = 0;
@@ -47,51 +46,6 @@ printf("vhpt_flush: ********************
// initialize cache too???
}
-#ifdef VHPT_GLOBAL
-void vhpt_flush_address(unsigned long vadr, unsigned long addr_range)
-{
- struct vhpt_lf_entry *vlfe;
-
- if ((vadr >> 61) == 7) {
- // no vhpt for region 7 yet, see vcpu_itc_no_srlz
- printf("vhpt_flush_address: region 7, spinning...\n");
- while(1);
- }
-#if 0
- // this only seems to occur at shutdown, but it does occur
- if ((!addr_range) || addr_range & (addr_range - 1)) {
- printf("vhpt_flush_address: weird range, spinning...\n");
- while(1);
- }
-//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range);
-#endif
- while ((long)addr_range > 0) {
- vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
- // FIXME: for now, just blow it away even if it belongs to
- // another domain. Later, use ttag to check for match
-//if (!(vlfe->ti_tag & INVALID_TI_TAG)) {
-//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr);
-//}
- vlfe->ti_tag |= INVALID_TI_TAG;
- addr_range -= PAGE_SIZE;
- vadr += PAGE_SIZE;
- }
-}
-
-void vhpt_flush_address_remote(int cpu,
- unsigned long vadr, unsigned long addr_range)
-{
- while ((long)addr_range > 0) {
- /* Get the VHPT entry. */
- unsigned int off = ia64_thash(vadr) - VHPT_ADDR;
- volatile struct vhpt_lf_entry *v;
- v =__va(per_cpu(vhpt_paddr, cpu) + off);
- v->ti_tag = INVALID_TI_TAG;
- addr_range -= PAGE_SIZE;
- vadr += PAGE_SIZE;
- }
-}
-#endif
static void vhpt_map(unsigned long pte)
{
@@ -147,17 +101,11 @@ void vhpt_multiple_insert(unsigned long
void vhpt_init(void)
{
- unsigned long vhpt_total_size, vhpt_alignment;
unsigned long paddr, pte;
struct page_info *page;
#if !VHPT_ENABLED
return;
#endif
- // allocate a huge chunk of physical memory.... how???
- vhpt_total_size = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB
- vhpt_alignment = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB
- printf("vhpt_init: vhpt size=0x%lx, align=0x%lx\n",
- vhpt_total_size, vhpt_alignment);
/* This allocation only holds true if vhpt table is unique for
* all domains. Or else later new vhpt table should be allocated
* from domain heap when each domain is created. Assume xen buddy
@@ -167,17 +115,135 @@ void vhpt_init(void)
if (!page)
panic("vhpt_init: can't allocate VHPT!\n");
paddr = page_to_maddr(page);
+ if (paddr & ((1 << VHPT_SIZE_LOG2) - 1))
+ panic("vhpt_init: bad VHPT alignment!\n");
__get_cpu_var(vhpt_paddr) = paddr;
- __get_cpu_var(vhpt_pend) = paddr + vhpt_total_size - 1;
+ __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
paddr, __get_cpu_var(vhpt_pend));
pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL));
vhpt_map(pte);
ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
VHPT_ENABLED);
- vhpt_flush();
-}
-
+ vhpt_erase();
+}
+
+
+void vcpu_flush_vtlb_all (void)
+{
+ struct vcpu *v = current;
+
+ /* First VCPU tlb. */
+ vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+ vcpu_purge_tr_entry(&PSCBX(v,itlb));
+
+ /* Then VHPT. */
+ vhpt_flush ();
+
+ /* Then mTLB. */
+ local_flush_tlb_all ();
+
+ /* We could clear bit in d->domain_dirty_cpumask only if domain d is
+ not running on this processor. There is currently no easy way to
+ check this. */
+}
+
+void domain_flush_vtlb_all (void)
+{
+ int cpu = smp_processor_id ();
+ struct vcpu *v;
+
+ for_each_vcpu (current->domain, v)
+ if (v->processor == cpu)
+ vcpu_flush_vtlb_all ();
+ else
+ smp_call_function_single
+ (v->processor,
+ (void(*)(void *))vcpu_flush_vtlb_all,
+ NULL,1,1);
+}
+
+static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range)
+{
+ void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu));
+
+ while ((long)addr_range > 0) {
+ /* Get the VHPT entry. */
+ unsigned int off = ia64_thash(vadr) - VHPT_ADDR;
+ volatile struct vhpt_lf_entry *v;
+ v = vhpt_base + off;
+ v->ti_tag = INVALID_TI_TAG;
+ addr_range -= PAGE_SIZE;
+ vadr += PAGE_SIZE;
+ }
+}
+
+void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
+{
+ cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range);
+ ia64_ptcl(vadr, log_range << 2);
+ ia64_srlz_i();
+}
+
+void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
+{
+ struct vcpu *v;
+
+#if 0
+ // this only seems to occur at shutdown, but it does occur
+ if ((!addr_range) || addr_range & (addr_range - 1)) {
+ printf("vhpt_flush_address: weird range, spinning...\n");
+ while(1);
+ }
+#endif
+
+ for_each_vcpu (d, v) {
+ /* Purge TC entries.
+ FIXME: clear only if match. */
+ vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+ vcpu_purge_tr_entry(&PSCBX(v,itlb));
+
+ /* Invalidate VHPT entries. */
+ cpu_flush_vhpt_range (v->processor, vadr, addr_range);
+ }
+
+ /* ptc.ga */
+ ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
+}
+
+static void flush_tlb_vhpt_all (struct domain *d)
+{
+ /* First VHPT. */
+ vhpt_flush ();
+
+ /* Then mTLB. */
+ local_flush_tlb_all ();
+}
+
+void domain_flush_destroy (struct domain *d)
+{
+ /* Very heavy... */
+ on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
+ cpus_clear (d->domain_dirty_cpumask);
+}
+
+void flush_tlb_mask(cpumask_t mask)
+{
+ int cpu;
+
+ cpu = smp_processor_id();
+ if (cpu_isset (cpu, mask)) {
+ cpu_clear(cpu, mask);
+ flush_tlb_vhpt_all (NULL);
+ }
+
+ if (cpus_empty(mask))
+ return;
+
+ for_each_cpu_mask (cpu, mask)
+ smp_call_function_single
+ (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1);
+}
void zero_vhpt_stats(void)
{
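
cpu_flush_vhpt_range() above walks the span a page at a time, hashes each address to its VHPT slot, and invalidates that slot's tag. A self-contained toy model of the walk — the hash is a stand-in for the ia64_thash instruction, and the table size and page size are made up for the example:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT        14
    #define PAGE_SIZE         (1UL << PAGE_SHIFT)
    #define VHPT_NUM_ENTRIES  256
    #define INVALID_TI_TAG    (1UL << 63)

    static uint64_t tags[VHPT_NUM_ENTRIES];

    static unsigned int toy_thash(uint64_t vadr)   /* stand-in for ia64_thash */
    {
        return (vadr >> PAGE_SHIFT) % VHPT_NUM_ENTRIES;
    }

    static void flush_vhpt_range(uint64_t vadr, uint64_t addr_range)
    {
        while ((int64_t)addr_range > 0) {
            tags[toy_thash(vadr)] = INVALID_TI_TAG;   /* invalidate slot */
            addr_range -= PAGE_SIZE;
            vadr += PAGE_SIZE;
        }
    }

    int main(void)
    {
        flush_vhpt_range(0x200000, 4 * PAGE_SIZE);
        printf("slot for 0x200000 invalid: %d\n",
               tags[toy_thash(0x200000)] == INVALID_TI_TAG);
        return 0;
    }
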
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xenasm.S
--- a/xen/arch/ia64/xen/xenasm.S Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xenasm.S Tue May 30 14:30:34 2006 -0500
@@ -11,242 +11,160 @@
#include <asm/pgtable.h>
#include <asm/vhpt.h>
-#if 0
-// FIXME: there's gotta be a better way...
-// ski and spaski are different... moved to xenmisc.c
-#define RunningOnHpSki(rx,ry,pn) \
- addl rx = 2, r0; \
- addl ry = 3, r0; \
- ;; \
- mov rx = cpuid[rx]; \
- mov ry = cpuid[ry]; \
- ;; \
- cmp.eq pn,p0 = 0, rx; \
- ;; \
- (pn) movl rx = 0x7000004 ; \
- ;; \
- (pn) cmp.ge pn,p0 = ry, rx; \
- ;;
-
-//int platform_is_hp_ski(void)
-GLOBAL_ENTRY(platform_is_hp_ski)
- mov r8 = 0
- RunningOnHpSki(r3,r9,p8)
-(p8) mov r8 = 1
- br.ret.sptk.many b0
-END(platform_is_hp_ski)
-#endif
-
// Change rr7 to the passed value while ensuring
// Xen is mapped into the new region.
-// in0: new rr7 value
-// in1: Xen virtual address of shared info (to be pinned)
#define PSR_BITS_TO_CLEAR \
(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
- IA64_PSR_DFL | IA64_PSR_DFH)
+ IA64_PSR_DFL | IA64_PSR_DFH | IA64_PSR_IC)
// FIXME? Note that this turns off the DB bit (debug)
#define PSR_BITS_TO_SET IA64_PSR_BN
-//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info, unsigned long p_vhpt, unsigned long v_pal);
+//extern void ia64_new_rr7(unsigned long rid, /* in0 */
+// void *shared_info, /* in1 */
+// void *shared_arch_info, /* in2 */
+// unsigned long shared_info_va, /* in3 */
+// unsigned long p_vhpt) /* in4 */
+//Local usage:
+// loc0=rp, loc1=ar.pfs, loc2=percpu_paddr, loc3=psr, loc4=ar.rse
+// loc5=pal_vaddr, loc6=xen_paddr, loc7=shared_archinfo_paddr,
GLOBAL_ENTRY(ia64_new_rr7)
// not sure this unwind statement is correct...
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1)
- alloc loc1 = ar.pfs, 5, 9, 0, 0
+ alloc loc1 = ar.pfs, 5, 8, 0, 0
+ movl loc2=PERCPU_ADDR
1: {
- mov r28 = in0 // copy procedure index
+ mov loc3 = psr // save psr
+ mov loc0 = rp // save rp
mov r8 = ip // save ip to compute branch
- mov loc0 = rp // save rp
};;
.body
- movl loc2=PERCPU_ADDR
- ;;
tpa loc2=loc2 // grab this BEFORE changing rr7
- ;;
- dep loc8=0,in4,60,4
- ;;
-#if VHPT_ENABLED
- mov loc6=in3
- ;;
- //tpa loc6=loc6 // grab this BEFORE changing rr7
- ;;
-#endif
- mov loc5=in1
- ;;
- tpa loc5=loc5 // grab this BEFORE changing rr7
- ;;
- mov loc7=in2 // arch_vcpu_info_t
- ;;
- tpa loc7=loc7 // grab this BEFORE changing rr7
- ;;
- mov loc3 = psr // save psr
- adds r8 = 1f-1b,r8 // calculate return address for call
- ;;
+ tpa in1=in1 // grab shared_info BEFORE changing rr7
+ adds r8 = 1f-1b,r8 // calculate return address for call
+ ;;
+ tpa loc7=in2 // grab arch_vcpu_info BEFORE chg rr7
+ movl r17=PSR_BITS_TO_SET
+ mov loc4=ar.rsc // save RSE configuration
+ movl r16=PSR_BITS_TO_CLEAR
+ ;;
tpa r8=r8 // convert rp to physical
- ;;
- mov loc4=ar.rsc // save RSE configuration
- ;;
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PSR_BITS_TO_CLEAR
- movl r17=PSR_BITS_TO_SET
- ;;
or loc3=loc3,r17 // add in psr the bits to set
;;
andcm r16=loc3,r16 // removes bits to clear from psr
+ dep loc6=0,r8,0,KERNEL_TR_PAGE_SHIFT // Xen code paddr
br.call.sptk.many rp=ia64_switch_mode_phys
1:
// now in physical mode with psr.i/ic off so do rr7 switch
- dep r16=-1,r0,61,3
- ;;
+ movl r16=pal_vaddr // Note: belongs to region 7!
+ ;;
mov rr[r16]=in0
+ ;;
srlz.d
- ;;
+ dep r16=0,r16,60,4 // Get physical address.
+ ;;
+ ld8 loc5=[r16] // read pal_vaddr
+ movl r26=PAGE_KERNEL
+ ;;
// re-pin mappings for kernel text and data
- mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ mov r24=KERNEL_TR_PAGE_SHIFT<<2
movl r17=KERNEL_START
;;
- rsm psr.i | psr.ic
- ;;
- srlz.i
- ;;
- ptr.i r17,r18
- ptr.d r17,r18
- ;;
- mov cr.itir=r18
+ ptr.i r17,r24
+ ptr.d r17,r24
+ mov r16=IA64_TR_KERNEL
+ mov cr.itir=r24
mov cr.ifa=r17
- mov r16=IA64_TR_KERNEL
- //mov r3=ip
- movl r18=PAGE_KERNEL
- ;;
- dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
- ;;
- or r18=r2,r18
- ;;
- srlz.i
+ or r18=loc6,r26
;;
itr.i itr[r16]=r18
- ;;
+ ;;
itr.d dtr[r16]=r18
- ;;
-
- // re-pin mappings for stack (current), per-cpu, vhpt, and shared info
+
+ // re-pin mappings for stack (current)
// unless overlaps with KERNEL_TR
dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT
;;
cmp.eq p7,p0=r17,r18
(p7) br.cond.sptk .stack_overlaps
- ;;
- movl r25=PAGE_KERNEL
+ mov r25=IA64_GRANULE_SHIFT<<2
dep r21=0,r13,60,4 // physical address of "current"
;;
- or r23=r25,r21 // construct PA | page properties
- mov r25=IA64_GRANULE_SHIFT<<2
- ;;
ptr.d r13,r25
- ;;
+ or r23=r21,r26 // construct PA | page properties
mov cr.itir=r25
mov cr.ifa=r13 // VA of next task...
- ;;
- mov r25=IA64_TR_CURRENT_STACK
+ mov r21=IA64_TR_CURRENT_STACK
+ ;;
+ itr.d dtr[r21]=r23 // wire in new mapping...
+
+ // Per-cpu
+.stack_overlaps:
+ mov r24=PERCPU_PAGE_SHIFT<<2
+ movl r22=PERCPU_ADDR
+ ;;
+ ptr.d r22,r24
+ or r23=loc2,r26 // construct PA | page properties
+ mov cr.itir=r24
+ mov cr.ifa=r22
+ mov r25=IA64_TR_PERCPU_DATA
;;
itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
-.stack_overlaps:
-
- movl r22=PERCPU_ADDR
- ;;
- movl r25=PAGE_KERNEL
- ;;
- mov r21=loc2 // saved percpu physical address
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r24=PERCPU_PAGE_SHIFT<<2
+
+ // VHPT
+#if VHPT_ENABLED
+ mov r24=VHPT_SIZE_LOG2<<2
+ movl r22=VHPT_ADDR
+ mov r21=IA64_TR_VHPT
;;
ptr.d r22,r24
- ;;
+ or r23=in4,r26 // construct PA | page properties
mov cr.itir=r24
mov cr.ifa=r22
;;
- mov r25=IA64_TR_PERCPU_DATA
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
-
-#if VHPT_ENABLED
- movl r22=VHPT_ADDR
- ;;
- movl r25=PAGE_KERNEL
- ;;
- mov r21=loc6 // saved vhpt physical address
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r24=VHPT_SIZE_LOG2<<2
+ itr.d dtr[r21]=r23 // wire in new mapping...
+#endif
+
+ // Shared info
+ mov r24=PAGE_SHIFT<<2
+ movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
+ ;;
+ ptr.d in3,r24
+ or r23=in1,r25 // construct PA | page properties
+ mov cr.itir=r24
+ mov cr.ifa=in3
+ mov r21=IA64_TR_SHARED_INFO
+ ;;
+ itr.d dtr[r21]=r23 // wire in new mapping...
+
+ // Map for arch_vcpu_info_t
+ movl r22=XSI_OFS
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ add r22=r22,in3
;;
ptr.d r22,r24
- ;;
+ or r23=loc7,r25 // construct PA | page properties
mov cr.itir=r24
mov cr.ifa=r22
- ;;
- mov r25=IA64_TR_VHPT
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
-#endif
-
- movl r22=SHAREDINFO_ADDR
- ;;
- movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
- ;;
- mov r21=loc5 // saved sharedinfo physical address
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r24=PAGE_SHIFT<<2
- ;;
- ptr.d r22,r24
- ;;
- mov cr.itir=r24
- mov cr.ifa=r22
- ;;
- mov r25=IA64_TR_SHARED_INFO
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
- // Map for arch_vcpu_info_t
- movl r22=SHARED_ARCHINFO_ADDR
- ;;
- movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
- ;;
- mov r21=loc7 // saved sharedinfo physical address
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r24=PAGE_SHIFT<<2
- ;;
- ptr.d r22,r24
- ;;
- mov cr.itir=r24
- mov cr.ifa=r22
- ;;
- mov r25=IA64_TR_ARCH_INFO
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
-
- //Purge/insert PAL TR
+ mov r21=IA64_TR_ARCH_INFO
+ ;;
+ itr.d dtr[r21]=r23 // wire in new mapping...
+
+ // Purge/insert PAL TR
mov r24=IA64_TR_PALCODE
- movl r25=PAGE_KERNEL
- ;;
- or loc8=r25,loc8
mov r23=IA64_GRANULE_SHIFT<<2
- ;;
- ptr.i in4,r23
- ;;
+ dep r25=0,loc5,60,4 // convert pal vaddr to paddr
+ ;;
+ ptr.i loc5,r23
+ or r25=r25,r26 // construct PA | page properties
mov cr.itir=r23
- mov cr.ifa=in4
- ;;
- itr.i itr[r24]=loc8
- ;;
+ mov cr.ifa=loc5
+ ;;
+ itr.i itr[r24]=r25
// done, switch back to virtual and return
mov r16=loc3 // r16= original psr
@@ -261,6 +179,7 @@ 1:
br.ret.sptk.many rp
END(ia64_new_rr7)
+#if 0 /* Not used */
#include "minstate.h"
GLOBAL_ENTRY(ia64_prepare_handle_privop)
@@ -301,6 +220,7 @@ GLOBAL_ENTRY(ia64_prepare_handle_reflect
DO_LOAD_SWITCH_STACK
br.cond.sptk.many rp // goes to ia64_leave_kernel
END(ia64_prepare_handle_reflection)
+#endif
GLOBAL_ENTRY(__get_domain_bundle)
EX(.failure_in_get_bundle,ld8 r8=[r32],8)
@@ -331,80 +251,9 @@ GLOBAL_ENTRY(dorfirfi)
mov cr.ipsr=r17
mov cr.ifs=r18
;;
- // fall through
+ rfi
+ ;;
END(dorfirfi)
-
-GLOBAL_ENTRY(dorfi)
- rfi
- ;;
-END(dorfirfi)
-
-//
-// Long's Peak UART Offsets
-//
-#define COM_TOP 0xff5e0000
-#define COM_BOT 0xff5e2000
-
-// UART offsets
-#define UART_TX 0 /* Out: Transmit buffer (DLAB=0) */
-#define UART_INT_ENB 1 /* interrupt enable (DLAB=0) */
-#define UART_INT_ID 2 /* Interrupt ID register */
-#define UART_LINE_CTL 3 /* Line control register */
-#define UART_MODEM_CTL 4 /* Modem Control Register */
-#define UART_LSR 5 /* In: Line Status Register */
-#define UART_MSR 6 /* Modem status register */
-#define UART_DLATCH_LOW UART_TX
-#define UART_DLATCH_HIGH UART_INT_ENB
-#define COM1 0x3f8
-#define COM2 0x2F8
-#define COM3 0x3E8
-
-/* interrupt enable bits (offset 1) */
-#define DATA_AVAIL_INT 1
-#define XMIT_HOLD_EMPTY_INT 2
-#define LINE_STAT_INT 4
-#define MODEM_STAT_INT 8
-
-/* line status bits (offset 5) */
-#define REC_DATA_READY 1
-#define OVERRUN 2
-#define PARITY_ERROR 4
-#define FRAMING_ERROR 8
-#define BREAK_INTERRUPT 0x10
-#define XMIT_HOLD_EMPTY 0x20
-#define XMIT_SHIFT_EMPTY 0x40
-
-// Write a single character
-// input: r32 = character to be written
-// output: none
-GLOBAL_ENTRY(longs_peak_putc)
- rsm psr.dt
- movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR
- ;;
- srlz.i
- ;;
-
-.Chk_THRE_p:
- ld1.acq r18=[r16]
- ;;
-
- and r18 = XMIT_HOLD_EMPTY, r18
- ;;
- cmp4.eq p6,p0=0,r18
- ;;
-
-(p6) br .Chk_THRE_p
- ;;
- movl r16 = 0x8000000000000000 + COM_TOP + UART_TX
- ;;
- st1.rel [r16]=r32
- ;;
- ssm psr.dt
- ;;
- srlz.i
- ;;
- br.ret.sptk.many b0
-END(longs_peak_putc)
/* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */
GLOBAL_ENTRY(pal_emulator_static)
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xenmisc.c Tue May 30 14:30:34 2006 -0500
@@ -267,6 +267,9 @@ void context_switch(struct vcpu *prev, s
vmx_load_state(next);
/*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/
prev = ia64_switch_to(next);
+
+ //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask);
+
if (!VMX_DOMAIN(current)){
vcpu_set_next_timer(current);
}
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/ia64/xen/xensetup.c Tue May 30 14:30:34 2006 -0500
@@ -415,8 +415,7 @@ printk("About to call domain_create()\n"
printk("About to call construct_dom0()\n");
dom0_memory_start = (unsigned long) __va(initial_images_start);
dom0_memory_size = ia64_boot_param->domain_size;
- dom0_initrd_start = (unsigned long) __va(initial_images_start +
- PAGE_ALIGN(ia64_boot_param->domain_size));
+ dom0_initrd_start = (unsigned long) __va(ia64_boot_param->initrd_start);
dom0_initrd_size = ia64_boot_param->initrd_size;
if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_size,
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/dom0_ops.c Tue May 30 14:30:34 2006 -0500
@@ -404,27 +404,6 @@ long arch_do_dom0_op(struct dom0_op *op,
}
break;
- case DOM0_PHYSICAL_MEMORY_MAP:
- {
- struct dom0_memory_map_entry entry;
- int i;
-
- for ( i = 0; i < e820.nr_map; i++ )
- {
- if ( i >= op->u.physical_memory_map.max_map_entries )
- break;
- entry.start = e820.map[i].addr;
- entry.end = e820.map[i].addr + e820.map[i].size;
- entry.is_ram = (e820.map[i].type == E820_RAM);
- (void)copy_to_guest_offset(
- op->u.physical_memory_map.memory_map, i, &entry, 1);
- }
-
- op->u.physical_memory_map.nr_map_entries = i;
- (void)copy_to_guest(u_dom0_op, op, 1);
- }
- break;
-
case DOM0_HYPERCALL_INIT:
{
struct domain *d;
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/domain.c Tue May 30 14:30:34 2006 -0500
@@ -146,6 +146,8 @@ struct vcpu *alloc_vcpu_struct(struct do
v->arch.guest_vl4table = __linear_l4_table;
#endif
+ pae_l3_cache_init(&v->arch.pae_l3_cache);
+
return v;
}
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/domain_build.c Tue May 30 14:30:34 2006 -0500
@@ -367,7 +367,10 @@ int construct_dom0(struct domain *d,
if ( (1UL << order) > nr_pages )
panic("Domain 0 allocation is too small for kernel image.\n");
- /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+ /*
+ * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
+ * mapping covers the allocation.
+ */
if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
panic("Not enough RAM for domain 0 allocation.\n");
alloc_spfn = page_to_mfn(page);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/hvm.c Tue May 30 14:30:34 2006 -0500
@@ -185,11 +185,16 @@ void hvm_setup_platform(struct domain* d
void hvm_setup_platform(struct domain* d)
{
struct hvm_domain *platform;
-
- if ( !hvm_guest(current) || (current->vcpu_id != 0) )
+ struct vcpu *v=current;
+
+ if ( !hvm_guest(v) || (v->vcpu_id != 0) )
return;
- shadow_direct_map_init(d);
+ if ( shadow_direct_map_init(d) == 0 )
+ {
+ printk("Can not allocate shadow direct map for HVM domain.\n");
+ domain_crash_synchronous();
+ }
hvm_map_io_shared_page(d);
hvm_get_info(d);
@@ -204,7 +209,8 @@ void hvm_setup_platform(struct domain* d
hvm_vioapic_init(d);
}
- pit_init(&platform->vpit, current);
+ init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, v->processor);
+ pit_init(v, cpu_khz);
}
void pic_irq_request(void *data, int level)
@@ -234,6 +240,14 @@ void hvm_pic_assist(struct vcpu *v)
} while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
do_pic_irqs(pic, irqs);
}
+}
+
+u64 hvm_get_guest_time(struct vcpu *v)
+{
+ u64 host_tsc;
+
+ rdtscll(host_tsc);
+ return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}
int cpu_get_interrupt(struct vcpu *v, int *type)
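
hvm_get_guest_time() above derives guest time as the host TSC plus a per-vcpu cached offset, and the set_guest_time() paths recompute that offset, so freezing and resuming guest time reduces to offset arithmetic. A runnable toy of the scheme, with host_tsc() standing in for rdtscll():

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t fake_host_tsc = 1000;
    static uint64_t host_tsc(void) { return fake_host_tsc; }   /* rdtscll() */

    struct vcpu_time { int64_t cache_tsc_offset; };

    static void set_guest_time(struct vcpu_time *v, uint64_t gtime)
    {
        /* Choose the offset so that host + offset == desired guest time. */
        v->cache_tsc_offset = (int64_t)(gtime - host_tsc());
    }

    static uint64_t get_guest_time(struct vcpu_time *v)
    {
        return host_tsc() + v->cache_tsc_offset;
    }

    int main(void)
    {
        struct vcpu_time v;
        set_guest_time(&v, 500);          /* guest lags the host */
        fake_host_tsc += 100;             /* host TSC keeps running */
        printf("guest time = %llu\n",     /* 600: advances with the host */
               (unsigned long long)get_guest_time(&v));
        return 0;
    }
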
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/i8254.c Tue May 30 14:30:34 2006 -0500
@@ -22,11 +22,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-/* Edwin Zhai <edwin.zhai@xxxxxxxxx>
+/* Edwin Zhai <edwin.zhai@xxxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx>
* Ported to xen:
- * use actimer for intr generation;
+ * Add a new layer of periodic time on top of PIT;
* move speaker io access to hypervisor;
- * use new method for counter/intrs calculation
*/
#include <xen/config.h>
@@ -42,184 +41,117 @@
#include <asm/hvm/vpit.h>
#include <asm/current.h>
-/*#define DEBUG_PIT*/
+/* Enabling DEBUG_PIT may cause guest calibration inaccuracy */
+/* #define DEBUG_PIT */
#define RW_STATE_LSB 1
#define RW_STATE_MSB 2
#define RW_STATE_WORD0 3
#define RW_STATE_WORD1 4
-#ifndef NSEC_PER_SEC
-#define NSEC_PER_SEC (1000000000ULL)
-#endif
-
-#ifndef TIMER_SLOP
-#define TIMER_SLOP (50*1000) /* ns */
-#endif
-
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time);
-
-s_time_t hvm_get_clock(void)
-{
- /* TODO: add pause/unpause support */
- return NOW();
+#define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency)
+static int handle_pit_io(ioreq_t *p);
+static int handle_speaker_io(ioreq_t *p);
+
+/* compute with 96 bit intermediate result: (a*b)/c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+ union {
+ uint64_t ll;
+ struct {
+#ifdef WORDS_BIGENDIAN
+ uint32_t high, low;
+#else
+ uint32_t low, high;
+#endif
+ } l;
+ } u, res;
+ uint64_t rl, rh;
+
+ u.ll = a;
+ rl = (uint64_t)u.l.low * (uint64_t)b;
+ rh = (uint64_t)u.l.high * (uint64_t)b;
+ rh += (rl >> 32);
+ res.l.high = rh / c;
+ res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+ return res.ll;
+}
+
+/*
+ * get processor time.
+ * unit: TSC
+ */
+int64_t hvm_get_clock(struct vcpu *v)
+{
+ uint64_t gtsc;
+ gtsc = hvm_get_guest_time(v);
+ return gtsc;
}
static int pit_get_count(PITChannelState *s)
{
- u64 d;
- u64 counter;
-
- d = hvm_get_clock() - s->count_load_time;
+ uint64_t d;
+ int counter;
+
+ d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu));
switch(s->mode) {
case 0:
case 1:
case 4:
case 5:
- counter = (s->period - d) & 0xffff;
+ counter = (s->count - d) & 0xffff;
break;
case 3:
/* XXX: may be incorrect for odd counts */
- counter = s->period - ((2 * d) % s->period);
+ counter = s->count - ((2 * d) % s->count);
break;
default:
- /* mod 2 counter handle */
- d = hvm_get_clock() - s->hvm_time->count_point;
- d += s->hvm_time->count_advance;
- counter = s->period - (d % s->period);
- break;
- }
- /* change from ns to pit counter */
- counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC);
+ counter = s->count - (d % s->count);
+ break;
+ }
return counter;
}
/* get pit output bit */
-static int pit_get_out1(PITChannelState *s, s64 current_time)
-{
- u64 d;
+static int pit_get_out1(PITChannelState *s, int64_t current_time)
+{
+ uint64_t d;
int out;
- d = current_time - s->count_load_time;
+ d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec(s->vcpu));
switch(s->mode) {
default:
case 0:
- out = (d >= s->period);
+ out = (d >= s->count);
break;
case 1:
- out = (d < s->period);
+ out = (d < s->count);
break;
case 2:
- /* mod2 out is no meaning, since intr are generated in background */
- if ((d % s->period) == 0 && d != 0)
+ if ((d % s->count) == 0 && d != 0)
out = 1;
else
out = 0;
break;
case 3:
- out = (d % s->period) < ((s->period + 1) >> 1);
+ out = (d % s->count) < ((s->count + 1) >> 1);
break;
case 4:
case 5:
- out = (d == s->period);
+ out = (d == s->count);
break;
}
return out;
}
-int pit_get_out(hvm_virpit *pit, int channel, s64 current_time)
+int pit_get_out(PITState *pit, int channel, int64_t current_time)
{
PITChannelState *s = &pit->channels[channel];
return pit_get_out1(s, current_time);
}
-static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time)
-{
- struct hvm_time_info *hvm_time = s->hvm_time;
- struct domain *d = (void *) s -
- offsetof(struct domain, arch.hvm_domain.vpit.channels[0]);
-
- /* ticks from current time(expected time) to NOW */
- int missed_ticks;
- /* current_time is expected time for next intr, check if it's true
- * (actimer has a TIMER_SLOP in advance)
- */
- s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time;
-
- if (missed_time >= 0) {
- missed_ticks = missed_time/(s_time_t)s->period + 1;
- if (test_bit(_DOMF_debugging, &d->domain_flags)) {
- hvm_time->pending_intr_nr++;
- } else {
- hvm_time->pending_intr_nr += missed_ticks;
- }
- s->next_transition_time = current_time + (missed_ticks ) * s->period;
- }
-
- return s->next_transition_time;
-}
-
-/* only rearm the actimer when return value > 0
- * -2: init state
- * -1: the mode has expired
- * 0: current VCPU is not running
- * >0: the next fired time
- */
-s64 pit_get_next_transition_time(PITChannelState *s,
- s64 current_time)
-{
- s64 d, next_time, base;
- int period2;
- struct hvm_time_info *hvm_time = s->hvm_time;
-
- d = current_time - s->count_load_time;
- switch(s->mode) {
- default:
- case 0:
- case 1:
- if (d < s->period)
- next_time = s->period;
- else
- return -1;
- break;
- case 2:
- next_time = missed_ticks(s, current_time);
- if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) )
- return 0;
- break;
- case 3:
- base = (d / s->period) * s->period;
- period2 = ((s->period + 1) >> 1);
- if ((d - base) < period2)
- next_time = base + period2;
- else
- next_time = base + s->period;
- break;
- case 4:
- case 5:
- if (d < s->period)
- next_time = s->period;
- else if (d == s->period)
- next_time = s->period + 1;
- else
- return -1;
- break;
- case 0xff:
- return -2; /* for init state */
- break;
- }
- /* XXX: better solution: use a clock at PIT_FREQ Hz */
- if (next_time <= current_time){
-#ifdef DEBUG_PIT
- printk("HVM_PIT:next_time <= current_time. next=0x%llx,
current=0x%llx!\n",next_time, current_time);
-#endif
- next_time = current_time + 1;
- }
- return next_time;
-}
-
/* val must be 0 or 1 */
-void pit_set_gate(hvm_virpit *pit, int channel, int val)
+void pit_set_gate(PITState *pit, int channel, int val)
{
PITChannelState *s = &pit->channels[channel];
@@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c
case 5:
if (s->gate < val) {
/* restart counting on rising edge */
- s->count_load_time = hvm_get_clock();
- pit_irq_timer_update(s, s->count_load_time);
+ s->count_load_time = hvm_get_clock(s->vcpu);
+// pit_irq_timer_update(s, s->count_load_time);
}
break;
case 2:
case 3:
if (s->gate < val) {
/* restart counting on rising edge */
- s->count_load_time = hvm_get_clock();
- pit_irq_timer_update(s, s->count_load_time);
+ s->count_load_time = hvm_get_clock(s->vcpu);
+// pit_irq_timer_update(s, s->count_load_time);
}
/* XXX: disable/enable counting */
break;
@@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c
s->gate = val;
}
-int pit_get_gate(hvm_virpit *pit, int channel)
+int pit_get_gate(PITState *pit, int channel)
{
PITChannelState *s = &pit->channels[channel];
return s->gate;
@@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch
static inline void pit_load_count(PITChannelState *s, int val)
{
+ u32 period;
if (val == 0)
val = 0x10000;
-
- s->count_load_time = hvm_get_clock();
+ s->count_load_time = hvm_get_clock(s->vcpu);
s->count = val;
- s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ);
+ period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
#ifdef DEBUG_PIT
- printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d,
load_time=%lld\n",
+ printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d,
load_time=%lld\n",
+ s,
val,
- s->period / 1000,
+ period,
s->mode,
- s->count_load_time);
+ (long long)s->count_load_time);
#endif
- if (s->mode == HVM_PIT_ACCEL_MODE) {
- if (!s->hvm_time) {
- printk("HVM_PIT:guest should only set mod 2 on channel 0!\n");
- return;
- }
- s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L;
- s->hvm_time->first_injected = 0;
-
- if (s->period < 900000) { /* < 0.9 ms */
- printk("HVM_PIT: guest programmed too small an count: %x\n",
- s->count);
- s->period = 1000000;
- }
- }
-
- pit_irq_timer_update(s, s->count_load_time);
+ switch (s->mode) {
+ case 2:
+ /* create periodic time */
+ s->pt = create_periodic_time (s->vcpu, period, 0, 0);
+ break;
+ case 1:
+ /* create one shot time */
+ s->pt = create_periodic_time (s->vcpu, period, 0, 1);
+#ifdef DEBUG_PIT
+ printk("HVM_PIT: create one shot time.\n");
+#endif
+ break;
+ default:
+ break;
+ }
}
/* if already latched, do not latch again */
@@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt
}
}
-static void pit_ioport_write(void *opaque, u32 addr, u32 val)
-{
- hvm_virpit *pit = opaque;
+static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ PITState *pit = opaque;
int channel, access;
PITChannelState *s;
val &= 0xff;
@@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu
if (!(val & 0x10) && !s->status_latched) {
/* status latch */
/* XXX: add BCD and null count */
- s->status = (pit_get_out1(s, hvm_get_clock()) << 7) |
+ s->status = (pit_get_out1(s, hvm_get_clock(s->vcpu)) << 7) |
(s->rw_mode << 4) |
(s->mode << 1) |
s->bcd;
@@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu
}
}
-static u32 pit_ioport_read(void *opaque, u32 addr)
-{
- hvm_virpit *pit = opaque;
+static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
+{
+ PITState *pit = opaque;
int ret, count;
PITChannelState *s;
@@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque,
return ret;
}
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time)
-{
- s64 expire_time;
- int irq_level;
- struct vcpu *v = current;
- struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic;
-
- if (!s->hvm_time || s->mode == 0xff)
- return;
-
- expire_time = pit_get_next_transition_time(s, current_time);
- /* not generate intr by direct pic_set_irq in mod 2
- * XXX:mod 3 should be same as mod 2
- */
- if (s->mode != HVM_PIT_ACCEL_MODE) {
- irq_level = pit_get_out1(s, current_time);
- pic_set_irq(pic, s->irq, irq_level);
- s->next_transition_time = expire_time;
-#ifdef DEBUG_PIT
- printk("HVM_PIT:irq_level=%d next_delay=%l ns\n",
- irq_level,
- (expire_time - current_time));
-#endif
- }
-
- if (expire_time > 0)
- set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-
-}
-
-static void pit_irq_timer(void *data)
-{
- PITChannelState *s = data;
-
- pit_irq_timer_update(s, s->next_transition_time);
-}
-
static void pit_reset(void *opaque)
{
- hvm_virpit *pit = opaque;
+ PITState *pit = opaque;
PITChannelState *s;
int i;
for(i = 0;i < 3; i++) {
s = &pit->channels[i];
+ if ( s -> pt ) {
+ destroy_periodic_time (s->pt);
+ s->pt = NULL;
+ }
s->mode = 0xff; /* the init mode */
s->gate = (i != 2);
pit_load_count(s, 0);
}
}
-/* hvm_io_assist light-weight version, specific to PIT DM */
-static void resume_pit_io(ioreq_t *p)
-{
- struct cpu_user_regs *regs = guest_cpu_user_regs();
- unsigned long old_eax = regs->eax;
- p->state = STATE_INVALID;
-
- switch(p->size) {
- case 1:
- regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
- break;
- case 2:
- regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
- break;
- case 4:
- regs->eax = (p->u.data & 0xffffffff);
- break;
- default:
- BUG();
- }
+void pit_init(struct vcpu *v, unsigned long cpu_khz)
+{
+ PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit;
+ PITChannelState *s;
+
+ s = &pit->channels[0];
+ /* the timer 0 is connected to an IRQ */
+ s->vcpu = v;
+ s++; s->vcpu = v;
+ s++; s->vcpu = v;
+
+ register_portio_handler(PIT_BASE, 4, handle_pit_io);
+ /* register the speaker port */
+ register_portio_handler(0x61, 1, handle_speaker_io);
+ ticks_per_sec(v) = cpu_khz * (int64_t)1000;
+#ifdef DEBUG_PIT
+ printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v));
+#endif
+ pit_reset(pit);
+ return;
}
/* the intercept action for PIT DM retval:0--not handled; 1--handled */
-int handle_pit_io(ioreq_t *p)
+static int handle_pit_io(ioreq_t *p)
{
struct vcpu *v = current;
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+ struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
if (p->size != 1 ||
p->pdata_valid ||
@@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p)
if (p->dir == 0) {/* write */
pit_ioport_write(vpit, p->addr, p->u.data);
} else if (p->dir == 1) { /* read */
- p->u.data = pit_ioport_read(vpit, p->addr);
- resume_pit_io(p);
- }
-
- /* always return 1, since PIT sit in HV now */
+ if ( (p->addr & 3) != 3 ) {
+ p->u.data = pit_ioport_read(vpit, p->addr);
+ } else {
+ printk("HVM_PIT: read A1:A0=3!\n");
+ }
+ }
return 1;
}
static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
- hvm_virpit *pit = opaque;
- val &= 0xff;
+ PITState *pit = opaque;
pit->speaker_data_on = (val >> 1) & 1;
pit_set_gate(pit, 2, val & 1);
}
@@ -527,18 +426,18 @@ static uint32_t speaker_ioport_read(void
static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
{
int out;
- hvm_virpit *pit = opaque;
- out = pit_get_out(pit, 2, hvm_get_clock());
+ PITState *pit = opaque;
+ out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu));
pit->dummy_refresh_clock ^= 1;
return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
(pit->dummy_refresh_clock << 4);
}
-int handle_speaker_io(ioreq_t *p)
+static int handle_speaker_io(ioreq_t *p)
{
struct vcpu *v = current;
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+ struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
if (p->size != 1 ||
p->pdata_valid ||
@@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p)
speaker_ioport_write(vpit, p->addr, p->u.data);
} else if (p->dir == 1) {/* read */
p->u.data = speaker_ioport_read(vpit, p->addr);
- resume_pit_io(p);
}
return 1;
}
-
-/* pick up missed timer ticks at deactive time */
-void pickup_deactive_ticks(struct hvm_virpit *vpit)
-{
- s64 next_time;
- PITChannelState *s = &(vpit->channels[0]);
- if ( !active_timer(&(vpit->time_info.pit_timer)) ) {
- next_time = pit_get_next_transition_time(s, s->next_transition_time);
- if (next_time >= 0)
- set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
- }
-}
-
-void pit_init(struct hvm_virpit *pit, struct vcpu *v)
-{
- PITChannelState *s;
- struct hvm_time_info *hvm_time;
-
- s = &pit->channels[0];
- /* the timer 0 is connected to an IRQ */
- s->irq = 0;
- /* channel 0 need access the related time info for intr injection */
- hvm_time = s->hvm_time = &pit->time_info;
- hvm_time->vcpu = v;
-
- init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor);
-
- register_portio_handler(PIT_BASE, 4, handle_pit_io);
-
- /* register the speaker port */
- register_portio_handler(0x61, 1, handle_speaker_io);
-
- pit_reset(pit);
-
- return;
-
-}
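
muldiv64() above computes (a*b)/c with a 96-bit intermediate so that conversions like pit_get_count()'s TSC-to-PIT-tick scaling cannot overflow 64 bits. A standalone copy with a worked example; little-endian field order is assumed, matching the non-WORDS_BIGENDIAN branch of the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* (a*b)/c with a 96-bit intermediate, as in the i8254.c hunk above. */
    uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
    {
        union { uint64_t ll; struct { uint32_t low, high; } l; } u, res;
        uint64_t rl, rh;

        u.ll = a;
        rl = (uint64_t)u.l.low * b;        /* low 32 bits of a, times b  */
        rh = (uint64_t)u.l.high * b;       /* high 32 bits of a, times b */
        rh += rl >> 32;                    /* carry into the high part   */
        res.l.high = rh / c;
        res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
        return res.ll;
    }

    int main(void)
    {
        /* pit_get_count()-style conversion: TSC ticks -> PIT ticks.
           Assume a 1 GHz guest TSC, 0.5 s elapsed, PIT_FREQ = 1193181 Hz. */
        uint64_t ticks_per_sec = 1000000000ULL;
        uint64_t d = muldiv64(500000000ULL, 1193181, ticks_per_sec);
        printf("elapsed PIT ticks = %llu\n", (unsigned long long)d); /* ~596590 */
        return 0;
    }
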
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/intercept.c Tue May 30 14:30:34 2006 -0500
@@ -214,6 +214,88 @@ void hlt_timer_fn(void *data)
evtchn_set_pending(v, iopacket_port(v));
}
+static __inline__ void missed_ticks(struct periodic_time *pt)
+{
+ int missed_ticks;
+
+ missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period;
+ if ( missed_ticks++ >= 0 ) {
+ if ( missed_ticks > 1000 ) {
+ /* TODO: Adjust guest time together */
+ pt->pending_intr_nr ++;
+ }
+ else {
+ pt->pending_intr_nr += missed_ticks;
+ }
+ pt->scheduled += missed_ticks * pt->period;
+ }
+}
+
+/* hook function for the platform periodic time */
+void pt_timer_fn(void *data)
+{
+ struct vcpu *v = data;
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+ /* pick up missed timer tick */
+ missed_ticks(pt);
+ if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
+ set_timer(&pt->timer, pt->scheduled);
+ }
+}
+
+/* pick up missed timer ticks at deactivation time */
+void pickup_deactive_ticks(struct periodic_time *pt)
+{
+ if ( !active_timer(&(pt->timer)) ) {
+ missed_ticks(pt);
+ set_timer(&pt->timer, pt->scheduled);
+ }
+}
+
+/*
+ * period: fire frequency in ns.
+ */
+struct periodic_time * create_periodic_time(
+ struct vcpu *v,
+ u32 period,
+ char irq,
+ char one_shot)
+{
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+ if ( pt->enabled ) {
+ if ( v->vcpu_id != 0 ) {
+ printk("HVM_PIT: start 2nd periodic time on non BSP!\n");
+ }
+ stop_timer (&pt->timer);
+ pt->enabled = 0;
+ }
+ pt->pending_intr_nr = 0;
+ pt->first_injected = 0;
+ if (period < 900000) { /* < 0.9 ms */
+ printk("HVM_PlatformTime: program too small period %u\n",period);
+ period = 900000; /* force to 0.9ms */
+ }
+ pt->period = period;
+ pt->irq = irq;
+ pt->period_cycles = (u64)period * cpu_khz / 1000000L;
+ pt->one_shot = one_shot;
+ if ( one_shot ) {
+ printk("HVM_PL: No support for one shot platform time yet\n");
+ }
+ pt->scheduled = NOW() + period;
+ set_timer (&pt->timer,pt->scheduled);
+ pt->enabled = 1;
+ return pt;
+}
+
+void destroy_periodic_time(struct periodic_time *pt)
+{
+ if ( pt->enabled ) {
+ stop_timer(&pt->timer);
+ pt->enabled = 0;
+ }
+}
/*
* Local variables:
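
missed_ticks() above charges the guest one pending interrupt per full period elapsed past the scheduled deadline, then advances the deadline beyond "now". A simplified, runnable model of that catch-up; the patch's clamp on runaway catch-up (more than 1000 ticks) is omitted here:

    #include <stdint.h>
    #include <stdio.h>

    struct periodic_time_model {
        int64_t scheduled;          /* next deadline, ns */
        int64_t period;             /* ns */
        unsigned pending_intr_nr;   /* interrupts owed to the guest */
    };

    static void catch_up(struct periodic_time_model *pt, int64_t now)
    {
        int64_t missed = (now - pt->scheduled) / pt->period;
        if (++missed >= 0) {                 /* count the deadline itself too */
            pt->pending_intr_nr += missed;
            pt->scheduled += missed * pt->period;   /* next deadline > now */
        }
    }

    int main(void)
    {
        struct periodic_time_model pt = { .scheduled = 1000, .period = 100,
                                          .pending_intr_nr = 0 };
        catch_up(&pt, 1350);   /* 350ns late: deadlines 1000,1100,1200,1300 */
        printf("pending=%u next=%lld\n", pt.pending_intr_nr,
               (long long)pt.scheduled);   /* pending=4 next=1400 */
        return 0;
    }
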
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/intr.c Tue May 30 14:30:34 2006 -0500
@@ -44,45 +44,33 @@
*/
#define BSP_CPU(v) (!(v->vcpu_id))
-u64 svm_get_guest_time(struct vcpu *v)
-{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
- u64 host_tsc;
-
- rdtscll(host_tsc);
- return host_tsc + time_info->cache_tsc_offset;
-}
-
void svm_set_guest_time(struct vcpu *v, u64 gtime)
{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
u64 host_tsc;
rdtscll(host_tsc);
- time_info->cache_tsc_offset = gtime - host_tsc;
- v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset;
+ v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+ v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset;
}
static inline void
interrupt_post_injection(struct vcpu * v, int vector, int type)
{
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
if ( is_pit_irq(v, vector, type) ) {
- if ( !time_info->first_injected ) {
- time_info->pending_intr_nr = 0;
- time_info->last_pit_gtime = svm_get_guest_time(v);
- time_info->first_injected = 1;
+ if ( !pt->first_injected ) {
+ pt->pending_intr_nr = 0;
+ pt->last_plt_gtime = hvm_get_guest_time(v);
+ pt->scheduled = NOW() + pt->period;
+ set_timer(&pt->timer, pt->scheduled);
+ pt->first_injected = 1;
} else {
- time_info->pending_intr_nr--;
+ pt->pending_intr_nr--;
+ pt->last_plt_gtime += pt->period_cycles;
+ svm_set_guest_time(v, pt->last_plt_gtime);
}
- time_info->count_advance = 0;
- time_info->count_point = NOW();
-
- time_info->last_pit_gtime += time_info->period_cycles;
- svm_set_guest_time(v, time_info->last_pit_gtime);
}
switch(type)
@@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void)
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
struct hvm_domain *plat=&v->domain->arch.hvm_domain;
- struct hvm_virpit *vpit = &plat->vpit;
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &plat->pl_time.periodic_tm;
struct hvm_virpic *pic= &plat->vpic;
int intr_type = VLAPIC_DELIV_MODE_EXT;
int intr_vector = -1;
@@ -174,9 +161,9 @@ asmlinkage void svm_intr_assist(void)
if ( cpu_has_pending_irq(v) ) {
intr_vector = cpu_get_interrupt(v, &intr_type);
}
- else if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
- pic_set_irq(pic, 0, 0);
- pic_set_irq(pic, 0, 1);
+ else if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+ pic_set_irq(pic, pt->irq, 0);
+ pic_set_irq(pic, pt->irq, 1);
intr_vector = cpu_get_interrupt(v, &intr_type);
}
}
@@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void)
/* Re-injecting a PIT interrupt? */
if (re_injecting &&
is_pit_irq(v, intr_vector, intr_type)) {
- ++time_info->pending_intr_nr;
+ ++pt->pending_intr_nr;
}
/* let's inject this interrupt */
TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c Tue May 30 14:30:34 2006 -0500
@@ -51,13 +51,6 @@
#define SVM_EXTRA_DEBUG
-#ifdef TRACE_BUFFER
-static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
-#else
-#define TRACE_VMEXIT(index,value) ((void)0)
-#endif
-
/* Useful define */
#define MAX_INST_SIZE 15
@@ -458,6 +451,9 @@ int start_svm(void)
if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
return 0;
+ svm_globals[cpu].hsa = alloc_host_save_area();
+ if (! svm_globals[cpu].hsa)
+ return 0;
rdmsr(MSR_EFER, eax, edx);
eax |= EFER_SVME;
@@ -466,7 +462,6 @@ int start_svm(void)
printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
/* Initialize the HSA for this core */
- svm_globals[cpu].hsa = alloc_host_save_area();
phys_hsa = (u64) virt_to_maddr( svm_globals[cpu].hsa );
phys_hsa_lo = (u32) phys_hsa;
phys_hsa_hi = (u32) (phys_hsa >> 32);
@@ -670,12 +665,11 @@ static void arch_svm_do_launch(struct vc
static void svm_freeze_time(struct vcpu *v)
{
- struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
+ struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
- if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
- v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
- time_info->count_advance += (NOW() - time_info->count_point);
- stop_timer(&(time_info->pit_timer));
+ if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+ v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+ stop_timer(&(pt->timer));
}
}
@@ -752,7 +746,7 @@ static void svm_relinquish_guest_resourc
}
}
- kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+ kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
@@ -782,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v)
void svm_migrate_timers(struct vcpu *v)
{
- struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
-
- migrate_timer(&time_info->pit_timer, v->processor);
- migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+ if ( pt->enabled ) {
+ migrate_timer( &pt->timer, v->processor );
+ migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
+ }
if ( hvm_apic_support(v->domain) && VLAPIC( v ))
migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
}
@@ -814,7 +810,6 @@ static int svm_do_page_fault(unsigned lo
return 1;
handle_mmio(va, va);
- TRACE_VMEXIT(2,2);
return 1;
}
@@ -840,7 +835,6 @@ static int svm_do_page_fault(unsigned lo
return 1;
}
- TRACE_VMEXIT (2,2);
handle_mmio(va, gpa);
return 1;
@@ -852,8 +846,6 @@ static int svm_do_page_fault(unsigned lo
/* Let's make sure that the Guest TLB is flushed */
set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
}
-
- TRACE_VMEXIT (2,result);
return result;
}
@@ -1899,14 +1891,8 @@ static inline void svm_do_msr_access(str
regs->edx = 0;
switch (regs->ecx) {
case MSR_IA32_TIME_STAMP_COUNTER:
- {
- struct hvm_time_info *time_info;
-
- rdtscll(msr_content);
- time_info = &v->domain->arch.hvm_domain.vpit.time_info;
- msr_content += time_info->cache_tsc_offset;
+ msr_content = hvm_get_guest_time(v);
break;
- }
case MSR_IA32_SYSENTER_CS:
msr_content = vmcb->sysenter_cs;
break;
@@ -1973,7 +1959,7 @@ static inline void svm_vmexit_do_hlt(str
static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
{
struct vcpu *v = current;
- struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
+ struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
s_time_t next_pit = -1, next_wakeup;
__update_guest_eip(vmcb, 1);
@@ -1983,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str
return;
if ( !v->vcpu_id )
- next_pit = get_pit_scheduled(v, vpit);
+ next_pit = get_scheduled(v, pt->irq, pt);
next_wakeup = get_apictime_scheduled(v);
if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
next_wakeup = next_pit;
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/svm/vmcb.c Tue May 30 14:30:34 2006 -0500
@@ -139,17 +139,20 @@ static int construct_vmcb_controls(struc
/* The following is for I/O and MSR permision map */
iopm = alloc_xenheap_pages(get_order_from_bytes(IOPM_SIZE));
-
- ASSERT(iopm);
- memset(iopm, 0xff, IOPM_SIZE);
- clear_bit(PC_DEBUG_PORT, iopm);
+ if (iopm)
+ {
+ memset(iopm, 0xff, IOPM_SIZE);
+ clear_bit(PC_DEBUG_PORT, iopm);
+ }
msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
-
- ASSERT(msrpm);
- memset(msrpm, 0xff, MSRPM_SIZE);
+ if (msrpm)
+ memset(msrpm, 0xff, MSRPM_SIZE);
arch_svm->iopm = iopm;
arch_svm->msrpm = msrpm;
+
+ if (! iopm || ! msrpm)
+ return 1;
vmcb->iopm_base_pa = (u64) virt_to_maddr(iopm);
vmcb->msrpm_base_pa = (u64) virt_to_maddr(msrpm);
@@ -439,19 +442,17 @@ void svm_do_resume(struct vcpu *v)
void svm_do_resume(struct vcpu *v)
{
struct domain *d = v->domain;
- struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit;
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm;
svm_stts(v);
/* pick up the elapsed PIT ticks and re-enable pit_timer */
- if ( time_info->first_injected ) {
- if ( v->domain->arch.hvm_domain.guest_time ) {
- svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
- time_info->count_point = NOW();
- v->domain->arch.hvm_domain.guest_time = 0;
+ if ( pt->enabled && pt->first_injected ) {
+ if ( v->arch.hvm_vcpu.guest_time ) {
+ svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+ v->arch.hvm_vcpu.guest_time = 0;
}
- pickup_deactive_ticks(vpit);
+ pickup_deactive_ticks(pt);
}
if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/io.c Tue May 30 14:30:34 2006 -0500
@@ -49,45 +49,33 @@ void __set_tsc_offset(u64 offset)
#endif
}
-u64 get_guest_time(struct vcpu *v)
-{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
- u64 host_tsc;
-
- rdtscll(host_tsc);
- return host_tsc + time_info->cache_tsc_offset;
-}
-
void set_guest_time(struct vcpu *v, u64 gtime)
{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
u64 host_tsc;
rdtscll(host_tsc);
- time_info->cache_tsc_offset = gtime - host_tsc;
- __set_tsc_offset(time_info->cache_tsc_offset);
+ v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+ __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset);
}
static inline void
interrupt_post_injection(struct vcpu * v, int vector, int type)
{
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
if ( is_pit_irq(v, vector, type) ) {
- if ( !time_info->first_injected ) {
- time_info->pending_intr_nr = 0;
- time_info->last_pit_gtime = get_guest_time(v);
- time_info->first_injected = 1;
+ if ( !pt->first_injected ) {
+ pt->pending_intr_nr = 0;
+ pt->last_plt_gtime = hvm_get_guest_time(v);
+ pt->scheduled = NOW() + pt->period;
+ set_timer(&pt->timer, pt->scheduled);
+ pt->first_injected = 1;
} else {
- time_info->pending_intr_nr--;
- }
- time_info->count_advance = 0;
- time_info->count_point = NOW();
-
- time_info->last_pit_gtime += time_info->period_cycles;
- set_guest_time(v, time_info->last_pit_gtime);
+ pt->pending_intr_nr--;
+ pt->last_plt_gtime += pt->period_cycles;
+ set_guest_time(v, pt->last_plt_gtime);
+ }
}
switch(type)
@@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void)
unsigned long eflags;
struct vcpu *v = current;
struct hvm_domain *plat=&v->domain->arch.hvm_domain;
- struct hvm_time_info *time_info = &plat->vpit.time_info;
+ struct periodic_time *pt = &plat->pl_time.periodic_tm;
struct hvm_virpic *pic= &plat->vpic;
unsigned int idtv_info_field;
unsigned long inst_len;
@@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void)
if ( v->vcpu_id == 0 )
hvm_pic_assist(v);
- if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
- pic_set_irq(pic, 0, 0);
- pic_set_irq(pic, 0, 1);
+ if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+ pic_set_irq(pic, pt->irq, 0);
+ pic_set_irq(pic, pt->irq, 1);
}
has_ext_irq = cpu_has_pending_irq(v);
@@ -232,19 +220,17 @@ void vmx_do_resume(struct vcpu *v)
void vmx_do_resume(struct vcpu *v)
{
struct domain *d = v->domain;
- struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
- struct hvm_time_info *time_info = &vpit->time_info;
+ struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
vmx_stts();
/* pick up the elapsed PIT ticks and re-enable pit_timer */
- if ( time_info->first_injected ) {
- if ( v->domain->arch.hvm_domain.guest_time ) {
- time_info->count_point = NOW();
- set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
- v->domain->arch.hvm_domain.guest_time = 0;
- }
- pickup_deactive_ticks(vpit);
+ if ( pt->enabled && pt->first_injected ) {
+ if ( v->arch.hvm_vcpu.guest_time ) {
+ set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+ v->arch.hvm_vcpu.guest_time = 0;
+ }
+ pickup_deactive_ticks(pt);
}
if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c Tue May 30 14:30:34 2006 -0500
@@ -47,7 +47,7 @@
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
-static unsigned long trace_values[NR_CPUS][4];
+static unsigned long trace_values[NR_CPUS][5];
#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
static void vmx_ctxt_switch_from(struct vcpu *v);
@@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc
}
}
- kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+ kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
@@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write
static void vmx_freeze_time(struct vcpu *v)
{
- struct hvm_time_info *time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
+ struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
- if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) {
- v->domain->arch.hvm_domain.guest_time = get_guest_time(v);
- time_info->count_advance += (NOW() - time_info->count_point);
- stop_timer(&(time_info->pit_timer));
+ if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+ v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+ stop_timer(&(pt->timer));
}
}
@@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc
void vmx_migrate_timers(struct vcpu *v)
{
- struct hvm_time_info *time_info = &v->domain->arch.hvm_domain.vpit.time_info;
-
- migrate_timer(&time_info->pit_timer, v->processor);
- migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+ if ( pt->enabled ) {
+ migrate_timer(&pt->timer, v->processor);
+ migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+ }
if ( hvm_apic_support(v->domain) && VLAPIC(v))
migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
}
@@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc
(unsigned long)regs->edx);
switch (regs->ecx) {
case MSR_IA32_TIME_STAMP_COUNTER:
- {
- struct hvm_time_info *time_info;
-
- rdtscll(msr_content);
- time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
- msr_content += time_info->cache_tsc_offset;
- break;
- }
+ msr_content = hvm_get_guest_time(v);
+ break;
case MSR_IA32_SYSENTER_CS:
__vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
break;
@@ -1941,11 +1936,11 @@ void vmx_vmexit_do_hlt(void)
void vmx_vmexit_do_hlt(void)
{
struct vcpu *v=current;
- struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+ struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
s_time_t next_pit=-1,next_wakeup;
if ( !v->vcpu_id )
- next_pit = get_pit_scheduled(v,vpit);
+ next_pit = get_scheduled(v, pt->irq, pt);
next_wakeup = get_apictime_scheduled(v);
if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
next_wakeup = next_pit;
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/mm.c Tue May 30 14:30:34 2006 -0500
@@ -260,9 +260,82 @@ void share_xen_page_with_privileged_gues
share_xen_page_with_guest(page, dom_xen, readonly);
}
+#if defined(CONFIG_X86_PAE)
+
+#ifdef NDEBUG
+/* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
+#define l3tab_needs_shadow(mfn) (mfn >= 0x100000)
+#else
+/* In debug builds we aggressively shadow PDPTs to exercise code paths. */
+#define l3tab_needs_shadow(mfn) ((mfn << PAGE_SHIFT) != __pa(idle_pg_table))
+#endif
+
+static l1_pgentry_t *fix_pae_highmem_pl1e;
+
+/* Cache the address of PAE high-memory fixmap page tables. */
+static int __init cache_pae_fixmap_address(void)
+{
+ unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
+ l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
+ fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
+ return 0;
+}
+__initcall(cache_pae_fixmap_address);
+
+static void __write_ptbase(unsigned long mfn)
+{
+ l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
+ struct pae_l3_cache *cache = &current->arch.pae_l3_cache;
+ unsigned int cpu = smp_processor_id();
+
+ /* Fast path 1: does this mfn need a shadow at all? */
+ if ( !l3tab_needs_shadow(mfn) )
+ {
+ write_cr3(mfn << PAGE_SHIFT);
+ return;
+ }
+
+ /* Caching logic is not interrupt safe. */
+ ASSERT(!in_irq());
+
+ /* Fast path 2: is this mfn already cached? */
+ if ( cache->high_mfn == mfn )
+ {
+ write_cr3(__pa(cache->table[cache->inuse_idx]));
+ return;
+ }
+
+ /* Protects against pae_flush_pgd(). */
+ spin_lock(&cache->lock);
+
+ cache->inuse_idx ^= 1;
+ cache->high_mfn = mfn;
+
+ /* Map the guest L3 table and copy to the chosen low-memory cache. */
+ *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
+ highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
+ lowmem_l3tab = cache->table[cache->inuse_idx];
+ memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
+ *(fix_pae_highmem_pl1e - cpu) = l1e_empty();
+
+ /* Install the low-memory L3 table in CR3. */
+ write_cr3(__pa(lowmem_l3tab));
+
+ spin_unlock(&cache->lock);
+}
+
+#else /* !CONFIG_X86_PAE */
+
+static void __write_ptbase(unsigned long mfn)
+{
+ write_cr3(mfn << PAGE_SHIFT);
+}
+
+#endif /* !CONFIG_X86_PAE */
+
void write_ptbase(struct vcpu *v)
{
- write_cr3(pagetable_get_paddr(v->arch.monitor_table));
+ __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
}
void invalidate_shadow_ldt(struct vcpu *v)
@@ -401,6 +474,7 @@ static int get_page_and_type_from_pagenr
return 1;
}
+#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
/*
* We allow root tables to map each other (a.k.a. linear page tables). It
* needs some special care with reference counts and access permissions:
@@ -456,6 +530,7 @@ get_linear_pagetable(
return 1;
}
+#endif /* !CONFIG_X86_PAE */
int
get_page_from_l1e(
@@ -564,10 +639,6 @@ get_page_from_l3e(
rc = get_page_and_type_from_pagenr(
l3e_get_pfn(l3e),
PGT_l2_page_table | vaddr, d);
-#if CONFIG_PAGING_LEVELS == 3
- if ( unlikely(!rc) )
- rc = get_linear_pagetable(l3e, pfn, d);
-#endif
return rc;
}
#endif /* 3 level */
@@ -773,6 +844,41 @@ static int create_pae_xen_mappings(l3_pg
return 1;
}
+/* Flush a pgdir update into low-memory caches. */
+static void pae_flush_pgd(
+ unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
+{
+ struct domain *d = page_get_owner(mfn_to_page(mfn));
+ struct vcpu *v;
+ intpte_t _ol3e, _nl3e, _pl3e;
+ l3_pgentry_t *l3tab_ptr;
+ struct pae_l3_cache *cache;
+
+ /* If below 4GB then the pgdir is not shadowed in low memory. */
+ if ( !l3tab_needs_shadow(mfn) )
+ return;
+
+ for_each_vcpu ( d, v )
+ {
+ cache = &v->arch.pae_l3_cache;
+
+ spin_lock(&cache->lock);
+
+ if ( cache->high_mfn == mfn )
+ {
+ l3tab_ptr = &cache->table[cache->inuse_idx][idx];
+ _ol3e = l3e_get_intpte(*l3tab_ptr);
+ _nl3e = l3e_get_intpte(nl3e);
+ _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
+ BUG_ON(_pl3e != _ol3e);
+ }
+
+ spin_unlock(&cache->lock);
+ }
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
static inline int l1_backptr(
unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
{
@@ -787,6 +893,7 @@ static inline int l1_backptr(
#elif CONFIG_X86_64
# define create_pae_xen_mappings(pl3e) (1)
+# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
static inline int l1_backptr(
unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
@@ -886,14 +993,6 @@ static int alloc_l3_table(struct page_in
ASSERT(!shadow_mode_refcounts(d));
-#ifdef CONFIG_X86_PAE
- if ( pfn >= 0x100000 )
- {
- MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
- return 0;
- }
-#endif
-
pl3e = map_domain_page(pfn);
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
{
@@ -1240,6 +1339,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
okay = create_pae_xen_mappings(pl3e);
BUG_ON(!okay);
+
+ pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
put_page_from_l3e(ol3e, pfn);
return 1;
@@ -2811,6 +2912,8 @@ long do_update_descriptor(u64 pa, u64 de
return ret;
}
+typedef struct e820entry e820entry_t;
+DEFINE_XEN_GUEST_HANDLE(e820entry_t);
long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
{
@@ -2869,6 +2972,39 @@ long arch_memory_op(int op, XEN_GUEST_HA
break;
}
+ case XENMEM_memory_map:
+ {
+ return -ENOSYS;
+ }
+
+ case XENMEM_machine_memory_map:
+ {
+ struct xen_memory_map memmap;
+ XEN_GUEST_HANDLE(e820entry_t) buffer;
+ int count;
+
+ if ( !IS_PRIV(current->domain) )
+ return -EINVAL;
+
+ if ( copy_from_guest(&memmap, arg, 1) )
+ return -EFAULT;
+ if ( memmap.nr_entries < e820.nr_map + 1 )
+ return -EINVAL;
+
+ buffer = guest_handle_cast(memmap.buffer, e820entry_t);
+
+ count = min((unsigned int)e820.nr_map, memmap.nr_entries);
+ if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
+ return -EFAULT;
+
+ memmap.nr_entries = count;
+
+ if ( copy_to_guest(arg, &memmap, 1) )
+ return -EFAULT;
+
+ return 0;
+ }
+
default:
return subarch_memory_op(op, arg);
}
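
A caller-side sketch of the XENMEM_machine_memory_map op added above
(illustrative dom0 code, assuming the guest-side HYPERVISOR_memory_op() and
set_xen_guest_handle() helpers; the op is restricted to privileged domains):

    static struct e820entry machine_e820[E820MAX];

    static int fetch_machine_memory_map(void)
    {
        struct xen_memory_map memmap;

        memmap.nr_entries = E820MAX;              /* buffer capacity */
        set_xen_guest_handle(memmap.buffer, machine_e820);

        /* 0 on success; memmap.nr_entries then holds the copied entry count. */
        return HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
    }
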
@@ -3074,7 +3210,7 @@ void ptwr_flush(struct domain *d, const
if ( unlikely(d->arch.ptwr[which].vcpu != current) )
/* Don't use write_ptbase: it may switch to guest_user on x86/64! */
- write_cr3(pagetable_get_paddr(
- d->arch.ptwr[which].vcpu->arch.guest_table));
+ __write_ptbase(pagetable_get_pfn(
+ d->arch.ptwr[which].vcpu->arch.guest_table));
else
TOGGLE_MODE();
@@ -3185,15 +3321,16 @@ static int ptwr_emulated_update(
/* Turn a sub-word access into a full-word access. */
if ( bytes != sizeof(paddr_t) )
{
- int rc;
- paddr_t full;
- unsigned int offset = addr & (sizeof(paddr_t)-1);
+ paddr_t full;
+ unsigned int offset = addr & (sizeof(paddr_t)-1);
/* Align address; read full word. */
addr &= ~(sizeof(paddr_t)-1);
- if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
- sizeof(paddr_t))) )
- return rc;
+ if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
+ {
+ propagate_page_fault(addr, 4); /* user mode, read fault */
+ return X86EMUL_PROPAGATE_FAULT;
+ }
/* Mask out bits provided by caller. */
full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
/* Shift the caller value and OR in the missing bits. */
@@ -3271,7 +3408,8 @@ static int ptwr_emulated_write(
static int ptwr_emulated_write(
unsigned long addr,
unsigned long val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
return ptwr_emulated_update(addr, 0, val, bytes, 0);
}
@@ -3280,7 +3418,8 @@ static int ptwr_emulated_cmpxchg(
unsigned long addr,
unsigned long old,
unsigned long new,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
return ptwr_emulated_update(addr, old, new, bytes, 1);
}
@@ -3290,7 +3429,8 @@ static int ptwr_emulated_cmpxchg8b(
unsigned long old,
unsigned long old_hi,
unsigned long new,
- unsigned long new_hi)
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt)
{
if ( CONFIG_PAGING_LEVELS == 2 )
return X86EMUL_UNHANDLEABLE;
@@ -3299,7 +3439,7 @@ static int ptwr_emulated_cmpxchg8b(
addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
}
-static struct x86_mem_emulator ptwr_mem_emulator = {
+static struct x86_emulate_ops ptwr_emulate_ops = {
.read_std = x86_emulate_read_std,
.write_std = x86_emulate_write_std,
.read_emulated = x86_emulate_read_std,
@@ -3318,6 +3458,7 @@ int ptwr_do_page_fault(struct domain *d,
l2_pgentry_t *pl2e, l2e;
int which, flags;
unsigned long l2_idx;
+ struct x86_emulate_ctxt emul_ctxt;
if ( unlikely(shadow_mode_enabled(d)) )
return 0;
@@ -3472,8 +3613,10 @@ int ptwr_do_page_fault(struct domain *d,
return EXCRET_fault_fixed;
emulate:
- if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
- &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
+ emul_ctxt.regs = guest_cpu_user_regs();
+ emul_ctxt.cr2 = addr;
+ emul_ctxt.mode = X86EMUL_MODE_HOST;
+ if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
return 0;
perfc_incrc(ptwr_emulations);
return EXCRET_fault_fixed;
@@ -3596,11 +3739,10 @@ int map_pages_to_xen(
}
void __set_fixmap(
- enum fixed_addresses idx, unsigned long p, unsigned long flags)
-{
- if ( unlikely(idx >= __end_of_fixed_addresses) )
- BUG();
- map_pages_to_xen(fix_to_virt(idx), p >> PAGE_SHIFT, 1, flags);
+ enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
+{
+ BUG_ON(idx >= __end_of_fixed_addresses);
+ map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
}
#ifdef MEMORY_GUARD
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow.c Tue May 30 14:30:34 2006 -0500
@@ -430,7 +430,8 @@ no_shadow_page:
perfc_value(shadow_l2_pages),
perfc_value(hl2_table_pages),
perfc_value(snapshot_pages));
- BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+ /* XXX FIXME: try a shadow flush to free up some memory. */
+ domain_crash_synchronous();
return 0;
}
@@ -3064,7 +3065,8 @@ static inline unsigned long init_bl2(
if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
{
printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn,
gmfn);
- BUG(); /* XXX Deal gracefully with failure. */
+ /* XXX Deal gracefully with failure. */
+ domain_crash_synchronous();
}
spl4e = (l4_pgentry_t *)map_domain_page(smfn);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow32.c Tue May 30 14:30:34 2006 -0500
@@ -246,7 +246,8 @@ alloc_shadow_page(struct domain *d,
perfc_value(shadow_l2_pages),
perfc_value(hl2_table_pages),
perfc_value(snapshot_pages));
- BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+ /* XXX FIXME: try a shadow flush to free up some memory. */
+ domain_crash_synchronous();
}
smfn = page_to_mfn(page);
@@ -983,6 +984,11 @@ alloc_p2m_table(struct domain *d)
else
{
page = alloc_domheap_page(NULL);
+ if (!page)
+ {
+ printk("Alloc p2m table fail\n");
+ domain_crash(d);
+ }
l1tab = map_domain_page(page_to_mfn(page));
memset(l1tab, 0, PAGE_SIZE);
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/shadow_public.c Tue May 30 14:30:34 2006 -0500
@@ -324,6 +324,11 @@ static void alloc_monitor_pagetable(stru
mmfn_info = alloc_domheap_page(NULL);
ASSERT( mmfn_info );
+ if (!mmfn_info)
+ {
+ printk("Fail to allocate monitor pagetable\n");
+ domain_crash(v->domain);
+ }
mmfn = page_to_mfn(mmfn_info);
mpl4e = (l4_pgentry_t *) map_domain_page_global(mmfn);
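
One caveat on the two allocation-failure paths added above: domain_crash(),
unlike domain_crash_synchronous(), returns to its caller, so both functions
fall through and hand the NULL page to map_domain_page(). A more defensive
shape would bail out after crashing (a sketch, not what this changeset does;
the early return value is an assumption about the caller's failure convention):

    page = alloc_domheap_page(NULL);
    if ( page == NULL )
    {
        printk("Failed to allocate p2m table page\n");
        domain_crash(d);
        return 0;    /* assumed failure indication to the caller */
    }
    l1tab = map_domain_page(page_to_mfn(page));
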
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/traps.c Tue May 30 14:30:34 2006 -0500
@@ -876,7 +876,7 @@ static int emulate_privileged_op(struct
PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
break;
}
- regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+ regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
break;
case 0x6e: /* OUTSB */
@@ -902,7 +902,7 @@ static int emulate_privileged_op(struct
outl_user((u32)data, (u16)regs->edx, v, regs);
break;
}
- regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+ regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
break;
}
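
The -(int) casts above fix a promotion bug that bites x86/64: op_bytes is an
unsigned int, so -op_bytes is a 32-bit unsigned quantity (0xfffffffc when
op_bytes == 4) that zero-extends when added to the 64-bit register image,
advancing the pointer by almost 4GB instead of stepping it back. Casting
through int makes it sign-extend. A standalone illustration with hypothetical
values:

    unsigned int op_bytes = 4;
    unsigned long edi = 0x1000;

    edi + -op_bytes;        /* 0x1000 + 0x00000000fffffffc = 0x100000ffc */
    edi + -(int)op_bytes;   /* 0x1000 + (unsigned long)-4  = 0xffc       */

On 32-bit builds, where unsigned long is also 32 bits wide, the two forms are
equivalent, which is why the bug went unnoticed there.
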
diff -r e74246451527 -r f54d38cea8ac xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/arch/x86/x86_emulate.c Tue May 30 14:30:34 2006 -0500
@@ -363,12 +363,13 @@ do{ __asm__ __volatile__ (
#endif /* __i386__ */
/* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _size, _eip) \
-({ unsigned long _x; \
- if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
- goto done; \
- (_eip) += (_size); \
- (_type)_x; \
+#define insn_fetch(_type, _size, _eip) \
+({ unsigned long _x; \
+ rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt); \
+ if ( rc != 0 ) \
+ goto done; \
+ (_eip) += (_size); \
+ (_type)_x; \
})
/* Access/update address held in a register, based on addressing mode. */
@@ -426,12 +427,10 @@ decode_register(
return p;
}
-int
+int
x86_emulate_memop(
- struct cpu_user_regs *regs,
- unsigned long cr2,
- struct x86_mem_emulator *ops,
- int mode)
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
{
uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
@@ -439,9 +438,11 @@ x86_emulate_memop(
unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
int rc = 0;
struct operand src, dst;
+ unsigned long cr2 = ctxt->cr2;
+ int mode = ctxt->mode;
/* Shadow copy of register state. Committed on successful emulation. */
- struct cpu_user_regs _regs = *regs;
+ struct cpu_user_regs _regs = *ctxt->regs;
switch ( mode )
{
@@ -628,7 +629,7 @@ x86_emulate_memop(
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
((rc = ops->read_emulated((unsigned long)dst.ptr,
- &dst.val, dst.bytes)) != 0) )
+ &dst.val, dst.bytes, ctxt)) != 0) )
goto done;
break;
}
@@ -670,7 +671,7 @@ x86_emulate_memop(
src.type = OP_MEM;
src.ptr = (unsigned long *)cr2;
if ( (rc = ops->read_emulated((unsigned long)src.ptr,
- &src.val, src.bytes)) != 0 )
+ &src.val, src.bytes, ctxt)) != 0 )
goto done;
src.orig_val = src.val;
break;
@@ -776,7 +777,7 @@ x86_emulate_memop(
if ( mode == X86EMUL_MODE_PROT64 )
dst.bytes = 8;
if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
- &dst.val, dst.bytes)) != 0 )
+ &dst.val, dst.bytes, ctxt)) != 0 )
goto done;
register_address_increment(_regs.esp, dst.bytes);
break;
@@ -854,12 +855,12 @@ x86_emulate_memop(
{
dst.bytes = 8;
if ( (rc = ops->read_std((unsigned long)dst.ptr,
- &dst.val, 8)) != 0 )
+ &dst.val, 8, ctxt)) != 0 )
goto done;
}
- register_address_increment(_regs.esp, -dst.bytes);
+ register_address_increment(_regs.esp, -(int)dst.bytes);
if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
- dst.val, dst.bytes)) != 0 )
+ dst.val, dst.bytes, ctxt)) != 0 )
goto done;
dst.val = dst.orig_val; /* skanky: disable writeback */
break;
@@ -887,10 +888,11 @@ x86_emulate_memop(
case OP_MEM:
if ( lock_prefix )
rc = ops->cmpxchg_emulated(
- (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
+ (unsigned long)dst.ptr, dst.orig_val,
+ dst.val, dst.bytes, ctxt);
else
rc = ops->write_emulated(
- (unsigned long)dst.ptr, dst.val, dst.bytes);
+ (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt);
if ( rc != 0 )
goto done;
default:
@@ -899,7 +901,7 @@ x86_emulate_memop(
}
/* Commit shadow register state. */
- *regs = _regs;
+ *ctxt->regs = _regs;
done:
return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
@@ -911,11 +913,11 @@ x86_emulate_memop(
{
if ( _regs.ecx == 0 )
{
- regs->eip = _regs.eip;
+ ctxt->regs->eip = _regs.eip;
goto done;
}
_regs.ecx--;
- _regs.eip = regs->eip;
+ _regs.eip = ctxt->regs->eip;
}
switch ( b )
{
@@ -928,20 +930,21 @@ x86_emulate_memop(
dst.ptr = (unsigned long *)cr2;
if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
_regs.esi),
- &dst.val, dst.bytes)) != 0 )
+ &dst.val, dst.bytes, ctxt)) != 0 )
goto done;
}
else
{
/* Read fault: source is special memory. */
dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
- if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+ if ( (rc = ops->read_emulated(cr2, &dst.val,
+ dst.bytes, ctxt)) != 0 )
goto done;
}
register_address_increment(
- _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
break;
case 0xa6 ... 0xa7: /* cmps */
DPRINTF("Urk! I don't handle CMPS.\n");
@@ -952,16 +955,16 @@ x86_emulate_memop(
dst.ptr = (unsigned long *)cr2;
dst.val = _regs.eax;
register_address_increment(
- _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
break;
case 0xac ... 0xad: /* lods */
dst.type = OP_REG;
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.ptr = (unsigned long *)&_regs.eax;
- if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+ if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
goto done;
register_address_increment(
- _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
break;
case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n");
@@ -1074,8 +1077,8 @@ x86_emulate_memop(
#if defined(__i386__)
{
unsigned long old_lo, old_hi;
- if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) ||
- ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) )
+ if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) ||
+ ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) )
goto done;
if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
{
@@ -1090,8 +1093,8 @@ x86_emulate_memop(
}
else
{
- if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi,
- _regs.ebx, _regs.ecx)) != 0 )
+ if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx,
+ _regs.ecx, ctxt)) != 0 )
goto done;
_regs.eflags |= EFLG_ZF;
}
@@ -1100,7 +1103,7 @@ x86_emulate_memop(
#elif defined(__x86_64__)
{
unsigned long old, new;
- if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 )
+ if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 )
goto done;
if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
@@ -1112,7 +1115,7 @@ x86_emulate_memop(
else
{
new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
- if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 )
+ if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 )
goto done;
_regs.eflags |= EFLG_ZF;
}
@@ -1136,7 +1139,8 @@ x86_emulate_read_std(
x86_emulate_read_std(
unsigned long addr,
unsigned long *val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
*val = 0;
if ( copy_from_user((void *)val, (void *)addr, bytes) )
@@ -1151,7 +1155,8 @@ x86_emulate_write_std(
x86_emulate_write_std(
unsigned long addr,
unsigned long val,
- unsigned int bytes)
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
{
if ( copy_to_user((void *)addr, (void *)&val, bytes) )
{
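
After this refactor every x86_emulate_ops hook takes a trailing
struct x86_emulate_ctxt *, and callers bundle regs/cr2/mode into the context
before invoking x86_emulate_memop(), as the ptwr code above now does. A
minimal hook under the new signature (hypothetical names my_read/my_ops,
shown only to illustrate the calling convention):

    static int my_read(unsigned long addr, unsigned long *val,
                       unsigned int bytes, struct x86_emulate_ctxt *ctxt)
    {
        /* Read 'bytes' bytes at 'addr' for the insn at ctxt->regs->eip. */
        *val = 0;
        if ( copy_from_user(val, (void *)addr, bytes) )
            return X86EMUL_UNHANDLEABLE;
        return 0;
    }

    static struct x86_emulate_ops my_ops = {
        .read_std      = my_read,
        .write_std     = x86_emulate_write_std,
        .read_emulated = my_read,
        /* write_emulated/cmpxchg_emulated take the ctxt argument too. */
    };
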
diff -r e74246451527 -r f54d38cea8ac xen/common/Makefile
--- a/xen/common/Makefile Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/Makefile Tue May 30 14:30:34 2006 -0500
@@ -13,6 +13,7 @@ obj-y += page_alloc.o
obj-y += page_alloc.o
obj-y += rangeset.o
obj-y += sched_bvt.o
+obj-y += sched_credit.o
obj-y += sched_sedf.o
obj-y += schedule.o
obj-y += softirq.o
diff -r e74246451527 -r f54d38cea8ac xen/common/acm_ops.c
--- a/xen/common/acm_ops.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/acm_ops.c Tue May 30 14:30:34 2006 -0500
@@ -32,100 +32,94 @@
#ifndef ACM_SECURITY
-long do_acm_op(XEN_GUEST_HANDLE(acm_op_t) u_acm_op)
+
+long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
return -ENOSYS;
}
+
#else
-enum acm_operation {
- POLICY, /* access to policy interface (early drop) */
- GETPOLICY, /* dump policy cache */
- SETPOLICY, /* set policy cache (controls security) */
- DUMPSTATS, /* dump policy statistics */
- GETSSID, /* retrieve ssidref for domain id (decide inside authorized domains) */
- GETDECISION /* retrieve ACM decision from authorized domains */
-};
-
-int acm_authorize_acm_ops(struct domain *d, enum acm_operation pops)
+
+int acm_authorize_acm_ops(struct domain *d)
{
/* currently, policy management functions are restricted to privileged
domains */
if (!IS_PRIV(d))
return -EPERM;
-
return 0;
}
-long do_acm_op(XEN_GUEST_HANDLE(acm_op_t) u_acm_op)
-{
- long ret = 0;
- struct acm_op curop, *op = &curop;
-
- if (acm_authorize_acm_ops(current->domain, POLICY))
+
+long do_acm_op(int cmd, XEN_GUEST_HANDLE(void) arg)
+{
+ long rc = -EFAULT;
+
+ if (acm_authorize_acm_ops(current->domain))
return -EPERM;
- if (copy_from_guest(op, u_acm_op, 1))
- return -EFAULT;
-
- if (op->interface_version != ACM_INTERFACE_VERSION)
- return -EACCES;
-
- switch (op->cmd)
+ switch ( cmd )
{
- case ACM_SETPOLICY:
- {
- ret = acm_authorize_acm_ops(current->domain, SETPOLICY);
- if (!ret)
- ret = acm_set_policy(op->u.setpolicy.pushcache,
- op->u.setpolicy.pushcache_size, 1);
- }
- break;
-
- case ACM_GETPOLICY:
- {
- ret = acm_authorize_acm_ops(current->domain, GETPOLICY);
- if (!ret)
- ret = acm_get_policy(op->u.getpolicy.pullcache,
- op->u.getpolicy.pullcache_size);
- if (!ret)
- copy_to_guest(u_acm_op, op, 1);
- }
- break;
-
- case ACM_DUMPSTATS:
- {
- ret = acm_authorize_acm_ops(current->domain, DUMPSTATS);
- if (!ret)
- ret = acm_dump_statistics(op->u.dumpstats.pullcache,
- op->u.dumpstats.pullcache_size);
- if (!ret)
- copy_to_guest(u_acm_op, op, 1);
- }
- break;
-
- case ACM_GETSSID:
- {
+
+ case ACMOP_setpolicy: {
+ struct acm_setpolicy setpolicy;
+ if (copy_from_guest(&setpolicy, arg, 1) != 0)
+ return -EFAULT;
+ if (setpolicy.interface_version != ACM_INTERFACE_VERSION)
+ return -EACCES;
+
+ rc = acm_set_policy(setpolicy.pushcache,
+ setpolicy.pushcache_size, 1);
+ break;
+ }
+
+ case ACMOP_getpolicy: {
+ struct acm_getpolicy getpolicy;
+ if (copy_from_guest(&getpolicy, arg, 1) != 0)
+ return -EFAULT;
+ if (getpolicy.interface_version != ACM_INTERFACE_VERSION)
+ return -EACCES;
+
+ rc = acm_get_policy(getpolicy.pullcache,
+ getpolicy.pullcache_size);
+ break;
+ }
+
+ case ACMOP_dumpstats: {
+ struct acm_dumpstats dumpstats;
+ if (copy_from_guest(&dumpstats, arg, 1) != 0)
+ return -EFAULT;
+ if (dumpstats.interface_version != ACM_INTERFACE_VERSION)
+ return -EACCES;
+
+ rc = acm_dump_statistics(dumpstats.pullcache,
+ dumpstats.pullcache_size);
+ break;
+ }
+
+ case ACMOP_getssid: {
+ struct acm_getssid getssid;
ssidref_t ssidref;
- ret = acm_authorize_acm_ops(current->domain, GETSSID);
- if (ret)
- break;
-
- if (op->u.getssid.get_ssid_by == SSIDREF)
- ssidref = op->u.getssid.id.ssidref;
- else if (op->u.getssid.get_ssid_by == DOMAINID)
- {
- struct domain *subj = find_domain_by_id(op->u.getssid.id.domainid);
- if (!subj)
- {
- ret = -ESRCH; /* domain not found */
- break;
- }
- if (subj->ssid == NULL)
- {
- put_domain(subj);
- ret = -ESRCH;
+ if (copy_from_guest(&getssid, arg, 1) != 0)
+ return -EFAULT;
+ if (getssid.interface_version != ACM_INTERFACE_VERSION)
+ return -EACCES;
+
+ if (getssid.get_ssid_by == SSIDREF)
+ ssidref = getssid.id.ssidref;
+ else if (getssid.get_ssid_by == DOMAINID)
+ {
+ struct domain *subj = find_domain_by_id(getssid.id.domainid);
+ if (!subj)
+ {
+ rc = -ESRCH; /* domain not found */
+ break;
+ }
+ if (subj->ssid == NULL)
+ {
+ put_domain(subj);
+ rc = -ESRCH;
break;
}
ssidref = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -133,39 +127,36 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
}
else
{
- ret = -ESRCH;
- break;
- }
- ret = acm_get_ssid(ssidref,
- op->u.getssid.ssidbuf,
- op->u.getssid.ssidbuf_size);
- if (!ret)
- copy_to_guest(u_acm_op, op, 1);
- }
- break;
-
- case ACM_GETDECISION:
- {
+ rc = -ESRCH;
+ break;
+ }
+ rc = acm_get_ssid(ssidref, getssid.ssidbuf, getssid.ssidbuf_size);
+ break;
+ }
+
+ case ACMOP_getdecision: {
+ struct acm_getdecision getdecision;
ssidref_t ssidref1, ssidref2;
- ret = acm_authorize_acm_ops(current->domain, GETDECISION);
- if (ret)
- break;
-
- if (op->u.getdecision.get_decision_by1 == SSIDREF)
- ssidref1 = op->u.getdecision.id1.ssidref;
- else if (op->u.getdecision.get_decision_by1 == DOMAINID)
- {
- struct domain *subj = find_domain_by_id(op->u.getdecision.id1.domainid);
- if (!subj)
- {
- ret = -ESRCH; /* domain not found */
- break;
- }
- if (subj->ssid == NULL)
- {
- put_domain(subj);
- ret = -ESRCH;
+ if (copy_from_guest(&getdecision, arg, 1) != 0)
+ return -EFAULT;
+ if (getdecision.interface_version != ACM_INTERFACE_VERSION)
+ return -EACCES;
+
+ if (getdecision.get_decision_by1 == SSIDREF)
+ ssidref1 = getdecision.id1.ssidref;
+ else if (getdecision.get_decision_by1 == DOMAINID)
+ {
+ struct domain *subj = find_domain_by_id(getdecision.id1.domainid);
+ if (!subj)
+ {
+ rc = -ESRCH; /* domain not found */
+ break;
+ }
+ if (subj->ssid == NULL)
+ {
+ put_domain(subj);
+ rc = -ESRCH;
break;
}
ssidref1 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -173,23 +164,23 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
}
else
{
- ret = -ESRCH;
- break;
- }
- if (op->u.getdecision.get_decision_by2 == SSIDREF)
- ssidref2 = op->u.getdecision.id2.ssidref;
- else if (op->u.getdecision.get_decision_by2 == DOMAINID)
- {
- struct domain *subj = find_domain_by_id(op->u.getdecision.id2.domainid);
- if (!subj)
- {
- ret = -ESRCH; /* domain not found */
+ rc = -ESRCH;
+ break;
+ }
+ if (getdecision.get_decision_by2 == SSIDREF)
+ ssidref2 = getdecision.id2.ssidref;
+ else if (getdecision.get_decision_by2 == DOMAINID)
+ {
+ struct domain *subj = find_domain_by_id(getdecision.id2.domainid);
+ if (!subj)
+ {
+ rc = -ESRCH; /* domain not found */
break;;
}
if (subj->ssid == NULL)
{
put_domain(subj);
- ret = -ESRCH;
+ rc = -ESRCH;
break;
}
ssidref2 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref;
@@ -197,34 +188,35 @@ long do_acm_op(XEN_GUEST_HANDLE(acm_op_t
}
else
{
- ret = -ESRCH;
- break;
- }
- ret = acm_get_decision(ssidref1, ssidref2, op->u.getdecision.hook);
-
- if (ret == ACM_ACCESS_PERMITTED)
- {
- op->u.getdecision.acm_decision = ACM_ACCESS_PERMITTED;
- ret = 0;
- }
- else if (ret == ACM_ACCESS_DENIED)
- {
- op->u.getdecision.acm_decision = ACM_ACCESS_DENIED;
- ret = 0;
- }
- else
- ret = -ESRCH;
-
- if (!ret)
- copy_to_guest(u_acm_op, op, 1);
- }
- break;
+ rc = -ESRCH;
+ break;
+ }
+ rc = acm_get_decision(ssidref1, ssidref2, getdecision.hook);
+
+ if (rc == ACM_ACCESS_PERMITTED)
+ {
+ getdecision.acm_decision = ACM_ACCESS_PERMITTED;
+ rc = 0;
+ }
+ else if (rc == ACM_ACCESS_DENIED)
+ {
+ getdecision.acm_decision = ACM_ACCESS_DENIED;
+ rc = 0;
+ }
+ else
+ rc = -ESRCH;
+
+ if ( (rc == 0) && (copy_to_guest(arg, &getdecision, 1) != 0) )
+ rc = -EFAULT;
+ break;
+ }
default:
- ret = -ESRCH;
- }
-
- return ret;
+ rc = -ENOSYS;
+ break;
+ }
+
+ return rc;
}
#endif
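
The rework above replaces the monolithic acm_op struct with per-command
payloads selected by an explicit cmd argument, each carrying its own
interface_version. A hypothetical privileged-domain caller of the new
interface (sketch only; the HYPERVISOR_acm_op wrapper name is an assumption):

    struct acm_getssid getssid;

    getssid.interface_version = ACM_INTERFACE_VERSION;
    getssid.get_ssid_by       = DOMAINID;
    getssid.id.domainid       = domid;
    getssid.ssidbuf           = ssid_buffer;          /* caller-supplied */
    getssid.ssidbuf_size      = sizeof(ssid_buffer);

    rc = HYPERVISOR_acm_op(ACMOP_getssid, &getssid);
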
diff -r e74246451527 -r f54d38cea8ac xen/common/elf.c
--- a/xen/common/elf.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/elf.c Tue May 30 14:30:34 2006 -0500
@@ -23,10 +23,10 @@ int parseelfimage(struct domain_setup_in
Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr;
Elf_Phdr *phdr;
Elf_Shdr *shdr;
- unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base;
+ unsigned long kernstart = ~0UL, kernend=0UL, vaddr, virt_base, elf_pa_off;
char *shstrtab, *guestinfo=NULL, *p;
char *elfbase = (char *)dsi->image_addr;
- int h;
+ int h, virt_base_defined, elf_pa_off_defined;
if ( !elf_sanity_check(ehdr) )
return -EINVAL;
@@ -84,29 +84,40 @@ int parseelfimage(struct domain_setup_in
if ( guestinfo == NULL )
guestinfo = "";
- virt_base = 0;
- if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
- virt_base = simple_strtoul(p+10, &p, 0);
- dsi->elf_paddr_offset = virt_base;
- if ( (p = strstr(guestinfo, "ELF_PADDR_OFFSET=")) != NULL )
- dsi->elf_paddr_offset = simple_strtoul(p+17, &p, 0);
+ /* Initial guess for virt_base is 0 if it is not explicitly defined. */
+ p = strstr(guestinfo, "VIRT_BASE=");
+ virt_base_defined = (p != NULL);
+ virt_base = virt_base_defined ? simple_strtoul(p+10, &p, 0) : 0;
+
+ /* Initial guess for elf_pa_off is virt_base if not explicitly defined. */
+ p = strstr(guestinfo, "ELF_PADDR_OFFSET=");
+ elf_pa_off_defined = (p != NULL);
+ elf_pa_off = elf_pa_off_defined ? simple_strtoul(p+17, &p, 0) : virt_base;
+
+ if ( elf_pa_off_defined && !virt_base_defined )
+ goto bad_image;
for ( h = 0; h < ehdr->e_phnum; h++ )
{
phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
if ( !is_loadable_phdr(phdr) )
continue;
- vaddr = phdr->p_paddr - dsi->elf_paddr_offset + virt_base;
+ vaddr = phdr->p_paddr - elf_pa_off + virt_base;
+ if ( (vaddr + phdr->p_memsz) < vaddr )
+ goto bad_image;
if ( vaddr < kernstart )
kernstart = vaddr;
if ( (vaddr + phdr->p_memsz) > kernend )
kernend = vaddr + phdr->p_memsz;
}
- if ( virt_base )
- dsi->v_start = virt_base;
- else
- dsi->v_start = kernstart;
+ /*
+ * Legacy compatibility and images with no __xen_guest section: assume
+ * header addresses are virtual addresses, and that guest memory should be
+ * mapped starting at kernel load address.
+ */
+ dsi->v_start = virt_base_defined ? virt_base : kernstart;
+ dsi->elf_paddr_offset = elf_pa_off_defined ? elf_pa_off : dsi->v_start;
dsi->v_kernentry = ehdr->e_entry;
if ( (p = strstr(guestinfo, "VIRT_ENTRY=")) != NULL )
@@ -114,11 +125,9 @@ int parseelfimage(struct domain_setup_in
if ( (kernstart > kernend) ||
(dsi->v_kernentry < kernstart) ||
- (dsi->v_kernentry > kernend) )
- {
- printk("Malformed ELF image.\n");
- return -EINVAL;
- }
+ (dsi->v_kernentry > kernend) ||
+ (dsi->v_start > kernstart) )
+ goto bad_image;
if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
dsi->load_symtab = 1;
@@ -130,6 +139,10 @@ int parseelfimage(struct domain_setup_in
loadelfsymtab(dsi, 0);
return 0;
+
+ bad_image:
+ printk("Malformed ELF image.\n");
+ return -EINVAL;
}
int loadelfimage(struct domain_setup_info *dsi)
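
For reference, the keys parsed above come from a kernel's __xen_guest ELF
section, with vaddr computed as p_paddr - elf_pa_off + virt_base. An
illustrative section string (representative, not taken from this changeset),
under which a program header with p_paddr 0x100000 maps to vaddr 0xC0100000:

    "GUEST_OS=linux,XEN_VER=3.0,VIRT_BASE=0xC0000000,ELF_PADDR_OFFSET=0x0,VIRT_ENTRY=0xC0100000"
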
diff -r e74246451527 -r f54d38cea8ac xen/common/grant_table.c
--- a/xen/common/grant_table.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/grant_table.c Tue May 30 14:30:34 2006 -0500
@@ -505,15 +505,12 @@ gnttab_setup_table(
goto out;
}
- if ( op.nr_frames <= NR_GRANT_FRAMES )
- {
- ASSERT(d->grant_table != NULL);
- op.status = GNTST_okay;
- for ( i = 0; i < op.nr_frames; i++ )
- {
- gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
- (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
- }
+ ASSERT(d->grant_table != NULL);
+ op.status = GNTST_okay;
+ for ( i = 0; i < op.nr_frames; i++ )
+ {
+ gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
+ (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
}
put_domain(d);
diff -r e74246451527 -r f54d38cea8ac xen/common/kernel.c
--- a/xen/common/kernel.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/kernel.c Tue May 30 14:30:34 2006 -0500
@@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H
switch ( fi.submap_idx )
{
case 0:
- fi.submap = 0;
+ fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
if ( shadow_mode_translate(current->domain) )
fi.submap |=
(1U << XENFEAT_writable_page_tables) |
- (1U << XENFEAT_auto_translated_physmap) |
- (1U << XENFEAT_pae_pgdir_above_4gb);
+ (1U << XENFEAT_auto_translated_physmap);
if ( supervisor_mode_kernel )
fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
break;
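
Guests can discover the bit advertised above via XENVER_get_features. A
guest-side sketch (assuming the standard HYPERVISOR_xen_version wrapper):

    struct xen_feature_info fi;

    fi.submap_idx = 0;
    if ( (HYPERVISOR_xen_version(XENVER_get_features, &fi) == 0) &&
         (fi.submap & (1U << XENFEAT_pae_pgdir_above_4gb)) )
    {
        /* PAE pgdirs may live anywhere; no need to keep them below 4GB. */
    }
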
diff -r e74246451527 -r f54d38cea8ac xen/common/schedule.c
--- a/xen/common/schedule.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/schedule.c Tue May 30 14:30:34 2006 -0500
@@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP
extern struct scheduler sched_bvt_def;
extern struct scheduler sched_sedf_def;
+extern struct scheduler sched_credit_def;
static struct scheduler *schedulers[] = {
&sched_bvt_def,
&sched_sedf_def,
+ &sched_credit_def,
NULL
};
@@ -639,6 +641,8 @@ static void t_timer_fn(void *unused)
page_scrub_schedule_work();
+ SCHED_OP(tick, cpu);
+
set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
}
@@ -681,6 +685,7 @@ void __init scheduler_init(void)
printk("Could not find scheduler: %s\n", opt_sched);
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
+ SCHED_OP(init);
if ( idle_vcpu[0] != NULL )
{
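
With sched_credit_def registered above, the new scheduler is chosen the same
way as the existing ones: the sched= boot parameter is matched against each
scheduler's opt_name (for the credit scheduler, "credit"). An illustrative
boot entry:

    kernel /boot/xen.gz sched=credit
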
diff -r e74246451527 -r f54d38cea8ac xen/common/trace.c
--- a/xen/common/trace.c Tue May 30 12:52:02 2006 -0500
+++ b/xen/common/trace.c Tue May 30 14:30:34 2006 -0500
@@ -91,6 +91,7 @@ static int alloc_trace_bufs(void)
if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
{
printk("Xen trace buffers: memory allocation failed\n");
+ opt_tbuf_size = 0;
return -EINVAL;
}
@@ -135,10 +136,7 @@ static int tb_set_size(int size)
opt_tbuf_size = size;
if ( alloc_trace_bufs() != 0 )
- {
- opt_tbuf_size = 0;
- return -EINVAL;
- }
+ return -EINVAL;
printk("Xen trace buffers: initialized\n");
return 0;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/config.h Tue May 30 14:30:34 2006 -0500
@@ -97,6 +97,13 @@ extern char _end[]; /* standard ELF symb
//#define HZ 1000
// FIXME SMP: leave SMP for a later time
+/* A power-of-two value greater than or equal to number of hypercalls. */
+#define NR_hypercalls 64
+
+#if NR_hypercalls & (NR_hypercalls - 1)
+#error "NR_hypercalls must be a power-of-two value"
+#endif
+
///////////////////////////////////////////////////////////////
// xen/include/asm/config.h
// Natural boundary upon TR size to define xenheap space
@@ -239,6 +246,10 @@ void dummy_called(char *function);
// these declarations got moved at some point, find a better place for them
extern int ht_per_core;
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#define CONFIG_SHADOW 1
+#endif
+
// xen/include/asm/config.h
/******************************************************************************
* config.h
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/dom_fw.h
--- a/xen/include/asm-ia64/dom_fw.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/dom_fw.h Tue May 30 14:30:34 2006 -0500
@@ -5,7 +5,7 @@
* Dan Magenheimer (dan.magenheimer@xxxxxx)
*/
-extern unsigned long dom_fw_setup(struct domain *, const char *, int);
+#include <linux/efi.h>
#ifndef MB
#define MB (1024*1024)
@@ -55,7 +55,7 @@ extern unsigned long dom_fw_setup(struct
#define FW_HYPERCALL_SAL_CALL_INDEX 0x82UL
#define FW_HYPERCALL_SAL_CALL_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_SAL_CALL_INDEX)
-#define FW_HYPERCALL_SAL_CALL 0x1001UL
+#define FW_HYPERCALL_SAL_CALL 0x1100UL
/*
* EFI is accessed via the EFI system table, which contains:
@@ -94,6 +94,7 @@ extern unsigned long dom_fw_setup(struct
#define FW_HYPERCALL_EFI_RESET_SYSTEM_INDEX 9UL
/* these are hypercall numbers */
+#define FW_HYPERCALL_EFI_CALL 0x300UL
#define FW_HYPERCALL_EFI_GET_TIME 0x300UL
#define FW_HYPERCALL_EFI_SET_TIME 0x301UL
#define FW_HYPERCALL_EFI_GET_WAKEUP_TIME 0x302UL
@@ -125,7 +126,7 @@ extern unsigned long dom_fw_setup(struct
*/
#define FW_HYPERCALL_FIRST_ARCH 0x300UL
-#define FW_HYPERCALL_IPI 0x380UL
+#define FW_HYPERCALL_IPI 0x400UL
/* Xen/ia64 user hypercalls. Only used for debugging. */
#define FW_HYPERCALL_FIRST_USER 0xff00UL
@@ -133,9 +134,16 @@ extern unsigned long dom_fw_setup(struct
/* Interrupt vector used for os boot rendez vous. */
#define XEN_SAL_BOOT_RENDEZ_VEC 0xF3
+#define FW_HYPERCALL_NUM_MASK_HIGH ~0xffUL
+#define FW_HYPERCALL_NUM_MASK_LOW 0xffUL
+
+#define EFI_MEMDESC_VERSION 1
+
extern struct ia64_pal_retval xen_pal_emulator(UINT64, u64, u64, u64);
extern struct sal_ret_values sal_emulator (long index, unsigned long in1,
unsigned long in2, unsigned long in3, unsigned long in4, unsigned long in5,
unsigned long in6, unsigned long in7);
extern struct ia64_pal_retval pal_emulator_static (unsigned long);
+extern unsigned long dom_fw_setup (struct domain *, const char *, int);
+extern efi_status_t efi_emulator (struct pt_regs *regs, unsigned long *fault);
extern void build_pal_hypercall_bundles(unsigned long *imva, unsigned long brkimm, unsigned long hypnum);
extern void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, UINT64 ret);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/domain.h Tue May 30 14:30:34 2006 -0500
@@ -22,8 +22,13 @@ extern void panic_domain(struct pt_regs
extern void panic_domain(struct pt_regs *, const char *, ...)
__attribute__ ((noreturn, format (printf, 2, 3)));
+struct mm_struct {
+ pgd_t * pgd;
+ // atomic_t mm_users; /* How many users with user space? */
+};
+
struct arch_domain {
- struct mm_struct *mm;
+ struct mm_struct mm;
unsigned long metaphysical_rr0;
unsigned long metaphysical_rr4;
@@ -54,10 +59,11 @@ struct arch_domain {
unsigned long initrd_start;
unsigned long initrd_len;
char *cmdline;
+ int efi_virt_mode; /* phys : 0 , virt : 1 */
+ void *efi_runtime;
};
#define xen_vastart arch.xen_vastart
#define xen_vaend arch.xen_vaend
-#define shared_info_va arch.shared_info_va
#define INT_ENABLE_OFFSET(v) \
(sizeof(vcpu_info_t) * (v)->vcpu_id + \
offsetof(vcpu_info_t, evtchn_upcall_mask))
@@ -69,8 +75,6 @@ struct arch_vcpu {
TR_ENTRY dtlb;
unsigned int itr_regions;
unsigned int dtr_regions;
- unsigned long itlb_pte;
- unsigned long dtlb_pte;
unsigned long irr[4];
unsigned long insvc[4];
unsigned long tc_regions;
@@ -106,27 +110,15 @@ struct arch_vcpu {
struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
};
-//#define thread arch._thread
-
-// FOLLOWING FROM linux-2.6.7/include/sched.h
-
-struct mm_struct {
- pgd_t * pgd;
- // atomic_t mm_users; /* How many users with user space? */
- struct list_head pt_list; /* List of pagetable */
-};
-
-extern struct mm_struct init_mm;
-
struct page_info * assign_new_domain_page(struct domain *d, unsigned long mpaddr);
void assign_new_domain0_page(struct domain *d, unsigned long mpaddr);
+void __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr);
void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr);
void assign_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned long flags);
#ifdef CONFIG_XEN_IA64_DOM0_VP
unsigned long assign_domain_mmio_page(struct domain *d, unsigned long mpaddr, unsigned long size);
unsigned long assign_domain_mach_page(struct domain *d, unsigned long mpaddr, unsigned long size);
unsigned long do_dom0vp_op(unsigned long cmd, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3);
-unsigned long dom0vp_populate_physmap(struct domain *d, unsigned long gpfn, unsigned int extent_order, unsigned int address_bits);
unsigned long dom0vp_zap_physmap(struct domain *d, unsigned long gpfn, unsigned int extent_order);
unsigned long dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn, unsigned int flags, domid_t domid);
#endif
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/event.h
--- a/xen/include/asm-ia64/event.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/event.h Tue May 30 14:30:34 2006 -0500
@@ -29,7 +29,7 @@ static inline void evtchn_notify(struct
smp_send_event_check_cpu(v->processor);
if(!VMX_DOMAIN(v))
- vcpu_pend_interrupt(v, v->vcpu_info->arch.evtchn_vector);
+ vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector);
}
/* Note: Bitwise operations result in fast code with no branches. */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/grant_table.h Tue May 30 14:30:34 2006 -0500
@@ -7,12 +7,33 @@
#define ORDER_GRANT_FRAMES 0
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+// for grant map/unmap
#define create_grant_host_mapping(a, f, fl) 0
#define destroy_grant_host_mapping(a, f, fl) 0
+// for grant transfer
#define steal_page_for_grant_transfer(d, p) 0
-#define gnttab_create_shared_page(d, t, i) ((void)0)
+#else
+// for grant map/unmap
+int create_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags);
+int destroy_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags);
+
+// for grant transfer
+int steal_page_for_grant_transfer(struct domain *d, struct page_info *page);
+void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+
+#endif
+
+// for grant table shared page
+#define gnttab_create_shared_page(d, t, i) \
+ do { \
+ share_xen_page_with_guest( \
+ virt_to_page((char *)(t)->shared + ((i) << PAGE_SHIFT)), \
+ (d), XENSHARE_writable); \
+ } while (0)
+
/* Guest physical address of the grant table. */
#define IA64_GRANT_TABLE_PADDR (1UL << 40)
@@ -20,13 +41,21 @@
#define gnttab_shared_maddr(d, t, i) \
virt_to_maddr((char*)(t)->shared + ((i) << PAGE_SHIFT))
-#define gnttab_shared_gmfn(d, t, i) \
+#ifndef CONFIG_XEN_IA64_DOM0_VP
+# define gnttab_shared_gmfn(d, t, i) \
({ ((d) == dom0) ? \
(virt_to_maddr((t)->shared) >> PAGE_SHIFT) + (i): \
assign_domain_page((d), \
IA64_GRANT_TABLE_PADDR + ((i) << PAGE_SHIFT), \
gnttab_shared_maddr(d, t, i)), \
(IA64_GRANT_TABLE_PADDR >> PAGE_SHIFT) + (i);})
+#else
+# define gnttab_shared_gmfn(d, t, i) \
+ ({ assign_domain_page((d), \
+ IA64_GRANT_TABLE_PADDR + ((i) << PAGE_SHIFT), \
+ gnttab_shared_maddr((d), (t), (i))); \
+ (IA64_GRANT_TABLE_PADDR >> PAGE_SHIFT) + (i);})
+#endif
#define gnttab_log_dirty(d, f) ((void)0)
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/linux-xen/asm/pgalloc.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgalloc.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/linux-xen/asm/pgalloc.h Tue May 30 14:30:34 2006 -0500
@@ -139,12 +139,14 @@ static inline void pte_free(struct page
{
pgtable_quicklist_free(page_address(pte));
}
+#endif
static inline void pte_free_kernel(pte_t * pte)
{
pgtable_quicklist_free(pte);
}
+#ifndef XEN
#define __pte_free_tlb(tlb, pte) pte_free(pte)
#endif
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Tue May 30 14:30:34 2006 -0500
@@ -383,6 +383,7 @@ ptep_test_and_clear_dirty (struct vm_are
return 1;
#endif
}
+#endif
static inline pte_t
ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -396,6 +397,19 @@ ptep_get_and_clear(struct mm_struct *mm,
#endif
}
+static inline pte_t
+ptep_xchg(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t npte)
+{
+#ifdef CONFIG_SMP
+ return __pte(xchg((long *) ptep, pte_val(npte)));
+#else
+ pte_t pte = *ptep;
+ set_pte (ptep, npte);
+ return pte;
+#endif
+}
+
+#ifndef XEN
static inline void
ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/mm.h Tue May 30 14:30:34 2006 -0500
@@ -12,7 +12,7 @@
#include <asm/processor.h>
#include <asm/atomic.h>
-#include <asm/flushtlb.h>
+#include <asm/tlbflush.h>
#include <asm/io.h>
#include <public/xen.h>
@@ -128,8 +128,10 @@ static inline u32 pickle_domptr(struct d
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d))
-/* Dummy now */
-#define share_xen_page_with_guest(p, d, r) do { } while (0)
+#define XENSHARE_writable 0
+#define XENSHARE_readonly 1
+void share_xen_page_with_guest(struct page_info *page,
+ struct domain *d, int readonly);
#define share_xen_page_with_privileged_guests(p, r) do { } while (0)
extern struct page_info *frame_table;
@@ -471,6 +473,4 @@ extern unsigned long ____lookup_domain_m
/* Arch-specific portion of memory_op hypercall. */
#define arch_memory_op(op, arg) (-ENOSYS)
-extern void assign_domain_page(struct domain *d, unsigned long mpaddr,
- unsigned long physaddr);
#endif /* __ASM_IA64_MM_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/shadow.h Tue May 30 14:30:34 2006 -0500
@@ -1,2 +1,57 @@
-/* empty */
+/******************************************************************************
+ * include/asm-ia64/shadow.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#ifndef _XEN_SHADOW_H
+#define _XEN_SHADOW_H
+
+#include <xen/config.h>
+
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+#ifndef CONFIG_SHADOW
+# error "CONFIG_SHADOW must be defined"
+#endif
+
+#define shadow_drop_references(d, p) ((void)0)
+
+// this is used only x86-specific code
+//#define shadow_sync_and_drop_references(d, p) ((void)0)
+
+#define shadow_mode_translate(d) (1)
+
+// for granttab transfer. XENMEM_populate_physmap
+void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+// for balloon driver. XENMEM_decrease_reservation
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+#endif
+
+#endif // _XEN_SHADOW_H
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vcpu.h
--- a/xen/include/asm-ia64/vcpu.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vcpu.h Tue May 30 14:30:34 2006 -0500
@@ -135,7 +135,10 @@ extern IA64FAULT vcpu_set_pkr(VCPU *vcpu
extern IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val);
extern IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key);
/* TLB */
-extern void vcpu_purge_tr_entry(TR_ENTRY *trp);
+static inline void vcpu_purge_tr_entry(TR_ENTRY *trp)
+{
+ trp->pte.val = 0;
+}
extern IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 padr,
UINT64 itir, UINT64 ifa);
extern IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 padr,
@@ -148,8 +151,7 @@ extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu,
extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu, UINT64 vadr, UINT64 addr_range);
extern IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
extern IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
-extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address,
- BOOLEAN is_data, BOOLEAN in_tpa,
+extern IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data,
UINT64 *pteval, UINT64 *itir, UINT64 *iha);
extern IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr);
extern IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vhpt.h
--- a/xen/include/asm-ia64/vhpt.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vhpt.h Tue May 30 14:30:34 2006 -0500
@@ -4,18 +4,17 @@
#define VHPT_ENABLED 1
/* Size of the VHPT. */
-#define VHPT_SIZE_LOG2 24
+#ifdef CONFIG_XEN_IA64_DOM0_VP
+// XXX workaround to avoid triggering the xenLinux soft lockup detection.
+# define VHPT_SIZE_LOG2 16 // 64KB
+#else
+# define VHPT_SIZE_LOG2 24 // 16MB default
+#endif
/* Number of entries in the VHPT. The size of an entry is 4*8B == 32B */
#define VHPT_NUM_ENTRIES (1 << (VHPT_SIZE_LOG2 - 5))
-#ifdef CONFIG_SMP
-# define vhpt_flush_all() smp_vhpt_flush_all()
-#else
-# define vhpt_flush_all() vhpt_flush()
-#endif
// FIXME: These should be automatically generated
-
#define VLE_PGFLAGS_OFFSET 0
#define VLE_ITIR_OFFSET 8
#define VLE_TITAG_OFFSET 16
@@ -37,15 +36,10 @@ extern void vhpt_init (void);
extern void vhpt_init (void);
extern void zero_vhpt_stats(void);
extern int dump_vhpt_stats(char *buf);
-extern void vhpt_flush_address(unsigned long vadr, unsigned long addr_range);
-extern void vhpt_flush_address_remote(int cpu, unsigned long vadr,
- unsigned long addr_range);
extern void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte,
unsigned long logps);
extern void vhpt_insert (unsigned long vadr, unsigned long pte,
unsigned long logps);
-extern void vhpt_flush(void);
-extern void smp_vhpt_flush_all(void);
/* Currently the VHPT is allocated per CPU. */
DECLARE_PER_CPU (unsigned long, vhpt_paddr);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-ia64/vmx_vcpu.h Tue May 30 14:30:34 2006 -0500
@@ -359,7 +359,7 @@ IA64FAULT vmx_vcpu_get_cpuid(VCPU *vcpu,
// TODO: unimplemented DBRs return a reserved register fault
// TODO: Should set Logical CPU state, not just physical
if(reg > 4){
- panic("there are only five cpuid registers");
+ panic_domain(vcpu_regs(vcpu),"there are only five cpuid registers");
}
*pval=VCPU(vcpu,vcpuid[reg]);
return (IA64_NO_FAULT);
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/domain.h Tue May 30 14:30:34 2006 -0500
@@ -114,11 +114,32 @@ struct arch_domain
unsigned long first_reserved_pfn;
} __cacheline_aligned;
+#ifdef CONFIG_X86_PAE
+struct pae_l3_cache {
+ /*
+ * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
+ * supplies a >=4GB PAE L3 table. We need two because we cannot set up
+ * an L3 table while we are currently running on it (without using
+ * expensive atomic 64-bit operations).
+ */
+ l3_pgentry_t table[2][4] __attribute__((__aligned__(32)));
+ unsigned long high_mfn; /* The >=4GB MFN being shadowed. */
+ unsigned int inuse_idx; /* Which of the two cache slots is in use? */
+ spinlock_t lock;
+};
+#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)
+#else /* !CONFIG_X86_PAE */
+struct pae_l3_cache { };
+#define pae_l3_cache_init(c) ((void)0)
+#endif
+
struct arch_vcpu
{
/* Needs 16-byte aligment for FXSAVE/FXRSTOR. */
struct vcpu_guest_context guest_context
__attribute__((__aligned__(16)));
+
+ struct pae_l3_cache pae_l3_cache;
unsigned long flags; /* TF_ */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/fixmap.h
--- a/xen/include/asm-x86/fixmap.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/fixmap.h Tue May 30 14:30:34 2006 -0500
@@ -25,6 +25,10 @@
* from the end of virtual memory backwards.
*/
enum fixed_addresses {
+#ifdef CONFIG_X86_PAE
+ FIX_PAE_HIGHMEM_0,
+ FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
+#endif
FIX_APIC_BASE,
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
@@ -40,13 +44,13 @@ enum fixed_addresses {
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
extern void __set_fixmap(
- enum fixed_addresses idx, unsigned long p, unsigned long flags);
+ enum fixed_addresses idx, unsigned long mfn, unsigned long flags);
#define set_fixmap(idx, phys) \
- __set_fixmap(idx, phys, PAGE_HYPERVISOR)
+ __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR)
#define set_fixmap_nocache(idx, phys) \
- __set_fixmap(idx, phys, PAGE_HYPERVISOR_NOCACHE)
+ __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE)
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/domain.h Tue May 30 14:30:34 2006 -0500
@@ -35,9 +35,9 @@ struct hvm_domain {
unsigned int nr_vcpus;
unsigned int apic_enabled;
unsigned int pae_enabled;
-
- struct hvm_virpit vpit;
- u64 guest_time;
+ s64 tsc_frequency;
+ struct pl_time pl_time;
+
struct hvm_virpic vpic;
struct hvm_vioapic vioapic;
struct hvm_io_handler io_handler;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/svm/intr.h
--- a/xen/include/asm-x86/hvm/svm/intr.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/svm/intr.h Tue May 30 14:30:34 2006 -0500
@@ -21,7 +21,6 @@
#ifndef __ASM_X86_HVM_SVM_INTR_H__
#define __ASM_X86_HVM_SVM_INTR_H__
-extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit);
extern void svm_intr_assist(void);
extern void svm_intr_assist_update(struct vcpu *v, int highest_vector);
extern void svm_intr_assist_test_valid(struct vcpu *v,
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/svm/svm.h Tue May 30 14:30:34 2006 -0500
@@ -48,7 +48,6 @@ extern void svm_do_launch(struct vcpu *v
extern void svm_do_launch(struct vcpu *v);
extern void svm_do_resume(struct vcpu *v);
extern void svm_set_guest_time(struct vcpu *v, u64 gtime);
-extern u64 svm_get_guest_time(struct vcpu *v);
extern void arch_svm_do_resume(struct vcpu *v);
extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa);
/* For debugging. Remove when no longer needed. */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vcpu.h Tue May 30 14:30:34 2006 -0500
@@ -32,6 +32,9 @@ struct hvm_vcpu {
unsigned long ioflags;
struct mmio_op mmio_op;
struct vlapic *vlapic;
+ s64 cache_tsc_offset;
+ u64 guest_time;
+
/* For AP startup */
unsigned long init_sipi_sipi_state;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Tue May 30 14:30:34 2006 -0500
@@ -34,7 +34,6 @@ extern void arch_vmx_do_launch(struct vc
extern void arch_vmx_do_launch(struct vcpu *);
extern void arch_vmx_do_resume(struct vcpu *);
extern void set_guest_time(struct vcpu *v, u64 gtime);
-extern u64 get_guest_time(struct vcpu *v);
extern unsigned int cpu_rev;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/hvm/vpit.h
--- a/xen/include/asm-x86/hvm/vpit.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/hvm/vpit.h Tue May 30 14:30:34 2006 -0500
@@ -29,9 +29,7 @@
#include <asm/hvm/vpic.h>
#define PIT_FREQ 1193181
-
-#define PIT_BASE 0x40
-#define HVM_PIT_ACCEL_MODE 2
+#define PIT_BASE 0x40
typedef struct PITChannelState {
int count; /* can be 65536 */
@@ -48,47 +46,56 @@ typedef struct PITChannelState {
u8 gate; /* timer start */
s64 count_load_time;
/* irq handling */
- s64 next_transition_time;
- int irq;
- struct hvm_time_info *hvm_time;
- u32 period; /* period(ns) based on count */
+ struct vcpu *vcpu;
+ struct periodic_time *pt;
} PITChannelState;
-
-struct hvm_time_info {
- /* extra info for the mode 2 channel */
- struct timer pit_timer;
- struct vcpu *vcpu; /* which vcpu the ac_timer bound to */
- u64 period_cycles; /* pit frequency in cpu cycles */
- s_time_t count_advance; /* accumulated count advance since last fire */
- s_time_t count_point; /* last point accumulating count advance */
- unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
- int first_injected; /* flag to prevent shadow window */
- s64 cache_tsc_offset; /* cache of VMCS TSC_OFFSET offset */
- u64 last_pit_gtime; /* guest time when last pit is injected */
+
+/*
+ * Abstraction layer for periodic and one-shot virtual timers.
+ */
+struct periodic_time {
+ char enabled; /* enabled */
+ char one_shot; /* one shot time */
+ char irq;
+ char first_injected; /* flag to prevent shadow window */
+    u32 pending_intr_nr;    /* the counter for pending timer interrupts */
+    u32 period;             /* period in ns */
+    u64 period_cycles;      /* period in cpu cycles */
+    s_time_t scheduled;     /* scheduled timer interrupt */
+    u64 last_plt_gtime;     /* platform time when the last IRQ was injected */
+ struct timer timer; /* ac_timer */
};
-typedef struct hvm_virpit {
+typedef struct PITState {
PITChannelState channels[3];
- struct hvm_time_info time_info;
int speaker_data_on;
int dummy_refresh_clock;
-}hvm_virpit;
+} PITState;
+struct pl_time { /* platform time */
+ struct periodic_time periodic_tm;
+ struct PITState vpit;
+ /* TODO: RTC/ACPI time */
+};
-static __inline__ s_time_t get_pit_scheduled(
- struct vcpu *v,
- struct hvm_virpit *vpit)
+static __inline__ s_time_t get_scheduled(
+ struct vcpu *v, int irq,
+ struct periodic_time *pt)
{
- struct PITChannelState *s = &(vpit->channels[0]);
- if ( is_irq_enabled(v, 0) ) {
- return s->next_transition_time;
+ if ( is_irq_enabled(v, irq) ) {
+ return pt->scheduled;
}
else
return -1;
}
/* to hook the ioreq packet to get the PIT initialization info */
-extern void pit_init(struct hvm_virpit *pit, struct vcpu *v);
-extern void pickup_deactive_ticks(struct hvm_virpit *vpit);
+extern void hvm_hooks_assist(struct vcpu *v);
+extern void pickup_deactive_ticks(struct periodic_time *vpit);
+extern u64 hvm_get_guest_time(struct vcpu *v);
+extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, char irq, char one_shot);
+extern void destroy_periodic_time(struct periodic_time *pt);
+void pit_init(struct vcpu *v, unsigned long cpu_khz);
+void pt_timer_fn(void *data);
#endif /* __ASM_X86_HVM_VPIT_H__ */
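The hunk above collapses the PIT-only timer machinery into a generic periodic_time object that any virtual platform timer can drive. A minimal usage sketch against the new declarations, assuming a 10ms tick bound to ISA IRQ 0 (both values are illustrative, not taken from the patch):

    static struct periodic_time *example_pt;

    static void example_start(struct vcpu *v)
    {
        /* period is in ns per the struct comment: 10,000,000 ns = 10ms */
        example_pt = create_periodic_time(v, 10000000, /*irq=*/0, /*one_shot=*/0);
    }

    static void example_stop(void)
    {
        if ( example_pt != NULL )
            destroy_periodic_time(example_pt);
    }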
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/string.h
--- a/xen/include/asm-x86/string.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/string.h Tue May 30 14:30:34 2006 -0500
@@ -2,152 +2,6 @@
#define __X86_STRING_H__
#include <xen/config.h>
-
-#define __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dest, const char *src)
-{
- long d0, d1, d2;
- __asm__ __volatile__ (
- "1: lodsb \n"
- " stosb \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2)
- : "0" (src), "1" (dest) : "memory" );
- return dest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *dest, const char *src, size_t count)
-{
- long d0, d1, d2, d3;
- __asm__ __volatile__ (
- "1: dec %2 \n"
- " js 2f \n"
- " lodsb \n"
- " stosb \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- " rep ; stosb \n"
- "2: \n"
- : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
- : "0" (src), "1" (dest), "2" (count) : "memory" );
- return dest;
-}
-
-#define __HAVE_ARCH_STRCAT
-static inline char *strcat(char *dest, const char *src)
-{
- long d0, d1, d2, d3;
- __asm__ __volatile__ (
- " repne ; scasb \n"
- " dec %1 \n"
- "1: lodsb \n"
- " stosb \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" );
- return dest;
-}
-
-#define __HAVE_ARCH_STRNCAT
-static inline char *strncat(char *dest, const char *src, size_t count)
-{
- long d0, d1, d2, d3;
- __asm__ __volatile__ (
- " repne ; scasb \n"
- " dec %1 \n"
- " mov %8,%3 \n"
- "1: dec %3 \n"
- " js 2f \n"
- " lodsb \n"
- " stosb \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- "2: xor %%eax,%%eax\n"
- " stosb"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count)
- : "memory" );
- return dest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static inline int strcmp(const char *cs, const char *ct)
-{
- long d0, d1;
- register int __res;
- __asm__ __volatile__ (
- "1: lodsb \n"
- " scasb \n"
- " jne 2f \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- " xor %%eax,%%eax\n"
- " jmp 3f \n"
- "2: sbb %%eax,%%eax\n"
- " or $1,%%al \n"
- "3: \n"
- : "=a" (__res), "=&S" (d0), "=&D" (d1)
- : "1" (cs), "2" (ct) );
- return __res;
-}
-
-#define __HAVE_ARCH_STRNCMP
-static inline int strncmp(const char *cs, const char *ct, size_t count)
-{
- long d0, d1, d2;
- register int __res;
- __asm__ __volatile__ (
- "1: dec %3 \n"
- " js 2f \n"
- " lodsb \n"
- " scasb \n"
- " jne 3f \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- "2: xor %%eax,%%eax\n"
- " jmp 4f \n"
- "3: sbb %%eax,%%eax\n"
- " or $1,%%al \n"
- "4: \n"
- : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- : "1" (cs), "2" (ct), "3" (count) );
- return __res;
-}
-
-#define __HAVE_ARCH_STRCHR
-static inline char *strchr(const char *s, int c)
-{
- long d0;
- register char *__res;
- __asm__ __volatile__ (
- " mov %%al,%%ah \n"
- "1: lodsb \n"
- " cmp %%ah,%%al \n"
- " je 2f \n"
- " test %%al,%%al \n"
- " jne 1b \n"
- " mov $1,%1 \n"
- "2: mov %1,%0 \n"
- " dec %0 \n"
- : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) );
- return __res;
-}
-
-#define __HAVE_ARCH_STRLEN
-static inline size_t strlen(const char *s)
-{
- long d0;
- register int __res;
- __asm__ __volatile__ (
- " repne ; scasb \n"
- " notl %0 \n"
- " decl %0 \n"
- : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) );
- return __res;
-}
static inline void *__variable_memcpy(void *to, const void *from, size_t n)
{
@@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v
#define __HAVE_ARCH_MEMCMP
#define memcmp __builtin_memcmp
-#define __HAVE_ARCH_MEMCHR
-static inline void *memchr(const void *cs, int c, size_t count)
-{
- long d0;
- register void *__res;
- if ( count == 0 )
- return NULL;
- __asm__ __volatile__ (
- " repne ; scasb\n"
- " je 1f \n"
- " mov $1,%0 \n"
- "1: dec %0 \n"
- : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) );
- return __res;
-}
-
static inline void *__memset_generic(void *s, char c, size_t count)
{
long d0, d1;
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/asm-x86/x86_emulate.h Tue May 30 14:30:34 2006 -0500
@@ -9,8 +9,10 @@
#ifndef __X86_EMULATE_H__
#define __X86_EMULATE_H__
-/*
- * x86_mem_emulator:
+struct x86_emulate_ctxt;
+
+/*
+ * x86_emulate_ops:
*
* These operations represent the instruction emulator's interface to memory.
* There are two categories of operation: those that act on ordinary memory
@@ -47,7 +49,7 @@
#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */
#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */
-struct x86_mem_emulator
+struct x86_emulate_ops
{
/*
* read_std: Read bytes of standard (non-emulated/special) memory.
@@ -59,7 +61,8 @@ struct x86_mem_emulator
int (*read_std)(
unsigned long addr,
unsigned long *val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
@@ -71,7 +74,8 @@ struct x86_mem_emulator
int (*write_std)(
unsigned long addr,
unsigned long val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
/*
* read_emulated: Read bytes from emulated/special memory area.
@@ -82,7 +86,8 @@ struct x86_mem_emulator
int (*read_emulated)(
unsigned long addr,
unsigned long *val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
/*
 * write_emulated: Write bytes to emulated/special memory area.
@@ -93,7 +98,8 @@ struct x86_mem_emulator
int (*write_emulated)(
unsigned long addr,
unsigned long val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
/*
* cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -107,11 +113,12 @@ struct x86_mem_emulator
unsigned long addr,
unsigned long old,
unsigned long new,
- unsigned int bytes);
-
- /*
- * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
- * emulated/special memory area.
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
+
+ /*
+ * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
+ * emulated/special memory area.
* @addr: [IN ] Linear address to access.
* @old: [IN ] Value expected to be current at @addr.
* @new: [IN ] Value to write to @addr.
@@ -126,7 +133,8 @@ struct x86_mem_emulator
unsigned long old_lo,
unsigned long old_hi,
unsigned long new_lo,
- unsigned long new_hi);
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt);
};
/* Standard reader/writer functions that callers may wish to use. */
@@ -134,14 +142,28 @@ x86_emulate_read_std(
x86_emulate_read_std(
unsigned long addr,
unsigned long *val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
extern int
x86_emulate_write_std(
unsigned long addr,
unsigned long val,
- unsigned int bytes);
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt);
struct cpu_user_regs;
+
+struct x86_emulate_ctxt
+{
+ /* Register state before/after emulation. */
+ struct cpu_user_regs *regs;
+
+ /* Linear faulting address (if emulating a page-faulting instruction). */
+ unsigned long cr2;
+
+ /* Emulated execution mode, represented by an X86EMUL_MODE value. */
+ int mode;
+};
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
@@ -159,25 +181,19 @@ struct cpu_user_regs;
/*
* x86_emulate_memop: Emulate an instruction that faulted attempting to
* read/write a 'special' memory area.
- * @regs: Register state at time of fault.
- * @cr2: Linear faulting address within an emulated/special memory area.
- * @ops: Interface to access special memory.
- * @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
* Returns -1 on failure, 0 on success.
*/
-extern int
+int
x86_emulate_memop(
- struct cpu_user_regs *regs,
- unsigned long cr2,
- struct x86_mem_emulator *ops,
- int mode);
+ struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops);
/*
* Given the 'reg' portion of a ModRM byte, and a register block, return a
* pointer into the block that addresses the relevant register.
* @highbyte_regs specifies whether to decode AH,CH,DH,BH.
*/
-extern void *
+void *
decode_register(
uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
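The net effect of the x86_emulate.h changes: four loose parameters become a context structure that is also threaded through every ops callback, so a callback can inspect the register state and execution mode. A sketch of the new calling convention (the wrapper and the mode choice are illustrative):

    static int example_emulate(struct cpu_user_regs *regs, unsigned long cr2,
                               struct x86_emulate_ops *ops)
    {
        struct x86_emulate_ctxt ctxt = {
            .regs = regs,              /* register state at the fault */
            .cr2  = cr2,               /* linear faulting address */
            .mode = X86EMUL_MODE_REAL, /* illustrative; use the guest's mode */
        };
        return x86_emulate_memop(&ctxt, ops); /* 0 on success, -1 on failure */
    }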
diff -r e74246451527 -r f54d38cea8ac xen/include/public/acm_ops.h
--- a/xen/include/public/acm_ops.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/acm_ops.h Tue May 30 14:30:34 2006 -0500
@@ -2,7 +2,7 @@
* acm_ops.h: Xen access control module hypervisor commands
*
* Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Copyright (c) 2005, International Business Machines Corporation.
+ * Copyright (c) 2005,2006 International Business Machines Corporation.
*/
#ifndef __XEN_PUBLIC_ACM_OPS_H__
@@ -17,36 +17,50 @@
* This makes sure that old versions of acm tools will stop working in a
* well-defined way (rather than crashing the machine, for instance).
*/
-#define ACM_INTERFACE_VERSION 0xAAAA0006
+#define ACM_INTERFACE_VERSION 0xAAAA0007
/************************************************************************/
-#define ACM_SETPOLICY 4
+/*
+ * Prototype for this hypercall is:
+ * int acm_op(int cmd, void *args)
+ * @cmd == ACMOP_??? (access control module operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+
+#define ACMOP_setpolicy 1
struct acm_setpolicy {
- /* OUT variables */
+ /* IN */
+ uint32_t interface_version;
void *pushcache;
uint32_t pushcache_size;
};
-#define ACM_GETPOLICY 5
+#define ACMOP_getpolicy 2
struct acm_getpolicy {
- /* OUT variables */
+ /* IN */
+ uint32_t interface_version;
void *pullcache;
uint32_t pullcache_size;
};
-#define ACM_DUMPSTATS 6
+#define ACMOP_dumpstats 3
struct acm_dumpstats {
+ /* IN */
+ uint32_t interface_version;
void *pullcache;
uint32_t pullcache_size;
};
-#define ACM_GETSSID 7
+#define ACMOP_getssid 4
enum get_type {UNSET=0, SSIDREF, DOMAINID};
struct acm_getssid {
+ /* IN */
+ uint32_t interface_version;
enum get_type get_ssid_by;
union {
domaintype_t domainid;
@@ -56,9 +70,11 @@ struct acm_getssid {
uint32_t ssidbuf_size;
};
-#define ACM_GETDECISION 8
+#define ACMOP_getdecision 5
struct acm_getdecision {
- enum get_type get_decision_by1; /* in */
+ /* IN */
+ uint32_t interface_version;
+ enum get_type get_decision_by1;
enum get_type get_decision_by2;
union {
domaintype_t domainid;
@@ -69,23 +85,11 @@ struct acm_getdecision {
ssidref_t ssidref;
} id2;
enum acm_hook_type hook;
- int acm_decision; /* out */
+ /* OUT */
+ int acm_decision;
};
-typedef struct acm_op {
- uint32_t cmd;
- uint32_t interface_version; /* ACM_INTERFACE_VERSION */
- union {
- struct acm_setpolicy setpolicy;
- struct acm_getpolicy getpolicy;
- struct acm_dumpstats dumpstats;
- struct acm_getssid getssid;
- struct acm_getdecision getdecision;
- } u;
-} acm_op_t;
-DEFINE_XEN_GUEST_HANDLE(acm_op_t);
-
-#endif /* __XEN_PUBLIC_ACM_OPS_H__ */
+#endif /* __XEN_PUBLIC_ACM_OPS_H__ */
/*
* Local variables:
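With the acm_op wrapper struct gone, each command structure is passed directly to acm_op() and must carry its own interface_version. A sketch of filling the getssid argument under the new convention, assuming the ssidbuf pointer that accompanies ssidbuf_size in this struct (buffer size and domain id are illustrative):

    static void example_getssid(void)
    {
        static char ssidbuf[256];       /* illustrative buffer size */
        struct acm_getssid arg = {
            .interface_version = ACM_INTERFACE_VERSION,
            .get_ssid_by       = DOMAINID,
            .ssidbuf           = ssidbuf,
            .ssidbuf_size      = sizeof(ssidbuf),
        };
        arg.id.domainid = 0;            /* illustrative: query dom0 */
        /* A tool would now issue acm_op(ACMOP_getssid, &arg). */
    }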
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-ia64.h Tue May 30 14:30:34 2006 -0500
@@ -38,15 +38,17 @@ DEFINE_XEN_GUEST_HANDLE(void);
#ifndef __ASSEMBLY__
#define MAX_NR_SECTION 32 /* at most 32 memory holes */
-typedef struct {
+struct mm_section {
unsigned long start; /* start of memory hole */
unsigned long end; /* end of memory hole */
-} mm_section_t;
-
-typedef struct {
+};
+typedef struct mm_section mm_section_t;
+
+struct pmt_entry {
unsigned long mfn : 56;
unsigned long type: 8;
-} pmt_entry_t;
+};
+typedef struct pmt_entry pmt_entry_t;
#define GPFN_MEM (0UL << 56) /* Guest pfn is normal mem */
#define GPFN_FRAME_BUFFER (1UL << 56) /* VGA framebuffer */
@@ -93,10 +95,11 @@ typedef struct {
* NB. This may become a 64-bit count with no shift. If this happens then the
* structure size will still be 8 bytes, so no other alignments will change.
*/
-typedef struct {
+struct tsc_timestamp {
unsigned int tsc_bits; /* 0: 32 bits read from the CPU's TSC. */
unsigned int tsc_bitshift; /* 4: 'tsc_bits' uses N:N+31 of TSC. */
-} tsc_timestamp_t; /* 8 bytes */
+}; /* 8 bytes */
+typedef struct tsc_timestamp tsc_timestamp_t;
struct pt_fpreg {
union {
@@ -105,7 +108,7 @@ struct pt_fpreg {
} u;
};
-typedef struct cpu_user_regs{
+struct cpu_user_regs {
/* The following registers are saved by SAVE_MIN: */
unsigned long b6; /* scratch */
unsigned long b7; /* scratch */
@@ -179,9 +182,10 @@ typedef struct cpu_user_regs{
unsigned long eml_unat; /* used for emulating instruction */
    unsigned long rfi_pfs;              /* used for emulating rfi */
-}cpu_user_regs_t;
-
-typedef union {
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+
+union vac {
unsigned long value;
struct {
int a_int:1;
@@ -193,9 +197,10 @@ typedef union {
int a_bsw:1;
long reserved:57;
};
-} vac_t;
-
-typedef union {
+};
+typedef union vac vac_t;
+
+union vdc {
unsigned long value;
struct {
int d_vmsw:1;
@@ -206,11 +211,12 @@ typedef union {
int d_itm:1;
long reserved:58;
};
-} vdc_t;
-
-typedef struct {
- vac_t vac;
- vdc_t vdc;
+};
+typedef union vdc vdc_t;
+
+struct mapped_regs {
+ union vac vac;
+ union vdc vdc;
unsigned long virt_env_vaddr;
unsigned long reserved1[29];
unsigned long vhpi;
@@ -290,27 +296,32 @@ typedef struct {
unsigned long reserved6[3456];
unsigned long vmm_avail[128];
unsigned long reserved7[4096];
-} mapped_regs_t;
-
-typedef struct {
- mapped_regs_t *privregs;
- int evtchn_vector;
-} arch_vcpu_info_t;
+};
+typedef struct mapped_regs mapped_regs_t;
+
+struct arch_vcpu_info {
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
typedef mapped_regs_t vpd_t;
-typedef struct {
+struct arch_shared_info {
unsigned int flags;
unsigned long start_info_pfn;
-} arch_shared_info_t;
-
-typedef struct {
+
+ /* Interrupt vector for event channel. */
+ int evtchn_vector;
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_initrd_info {
unsigned long start;
unsigned long size;
-} arch_initrd_info_t;
+};
+typedef struct arch_initrd_info arch_initrd_info_t;
#define IA64_COMMAND_LINE_SIZE 512
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
#define VGCF_FPU_VALID (1<<0)
#define VGCF_VMX_GUEST (1<<1)
#define VGCF_IN_KERNEL (1<<2)
@@ -320,19 +331,17 @@ typedef struct vcpu_guest_context {
unsigned long sys_pgnr; /* System pages out of domain memory */
unsigned long vm_assist; /* VMASST_TYPE_* bitmap, now none on IPF */
- cpu_user_regs_t regs;
- arch_vcpu_info_t vcpu;
- arch_shared_info_t shared;
- arch_initrd_info_t initrd;
+ struct cpu_user_regs regs;
+ struct mapped_regs *privregs;
+ struct arch_shared_info shared;
+ struct arch_initrd_info initrd;
char cmdline[IA64_COMMAND_LINE_SIZE];
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
// dom0 vp op
-#define __HYPERVISOR_ia64_dom0vp_op 256 // XXX sufficient large
- // TODO
- // arch specific hypercall
- // number conversion
+#define __HYPERVISOR_ia64_dom0vp_op __HYPERVISOR_arch_0
#define IA64_DOM0VP_ioremap 0 // map io space in machine
// address to dom0 physical
// address space.
@@ -352,10 +361,6 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
// to the corresponding
// pseudo physical page frame
// number of the caller domain
-#define IA64_DOM0VP_populate_physmap 16 // allocate machine-contigusous
- // memory region and
- // map it to pseudo physical
- // address
#define IA64_DOM0VP_zap_physmap 17 // unmap and free pages
// contained in the specified
// pseudo physical region
@@ -364,6 +369,32 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
// address space.
#endif /* !__ASSEMBLY__ */
+
+/* Hyperprivops. */
+#define HYPERPRIVOP_RFI 0x1
+#define HYPERPRIVOP_RSM_DT 0x2
+#define HYPERPRIVOP_SSM_DT 0x3
+#define HYPERPRIVOP_COVER 0x4
+#define HYPERPRIVOP_ITC_D 0x5
+#define HYPERPRIVOP_ITC_I 0x6
+#define HYPERPRIVOP_SSM_I 0x7
+#define HYPERPRIVOP_GET_IVR 0x8
+#define HYPERPRIVOP_GET_TPR 0x9
+#define HYPERPRIVOP_SET_TPR 0xa
+#define HYPERPRIVOP_EOI 0xb
+#define HYPERPRIVOP_SET_ITM 0xc
+#define HYPERPRIVOP_THASH 0xd
+#define HYPERPRIVOP_PTC_GA 0xe
+#define HYPERPRIVOP_ITR_D 0xf
+#define HYPERPRIVOP_GET_RR 0x10
+#define HYPERPRIVOP_SET_RR 0x11
+#define HYPERPRIVOP_SET_KR 0x12
+#define HYPERPRIVOP_FC 0x13
+#define HYPERPRIVOP_GET_CPUID 0x14
+#define HYPERPRIVOP_GET_PMD 0x15
+#define HYPERPRIVOP_GET_EFLAG 0x16
+#define HYPERPRIVOP_SET_EFLAG 0x17
+#define HYPERPRIVOP_MAX 0x17
#endif /* __HYPERVISOR_IF_IA64_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-x86_32.h Tue May 30 14:30:34 2006 -0500
@@ -95,15 +95,16 @@ DEFINE_XEN_GUEST_HANDLE(void);
#define TI_GET_IF(_ti) ((_ti)->flags & 4)
#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
-typedef struct trap_info {
+struct trap_info {
uint8_t vector; /* exception vector */
uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
uint16_t cs; /* code selector */
unsigned long address; /* code offset */
-} trap_info_t;
+};
+typedef struct trap_info trap_info_t;
DEFINE_XEN_GUEST_HANDLE(trap_info_t);
-typedef struct cpu_user_regs {
+struct cpu_user_regs {
uint32_t ebx;
uint32_t ecx;
uint32_t edx;
@@ -124,7 +125,8 @@ typedef struct cpu_user_regs {
uint16_t ds, _pad3;
uint16_t fs, _pad4;
uint16_t gs, _pad5;
-} cpu_user_regs_t;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
@@ -133,14 +135,14 @@ typedef uint64_t tsc_timestamp_t; /* RDT
* The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
*/
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
/* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
#define VGCF_I387_VALID (1<<0)
#define VGCF_HVM_GUEST (1<<1)
#define VGCF_IN_KERNEL (1<<2)
unsigned long flags; /* VGCF_* flags */
- cpu_user_regs_t user_regs; /* User-level CPU registers */
+ struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
@@ -152,25 +154,29 @@ typedef struct vcpu_guest_context {
unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
unsigned long failsafe_callback_eip;
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
-typedef struct arch_shared_info {
+struct arch_shared_info {
unsigned long max_pfn; /* max pfn that appears in table */
/* Frame containing list of mfns containing list of mfns containing p2m. */
unsigned long pfn_to_mfn_frame_list_list;
unsigned long nmi_reason;
-} arch_shared_info_t;
-
-typedef struct {
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_vcpu_info {
unsigned long cr2;
unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
-} arch_vcpu_info_t;
-
-typedef struct {
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct xen_callback {
unsigned long cs;
unsigned long eip;
-} xen_callback_t;
+};
+typedef struct xen_callback xen_callback_t;
#endif /* !__ASSEMBLY__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/arch-x86_64.h Tue May 30 14:30:34 2006 -0500
@@ -150,12 +150,13 @@ struct iret_context {
#define TI_GET_IF(_ti) ((_ti)->flags & 4)
#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
-typedef struct trap_info {
+struct trap_info {
uint8_t vector; /* exception vector */
uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
uint16_t cs; /* code selector */
unsigned long address; /* code offset */
-} trap_info_t;
+};
+typedef struct trap_info trap_info_t;
DEFINE_XEN_GUEST_HANDLE(trap_info_t);
#ifdef __GNUC__
@@ -166,7 +167,7 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
#define __DECL_REG(name) uint64_t r ## name
#endif
-typedef struct cpu_user_regs {
+struct cpu_user_regs {
uint64_t r15;
uint64_t r14;
uint64_t r13;
@@ -195,7 +196,8 @@ typedef struct cpu_user_regs {
uint16_t ds, _pad4[3];
uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */
uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
-} cpu_user_regs_t;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
#undef __DECL_REG
@@ -206,14 +208,14 @@ typedef uint64_t tsc_timestamp_t; /* RDT
* The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
*/
-typedef struct vcpu_guest_context {
+struct vcpu_guest_context {
/* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
#define VGCF_I387_VALID (1<<0)
#define VGCF_HVM_GUEST (1<<1)
#define VGCF_IN_KERNEL (1<<2)
unsigned long flags; /* VGCF_* flags */
- cpu_user_regs_t user_regs; /* User-level CPU registers */
+ struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
@@ -228,20 +230,23 @@ typedef struct vcpu_guest_context {
uint64_t fs_base;
uint64_t gs_base_kernel;
uint64_t gs_base_user;
-} vcpu_guest_context_t;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
-typedef struct arch_shared_info {
+struct arch_shared_info {
unsigned long max_pfn; /* max pfn that appears in table */
/* Frame containing list of mfns containing list of mfns containing p2m. */
unsigned long pfn_to_mfn_frame_list_list;
unsigned long nmi_reason;
-} arch_shared_info_t;
-
-typedef struct {
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+struct arch_vcpu_info {
unsigned long cr2;
unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
-} arch_vcpu_info_t;
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
typedef unsigned long xen_callback_t;
diff -r e74246451527 -r f54d38cea8ac xen/include/public/callback.h
--- a/xen/include/public/callback.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/callback.h Tue May 30 14:30:34 2006 -0500
@@ -32,10 +32,11 @@
* Register a callback.
*/
#define CALLBACKOP_register 0
-typedef struct callback_register {
+struct callback_register {
int type;
xen_callback_t address;
-} callback_register_t;
+};
+typedef struct callback_register callback_register_t;
DEFINE_XEN_GUEST_HANDLE(callback_register_t);
/*
@@ -45,9 +46,10 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe
* you attempt to unregister such a callback.
*/
#define CALLBACKOP_unregister 1
-typedef struct callback_unregister {
+struct callback_unregister {
int type;
-} callback_unregister_t;
+};
+typedef struct callback_unregister callback_unregister_t;
DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
#endif /* __XEN_PUBLIC_CALLBACK_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/dom0_ops.h Tue May 30 14:30:34 2006 -0500
@@ -24,14 +24,15 @@
/************************************************************************/
#define DOM0_GETMEMLIST 2
-typedef struct dom0_getmemlist {
+struct dom0_getmemlist {
/* IN variables. */
domid_t domain;
unsigned long max_pfns;
XEN_GUEST_HANDLE(ulong) buffer;
/* OUT variables. */
unsigned long num_pfns;
-} dom0_getmemlist_t;
+};
+typedef struct dom0_getmemlist dom0_getmemlist_t;
DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t);
#define DOM0_SCHEDCTL 6
@@ -45,39 +46,43 @@ DEFINE_XEN_GUEST_HANDLE(dom0_adjustdom_t
DEFINE_XEN_GUEST_HANDLE(dom0_adjustdom_t);
#define DOM0_CREATEDOMAIN 8
-typedef struct dom0_createdomain {
+struct dom0_createdomain {
/* IN parameters */
uint32_t ssidref;
xen_domain_handle_t handle;
/* IN/OUT parameters. */
/* Identifier for new domain (auto-allocate if zero is specified). */
domid_t domain;
-} dom0_createdomain_t;
+};
+typedef struct dom0_createdomain dom0_createdomain_t;
DEFINE_XEN_GUEST_HANDLE(dom0_createdomain_t);
#define DOM0_DESTROYDOMAIN 9
-typedef struct dom0_destroydomain {
- /* IN variables. */
- domid_t domain;
-} dom0_destroydomain_t;
+struct dom0_destroydomain {
+ /* IN variables. */
+ domid_t domain;
+};
+typedef struct dom0_destroydomain dom0_destroydomain_t;
DEFINE_XEN_GUEST_HANDLE(dom0_destroydomain_t);
#define DOM0_PAUSEDOMAIN 10
-typedef struct dom0_pausedomain {
+struct dom0_pausedomain {
/* IN parameters. */
domid_t domain;
-} dom0_pausedomain_t;
+};
+typedef struct dom0_pausedomain dom0_pausedomain_t;
DEFINE_XEN_GUEST_HANDLE(dom0_pausedomain_t);
#define DOM0_UNPAUSEDOMAIN 11
-typedef struct dom0_unpausedomain {
+struct dom0_unpausedomain {
/* IN parameters. */
domid_t domain;
-} dom0_unpausedomain_t;
+};
+typedef struct dom0_unpausedomain dom0_unpausedomain_t;
DEFINE_XEN_GUEST_HANDLE(dom0_unpausedomain_t);
#define DOM0_GETDOMAININFO 12
-typedef struct dom0_getdomaininfo {
+struct dom0_getdomaininfo {
/* IN variables. */
domid_t domain; /* NB. IN/OUT variable. */
/* OUT variables. */
@@ -99,21 +104,23 @@ typedef struct dom0_getdomaininfo {
uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
uint32_t ssidref;
xen_domain_handle_t handle;
-} dom0_getdomaininfo_t;
+};
+typedef struct dom0_getdomaininfo dom0_getdomaininfo_t;
DEFINE_XEN_GUEST_HANDLE(dom0_getdomaininfo_t);
#define DOM0_SETVCPUCONTEXT 13
-typedef struct dom0_setvcpucontext {
+struct dom0_setvcpucontext {
/* IN variables. */
domid_t domain;
uint32_t vcpu;
/* IN/OUT parameters */
XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt;
-} dom0_setvcpucontext_t;
+};
+typedef struct dom0_setvcpucontext dom0_setvcpucontext_t;
DEFINE_XEN_GUEST_HANDLE(dom0_setvcpucontext_t);
#define DOM0_MSR 15
-typedef struct dom0_msr {
+struct dom0_msr {
/* IN variables. */
uint32_t write;
cpumap_t cpu_mask;
@@ -123,7 +130,8 @@ typedef struct dom0_msr {
/* OUT variables. */
uint32_t out1;
uint32_t out2;
-} dom0_msr_t;
+};
+typedef struct dom0_msr dom0_msr_t;
DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
/*
@@ -131,12 +139,13 @@ DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
* 1 January, 1970 if the current system time was <system_time>.
*/
#define DOM0_SETTIME 17
-typedef struct dom0_settime {
+struct dom0_settime {
/* IN variables. */
uint32_t secs;
uint32_t nsecs;
uint64_t system_time;
-} dom0_settime_t;
+};
+typedef struct dom0_settime dom0_settime_t;
DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
#define DOM0_GETPAGEFRAMEINFO 18
@@ -151,44 +160,47 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
#define LTAB_MASK XTAB
#define LTABTYPE_MASK (0x7<<LTAB_SHIFT)
-typedef struct dom0_getpageframeinfo {
+struct dom0_getpageframeinfo {
/* IN variables. */
unsigned long mfn; /* Machine page frame number to query. */
domid_t domain; /* To which domain does the frame belong? */
/* OUT variables. */
/* Is the page PINNED to a type? */
uint32_t type; /* see above type defs */
-} dom0_getpageframeinfo_t;
+};
+typedef struct dom0_getpageframeinfo dom0_getpageframeinfo_t;
DEFINE_XEN_GUEST_HANDLE(dom0_getpageframeinfo_t);
/*
* Read console content from Xen buffer ring.
*/
#define DOM0_READCONSOLE 19
-typedef struct dom0_readconsole {
+struct dom0_readconsole {
/* IN variables. */
uint32_t clear; /* Non-zero -> clear after reading. */
/* IN/OUT variables. */
    XEN_GUEST_HANDLE(char) buffer; /* In: Buffer start; Out: Used buffer start */
uint32_t count; /* In: Buffer size; Out: Used buffer size */
-} dom0_readconsole_t;
+};
+typedef struct dom0_readconsole dom0_readconsole_t;
DEFINE_XEN_GUEST_HANDLE(dom0_readconsole_t);
/*
* Set which physical cpus a vcpu can execute on.
*/
#define DOM0_SETVCPUAFFINITY 20
-typedef struct dom0_setvcpuaffinity {
+struct dom0_setvcpuaffinity {
/* IN variables. */
domid_t domain;
uint32_t vcpu;
cpumap_t cpumap;
-} dom0_setvcpuaffinity_t;
+};
+typedef struct dom0_setvcpuaffinity dom0_setvcpuaffinity_t;
DEFINE_XEN_GUEST_HANDLE(dom0_setvcpuaffinity_t);
/* Get trace buffers machine base address */
#define DOM0_TBUFCONTROL 21
-typedef struct dom0_tbufcontrol {
+struct dom0_tbufcontrol {
/* IN variables */
#define DOM0_TBUF_GET_INFO 0
#define DOM0_TBUF_SET_CPU_MASK 1
@@ -203,14 +215,15 @@ typedef struct dom0_tbufcontrol {
/* OUT variables */
unsigned long buffer_mfn;
uint32_t size;
-} dom0_tbufcontrol_t;
+};
+typedef struct dom0_tbufcontrol dom0_tbufcontrol_t;
DEFINE_XEN_GUEST_HANDLE(dom0_tbufcontrol_t);
/*
* Get physical information about the host machine
*/
#define DOM0_PHYSINFO 22
-typedef struct dom0_physinfo {
+struct dom0_physinfo {
uint32_t threads_per_core;
uint32_t cores_per_socket;
uint32_t sockets_per_node;
@@ -219,17 +232,19 @@ typedef struct dom0_physinfo {
unsigned long total_pages;
unsigned long free_pages;
uint32_t hw_cap[8];
-} dom0_physinfo_t;
+};
+typedef struct dom0_physinfo dom0_physinfo_t;
DEFINE_XEN_GUEST_HANDLE(dom0_physinfo_t);
/*
* Get the ID of the current scheduler.
*/
#define DOM0_SCHED_ID 24
-typedef struct dom0_sched_id {
+struct dom0_sched_id {
/* OUT variable */
uint32_t sched_id;
-} dom0_sched_id_t;
+};
+typedef struct dom0_sched_id dom0_sched_id_t;
DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t);
/*
@@ -246,15 +261,16 @@ DEFINE_XEN_GUEST_HANDLE(dom0_sched_id_t)
#define DOM0_SHADOW_CONTROL_OP_CLEAN 11
#define DOM0_SHADOW_CONTROL_OP_PEEK 12
-typedef struct dom0_shadow_control_stats {
+struct dom0_shadow_control_stats {
uint32_t fault_count;
uint32_t dirty_count;
uint32_t dirty_net_count;
uint32_t dirty_block_count;
-} dom0_shadow_control_stats_t;
+};
+typedef struct dom0_shadow_control_stats dom0_shadow_control_stats_t;
DEFINE_XEN_GUEST_HANDLE(dom0_shadow_control_stats_t);
-typedef struct dom0_shadow_control {
+struct dom0_shadow_control {
/* IN variables. */
domid_t domain;
uint32_t op;
@@ -262,26 +278,29 @@ typedef struct dom0_shadow_control {
/* IN/OUT variables. */
unsigned long pages; /* size of buffer, updated with actual size */
/* OUT variables. */
- dom0_shadow_control_stats_t stats;
-} dom0_shadow_control_t;
+ struct dom0_shadow_control_stats stats;
+};
+typedef struct dom0_shadow_control dom0_shadow_control_t;
DEFINE_XEN_GUEST_HANDLE(dom0_shadow_control_t);
#define DOM0_SETDOMAINMAXMEM 28
-typedef struct dom0_setdomainmaxmem {
+struct dom0_setdomainmaxmem {
/* IN variables. */
domid_t domain;
unsigned long max_memkb;
-} dom0_setdomainmaxmem_t;
+};
+typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t;
DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t);
#define DOM0_GETPAGEFRAMEINFO2 29 /* batched interface */
-typedef struct dom0_getpageframeinfo2 {
+struct dom0_getpageframeinfo2 {
/* IN variables. */
domid_t domain;
unsigned long num;
/* IN/OUT variables. */
XEN_GUEST_HANDLE(ulong) array;
-} dom0_getpageframeinfo2_t;
+};
+typedef struct dom0_getpageframeinfo2 dom0_getpageframeinfo2_t;
DEFINE_XEN_GUEST_HANDLE(dom0_getpageframeinfo2_t);
/*
@@ -292,7 +311,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram
* (x86-specific).
*/
#define DOM0_ADD_MEMTYPE 31
-typedef struct dom0_add_memtype {
+struct dom0_add_memtype {
/* IN variables. */
unsigned long mfn;
unsigned long nr_mfns;
@@ -300,7 +319,8 @@ typedef struct dom0_add_memtype {
/* OUT variables. */
uint32_t handle;
uint32_t reg;
-} dom0_add_memtype_t;
+};
+typedef struct dom0_add_memtype dom0_add_memtype_t;
DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t);
/*
@@ -311,23 +331,25 @@ DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype
* (x86-specific).
*/
#define DOM0_DEL_MEMTYPE 32
-typedef struct dom0_del_memtype {
+struct dom0_del_memtype {
/* IN variables. */
uint32_t handle;
uint32_t reg;
-} dom0_del_memtype_t;
+};
+typedef struct dom0_del_memtype dom0_del_memtype_t;
DEFINE_XEN_GUEST_HANDLE(dom0_del_memtype_t);
/* Read current type of an MTRR (x86-specific). */
#define DOM0_READ_MEMTYPE 33
-typedef struct dom0_read_memtype {
+struct dom0_read_memtype {
/* IN variables. */
uint32_t reg;
/* OUT variables. */
unsigned long mfn;
unsigned long nr_mfns;
uint32_t type;
-} dom0_read_memtype_t;
+};
+typedef struct dom0_read_memtype dom0_read_memtype_t;
DEFINE_XEN_GUEST_HANDLE(dom0_read_memtype_t);
/* Interface for controlling Xen software performance counters. */
@@ -335,50 +357,56 @@ DEFINE_XEN_GUEST_HANDLE(dom0_read_memtyp
/* Sub-operations: */
#define DOM0_PERFCCONTROL_OP_RESET 1 /* Reset all counters to zero. */
#define DOM0_PERFCCONTROL_OP_QUERY 2 /* Get perfctr information. */
-typedef struct dom0_perfc_desc {
+struct dom0_perfc_desc {
char name[80]; /* name of perf counter */
uint32_t nr_vals; /* number of values for this counter */
uint32_t vals[64]; /* array of values */
-} dom0_perfc_desc_t;
+};
+typedef struct dom0_perfc_desc dom0_perfc_desc_t;
DEFINE_XEN_GUEST_HANDLE(dom0_perfc_desc_t);
-typedef struct dom0_perfccontrol {
+
+struct dom0_perfccontrol {
/* IN variables. */
uint32_t op; /* DOM0_PERFCCONTROL_OP_??? */
/* OUT variables. */
uint32_t nr_counters; /* number of counters */
    XEN_GUEST_HANDLE(dom0_perfc_desc_t) desc; /* counter information (or NULL) */
-} dom0_perfccontrol_t;
+};
+typedef struct dom0_perfccontrol dom0_perfccontrol_t;
DEFINE_XEN_GUEST_HANDLE(dom0_perfccontrol_t);
#define DOM0_MICROCODE 35
-typedef struct dom0_microcode {
+struct dom0_microcode {
/* IN variables. */
XEN_GUEST_HANDLE(void) data; /* Pointer to microcode data */
uint32_t length; /* Length of microcode data. */
-} dom0_microcode_t;
+};
+typedef struct dom0_microcode dom0_microcode_t;
DEFINE_XEN_GUEST_HANDLE(dom0_microcode_t);
#define DOM0_IOPORT_PERMISSION 36
-typedef struct dom0_ioport_permission {
+struct dom0_ioport_permission {
domid_t domain; /* domain to be affected */
    uint32_t first_port;              /* first port in range */
uint32_t nr_ports; /* size of port range */
uint8_t allow_access; /* allow or deny access to range? */
-} dom0_ioport_permission_t;
+};
+typedef struct dom0_ioport_permission dom0_ioport_permission_t;
DEFINE_XEN_GUEST_HANDLE(dom0_ioport_permission_t);
#define DOM0_GETVCPUCONTEXT 37
-typedef struct dom0_getvcpucontext {
+struct dom0_getvcpucontext {
/* IN variables. */
domid_t domain; /* domain to be affected */
uint32_t vcpu; /* vcpu # */
/* OUT variables. */
XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt;
-} dom0_getvcpucontext_t;
+};
+typedef struct dom0_getvcpucontext dom0_getvcpucontext_t;
DEFINE_XEN_GUEST_HANDLE(dom0_getvcpucontext_t);
#define DOM0_GETVCPUINFO 43
-typedef struct dom0_getvcpuinfo {
+struct dom0_getvcpuinfo {
/* IN variables. */
domid_t domain; /* domain to be affected */
uint32_t vcpu; /* vcpu # */
@@ -389,92 +417,104 @@ typedef struct dom0_getvcpuinfo {
uint64_t cpu_time; /* total cpu time consumed (ns) */
uint32_t cpu; /* current mapping */
cpumap_t cpumap; /* allowable mapping */
-} dom0_getvcpuinfo_t;
+};
+typedef struct dom0_getvcpuinfo dom0_getvcpuinfo_t;
DEFINE_XEN_GUEST_HANDLE(dom0_getvcpuinfo_t);
#define DOM0_GETDOMAININFOLIST 38
-typedef struct dom0_getdomaininfolist {
+struct dom0_getdomaininfolist {
/* IN variables. */
domid_t first_domain;
uint32_t max_domains;
XEN_GUEST_HANDLE(dom0_getdomaininfo_t) buffer;
/* OUT variables. */
uint32_t num_domains;
-} dom0_getdomaininfolist_t;
+};
+typedef struct dom0_getdomaininfolist dom0_getdomaininfolist_t;
DEFINE_XEN_GUEST_HANDLE(dom0_getdomaininfolist_t);
#define DOM0_PLATFORM_QUIRK 39
#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */
#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */
#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */
-typedef struct dom0_platform_quirk {
+struct dom0_platform_quirk {
/* IN variables. */
uint32_t quirk_id;
-} dom0_platform_quirk_t;
+};
+typedef struct dom0_platform_quirk dom0_platform_quirk_t;
DEFINE_XEN_GUEST_HANDLE(dom0_platform_quirk_t);
-#define DOM0_PHYSICAL_MEMORY_MAP 40
-typedef struct dom0_memory_map_entry {
+#define DOM0_PHYSICAL_MEMORY_MAP 40 /* Unimplemented from 3.0.3 onwards */
+struct dom0_memory_map_entry {
uint64_t start, end;
uint32_t flags; /* reserved */
uint8_t is_ram;
-} dom0_memory_map_entry_t;
+};
+typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
-typedef struct dom0_physical_memory_map {
+
+struct dom0_physical_memory_map {
/* IN variables. */
uint32_t max_map_entries;
/* OUT variables. */
uint32_t nr_map_entries;
XEN_GUEST_HANDLE(dom0_memory_map_entry_t) memory_map;
-} dom0_physical_memory_map_t;
+};
+typedef struct dom0_physical_memory_map dom0_physical_memory_map_t;
DEFINE_XEN_GUEST_HANDLE(dom0_physical_memory_map_t);
#define DOM0_MAX_VCPUS 41
-typedef struct dom0_max_vcpus {
+struct dom0_max_vcpus {
domid_t domain; /* domain to be affected */
uint32_t max; /* maximum number of vcpus */
-} dom0_max_vcpus_t;
+};
+typedef struct dom0_max_vcpus dom0_max_vcpus_t;
DEFINE_XEN_GUEST_HANDLE(dom0_max_vcpus_t);
#define DOM0_SETDOMAINHANDLE 44
-typedef struct dom0_setdomainhandle {
+struct dom0_setdomainhandle {
domid_t domain;
xen_domain_handle_t handle;
-} dom0_setdomainhandle_t;
+};
+typedef struct dom0_setdomainhandle dom0_setdomainhandle_t;
DEFINE_XEN_GUEST_HANDLE(dom0_setdomainhandle_t);
#define DOM0_SETDEBUGGING 45
-typedef struct dom0_setdebugging {
+struct dom0_setdebugging {
domid_t domain;
uint8_t enable;
-} dom0_setdebugging_t;
+};
+typedef struct dom0_setdebugging dom0_setdebugging_t;
DEFINE_XEN_GUEST_HANDLE(dom0_setdebugging_t);
#define DOM0_IRQ_PERMISSION 46
-typedef struct dom0_irq_permission {
+struct dom0_irq_permission {
domid_t domain; /* domain to be affected */
uint8_t pirq;
uint8_t allow_access; /* flag to specify enable/disable of IRQ access */
-} dom0_irq_permission_t;
+};
+typedef struct dom0_irq_permission dom0_irq_permission_t;
DEFINE_XEN_GUEST_HANDLE(dom0_irq_permission_t);
#define DOM0_IOMEM_PERMISSION 47
-typedef struct dom0_iomem_permission {
+struct dom0_iomem_permission {
domid_t domain; /* domain to be affected */
unsigned long first_mfn; /* first page (physical page number) in range */
unsigned long nr_mfns; /* number of pages in range (>0) */
uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
-} dom0_iomem_permission_t;
+};
+typedef struct dom0_iomem_permission dom0_iomem_permission_t;
DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permission_t);
#define DOM0_HYPERCALL_INIT 48
-typedef struct dom0_hypercall_init {
+struct dom0_hypercall_init {
domid_t domain; /* domain to be affected */
unsigned long mfn; /* machine frame to be initialised */
-} dom0_hypercall_init_t;
+};
+typedef struct dom0_hypercall_init dom0_hypercall_init_t;
DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
-typedef struct dom0_op {
+struct dom0_op {
uint32_t cmd;
uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
union {
@@ -517,7 +557,8 @@ typedef struct dom0_op {
struct dom0_hypercall_init hypercall_init;
uint8_t pad[128];
} u;
-} dom0_op_t;
+};
+typedef struct dom0_op dom0_op_t;
DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
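Unlike acm_ops.h, the dom0_op wrapper survives; only the member definitions move to the plain-struct-plus-typedef pattern, so the calling convention is unchanged. A sketch, assuming the getdomaininfo union member defined earlier in this header (the domid is illustrative):

    static void example_getdomaininfo(domid_t dom)
    {
        struct dom0_op op = {
            .cmd               = DOM0_GETDOMAININFO,
            .interface_version = DOM0_INTERFACE_VERSION,
        };
        op.u.getdomaininfo.domain = dom;
        /* A caller would issue HYPERVISOR_dom0_op(&op) (or the privcmd
         * ioctl path from userspace) and read op.u.getdomaininfo back. */
    }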
diff -r e74246451527 -r f54d38cea8ac xen/include/public/event_channel.h
--- a/xen/include/public/event_channel.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/event_channel.h Tue May 30 14:30:34 2006 -0500
@@ -28,12 +28,13 @@ DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
* 2. <rdom> may be DOMID_SELF, allowing loopback connections.
*/
#define EVTCHNOP_alloc_unbound 6
-typedef struct evtchn_alloc_unbound {
+struct evtchn_alloc_unbound {
/* IN parameters */
domid_t dom, remote_dom;
/* OUT parameters */
evtchn_port_t port;
-} evtchn_alloc_unbound_t;
+};
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
/*
* EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
@@ -45,13 +46,14 @@ typedef struct evtchn_alloc_unbound {
* 2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
*/
#define EVTCHNOP_bind_interdomain 0
-typedef struct evtchn_bind_interdomain {
+struct evtchn_bind_interdomain {
/* IN parameters. */
domid_t remote_dom;
evtchn_port_t remote_port;
/* OUT parameters. */
evtchn_port_t local_port;
-} evtchn_bind_interdomain_t;
+};
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
/*
* EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
@@ -66,13 +68,14 @@ typedef struct evtchn_bind_interdomain {
* binding cannot be changed.
*/
#define EVTCHNOP_bind_virq 1
-typedef struct evtchn_bind_virq {
+struct evtchn_bind_virq {
/* IN parameters. */
uint32_t virq;
uint32_t vcpu;
/* OUT parameters. */
evtchn_port_t port;
-} evtchn_bind_virq_t;
+};
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
/*
* EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
@@ -81,14 +84,15 @@ typedef struct evtchn_bind_virq {
* 2. Only a sufficiently-privileged domain may bind to a physical IRQ.
*/
#define EVTCHNOP_bind_pirq 2
-typedef struct evtchn_bind_pirq {
+struct evtchn_bind_pirq {
/* IN parameters. */
uint32_t pirq;
#define BIND_PIRQ__WILL_SHARE 1
uint32_t flags; /* BIND_PIRQ__* */
/* OUT parameters. */
evtchn_port_t port;
-} evtchn_bind_pirq_t;
+};
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
/*
* EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
@@ -97,11 +101,12 @@ typedef struct evtchn_bind_pirq {
* may not be changed.
*/
#define EVTCHNOP_bind_ipi 7
-typedef struct evtchn_bind_ipi {
- uint32_t vcpu;
- /* OUT parameters. */
- evtchn_port_t port;
-} evtchn_bind_ipi_t;
+struct evtchn_bind_ipi {
+ uint32_t vcpu;
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
/*
* EVTCHNOP_close: Close a local event channel <port>. If the channel is
@@ -109,20 +114,22 @@ typedef struct evtchn_bind_ipi {
* (EVTCHNSTAT_unbound), awaiting a new connection.
*/
#define EVTCHNOP_close 3
-typedef struct evtchn_close {
- /* IN parameters. */
- evtchn_port_t port;
-} evtchn_close_t;
+struct evtchn_close {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_close evtchn_close_t;
/*
* EVTCHNOP_send: Send an event to the remote end of the channel whose local
* endpoint is <port>.
*/
#define EVTCHNOP_send 4
-typedef struct evtchn_send {
- /* IN parameters. */
- evtchn_port_t port;
-} evtchn_send_t;
+struct evtchn_send {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_send evtchn_send_t;
/*
* EVTCHNOP_status: Get the current status of the communication channel which
@@ -133,7 +140,7 @@ typedef struct evtchn_send {
* channel for which <dom> is not DOMID_SELF.
*/
#define EVTCHNOP_status 5
-typedef struct evtchn_status {
+struct evtchn_status {
/* IN parameters */
domid_t dom;
evtchn_port_t port;
@@ -157,7 +164,8 @@ typedef struct evtchn_status {
uint32_t pirq; /* EVTCHNSTAT_pirq */
uint32_t virq; /* EVTCHNSTAT_virq */
} u;
-} evtchn_status_t;
+};
+typedef struct evtchn_status evtchn_status_t;
/*
* EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
@@ -172,41 +180,44 @@ typedef struct evtchn_status {
* has its binding reset to vcpu0).
*/
#define EVTCHNOP_bind_vcpu 8
-typedef struct evtchn_bind_vcpu {
- /* IN parameters. */
- evtchn_port_t port;
- uint32_t vcpu;
-} evtchn_bind_vcpu_t;
+struct evtchn_bind_vcpu {
+ /* IN parameters. */
+ evtchn_port_t port;
+ uint32_t vcpu;
+};
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
/*
* EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
* a notification to the appropriate VCPU if an event is pending.
*/
#define EVTCHNOP_unmask 9
-typedef struct evtchn_unmask {
- /* IN parameters. */
- evtchn_port_t port;
-} evtchn_unmask_t;
+struct evtchn_unmask {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_unmask evtchn_unmask_t;
/*
 * Argument to event_channel_op_compat() hypercall. Superseded by the new
* event_channel_op() hypercall since 0x00030202.
*/
-typedef struct evtchn_op {
+struct evtchn_op {
uint32_t cmd; /* EVTCHNOP_* */
union {
- evtchn_alloc_unbound_t alloc_unbound;
- evtchn_bind_interdomain_t bind_interdomain;
- evtchn_bind_virq_t bind_virq;
- evtchn_bind_pirq_t bind_pirq;
- evtchn_bind_ipi_t bind_ipi;
- evtchn_close_t close;
- evtchn_send_t send;
- evtchn_status_t status;
- evtchn_bind_vcpu_t bind_vcpu;
- evtchn_unmask_t unmask;
+ struct evtchn_alloc_unbound alloc_unbound;
+ struct evtchn_bind_interdomain bind_interdomain;
+ struct evtchn_bind_virq bind_virq;
+ struct evtchn_bind_pirq bind_pirq;
+ struct evtchn_bind_ipi bind_ipi;
+ struct evtchn_close close;
+ struct evtchn_send send;
+ struct evtchn_status status;
+ struct evtchn_bind_vcpu bind_vcpu;
+ struct evtchn_unmask unmask;
} u;
-} evtchn_op_t;
+};
+typedef struct evtchn_op evtchn_op_t;
DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
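The union members now name the structs directly, but the layout is unchanged, so existing users of the compat wrapper keep working. A sketch of allocating an unbound port through the wrapper (the hypercall plumbing is assumed, not shown in this header):

    static void example_alloc_unbound(domid_t remote_dom)
    {
        struct evtchn_op op = {
            .cmd = EVTCHNOP_alloc_unbound,
            .u.alloc_unbound = {
                .dom        = DOMID_SELF,
                .remote_dom = remote_dom,
            },
        };
        /* A guest would issue HYPERVISOR_event_channel_op(&op); on
         * success Xen fills op.u.alloc_unbound.port with the new port. */
    }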
diff -r e74246451527 -r f54d38cea8ac xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/grant_table.h Tue May 30 14:30:34 2006 -0500
@@ -71,7 +71,7 @@
* [XEN]: This field is written by Xen and read by the sharing guest.
* [GST]: This field is written by the guest and read by Xen.
*/
-typedef struct grant_entry {
+struct grant_entry {
/* GTF_xxx: various type and flag information. [XEN,GST] */
#if defined(__powerpc__)
ulong flags;
@@ -85,7 +85,8 @@ typedef struct grant_entry {
* GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
*/
uint32_t frame;
-} grant_entry_t;
+};
+typedef struct grant_entry grant_entry_t;
/*
* Type of grant entry.
@@ -160,7 +161,7 @@ typedef uint32_t grant_handle_t;
* to be accounted to the correct grant reference!
*/
#define GNTTABOP_map_grant_ref 0
-typedef struct gnttab_map_grant_ref {
+struct gnttab_map_grant_ref {
/* IN parameters. */
uint64_t host_addr;
uint32_t flags; /* GNTMAP_* */
@@ -170,7 +171,8 @@ typedef struct gnttab_map_grant_ref {
int16_t status; /* GNTST_* */
grant_handle_t handle;
uint64_t dev_bus_addr;
-} gnttab_map_grant_ref_t;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
/*
@@ -185,14 +187,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant
* mappings will remain in the device or host TLBs.
*/
#define GNTTABOP_unmap_grant_ref 1
-typedef struct gnttab_unmap_grant_ref {
+struct gnttab_unmap_grant_ref {
/* IN parameters. */
uint64_t host_addr;
uint64_t dev_bus_addr;
grant_handle_t handle;
/* OUT parameters. */
int16_t status; /* GNTST_* */
-} gnttab_unmap_grant_ref_t;
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
/*
@@ -205,14 +208,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_gra
* 3. Xen may not support more than a single grant-table page per domain.
*/
#define GNTTABOP_setup_table 2
-typedef struct gnttab_setup_table {
+struct gnttab_setup_table {
/* IN parameters. */
domid_t dom;
uint32_t nr_frames;
/* OUT parameters. */
int16_t status; /* GNTST_* */
XEN_GUEST_HANDLE(ulong) frame_list;
-} gnttab_setup_table_t;
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
/*
@@ -220,12 +224,13 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_setup_tab
* xen console. Debugging use only.
*/
#define GNTTABOP_dump_table 3
-typedef struct gnttab_dump_table {
+struct gnttab_dump_table {
/* IN parameters. */
domid_t dom;
/* OUT parameters. */
int16_t status; /* GNTST_* */
-} gnttab_dump_table_t;
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
/*
@@ -237,14 +242,15 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl
* to the calling domain *unless* the error is GNTST_bad_page.
*/
#define GNTTABOP_transfer 4
-typedef struct gnttab_transfer {
+struct gnttab_transfer {
/* IN parameters. */
unsigned long mfn;
domid_t domid;
grant_ref_t ref;
/* OUT parameters. */
int16_t status;
-} gnttab_transfer_t;
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
/*
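A sketch of the common backend pattern built on gnttab_map_grant_ref as restructured above, assuming the GNTMAP_host_map flag and GNTST_okay status defined elsewhere in this header (the helper and its error handling are illustrative):

    static int example_map(grant_ref_t ref, domid_t frontend,
                           uint64_t host_addr, grant_handle_t *handle)
    {
        struct gnttab_map_grant_ref map = {
            .host_addr = host_addr,     /* where to map in the backend */
            .flags     = GNTMAP_host_map,
            .ref       = ref,
            .dom       = frontend,
        };
        /* HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map, 1); */
        if ( map.status != GNTST_okay )
            return -1;
        *handle = map.handle;           /* needed later to unmap */
        return 0;
    }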
diff -r e74246451527 -r f54d38cea8ac xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/hvm/ioreq.h Tue May 30 14:30:34 2006 -0500
@@ -41,7 +41,7 @@
* prepare this structure and notify service OS and DM by sending
* virq
*/
-typedef struct {
+struct ioreq {
uint64_t addr; /* physical address */
uint64_t size; /* size in bytes */
uint64_t count; /* for rep prefixes */
@@ -55,31 +55,35 @@ typedef struct {
uint8_t df:1;
uint8_t type; /* I/O type */
    uint64_t io_count;          /* number of I/O operations done on this vcpu */
-} ioreq_t;
+};
+typedef struct ioreq ioreq_t;
#define MAX_VECTOR 256
#define BITS_PER_BYTE 8
#define INTR_LEN (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
#define INTR_LEN_32 (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
-typedef struct {
+struct global_iodata {
uint16_t pic_elcr;
uint16_t pic_irr;
uint16_t pic_last_irr;
uint16_t pic_clear_irr;
-} global_iodata_t;
+};
+typedef struct global_iodata global_iodata_t;
-typedef struct {
- ioreq_t vp_ioreq;
+struct vcpu_iodata {
+ struct ioreq vp_ioreq;
/* Event channel port */
unsigned int vp_eport; /* VMX vcpu uses this to notify DM */
unsigned int dm_eport; /* DM uses this to notify VMX vcpu */
-} vcpu_iodata_t;
+};
+typedef struct vcpu_iodata vcpu_iodata_t;
-typedef struct {
- global_iodata_t sp_global;
- vcpu_iodata_t vcpu_iodata[1];
-} shared_iopage_t;
+struct shared_iopage {
+ struct global_iodata sp_global;
+ struct vcpu_iodata vcpu_iodata[1];
+};
+typedef struct shared_iopage shared_iopage_t;
#endif /* _IOREQ_H_ */
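shared_iopage keeps its layout: a global_iodata header followed by one vcpu_iodata slot per vcpu, so a device model indexes by vcpu id to find a pending request. A minimal sketch (the helper name is hypothetical):

    static struct ioreq *example_get_ioreq(struct shared_iopage *sp,
                                           unsigned int vcpu_id)
    {
        return &sp->vcpu_iodata[vcpu_id].vp_ioreq;
    }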
diff -r e74246451527 -r f54d38cea8ac xen/include/public/hvm/vmx_assist.h
--- a/xen/include/public/hvm/vmx_assist.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/hvm/vmx_assist.h Tue May 30 14:30:34 2006 -0500
@@ -37,7 +37,7 @@ union vmcs_arbytes {
/*
* World switch state
*/
-typedef struct vmx_assist_context {
+struct vmx_assist_context {
uint32_t eip; /* execution pointer */
uint32_t esp; /* stack pointer */
uint32_t eflags; /* flags register */
@@ -80,7 +80,8 @@ typedef struct vmx_assist_context {
uint32_t ldtr_limit;
uint32_t ldtr_base;
union vmcs_arbytes ldtr_arbytes;
-} vmx_assist_context_t;
+};
+typedef struct vmx_assist_context vmx_assist_context_t;
#endif /* __ASSEMBLY__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/blkif.h
--- a/xen/include/public/io/blkif.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/blkif.h Tue May 30 14:30:34 2006 -0500
@@ -39,7 +39,7 @@
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
-typedef struct blkif_request {
+struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
@@ -51,13 +51,15 @@ typedef struct blkif_request {
/* @last_sect: last sector in frame to transfer (inclusive). */
uint8_t first_sect, last_sect;
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-} blkif_request_t;
+};
+typedef struct blkif_request blkif_request_t;
-typedef struct blkif_response {
+struct blkif_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
-} blkif_response_t;
+};
+typedef struct blkif_response blkif_response_t;
#define BLKIF_RSP_ERROR -1 /* non-specific 'error' */
#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */
@@ -66,7 +68,7 @@ typedef struct blkif_response {
* Generate blkif ring structures and types.
*/
-DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
#define VDISK_CDROM 0x1
#define VDISK_REMOVABLE 0x2
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/netif.h Tue May 30 14:30:34 2006 -0500
@@ -13,10 +13,10 @@
#include "../grant_table.h"
/*
- * Note that there is *never* any need to notify the backend when enqueuing
- * receive requests (netif_rx_request_t). Notifications after enqueuing any
- * other type of message should be conditional on the appropriate req_event
- * or rsp_event field in the shared ring.
+ * Note that there is *never* any need to notify the backend when
+ * enqueuing receive requests (struct netif_rx_request). Notifications
+ * after enqueuing any other type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
*/
/* Protocol checksum field is blank in the packet (hardware offload)? */
@@ -27,23 +27,26 @@
#define _NETTXF_data_validated (1)
#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
-typedef struct netif_tx_request {
+struct netif_tx_request {
grant_ref_t gref; /* Reference to buffer page */
uint16_t offset; /* Offset within buffer page */
uint16_t flags; /* NETTXF_* */
uint16_t id; /* Echoed in response message. */
uint16_t size; /* Packet size in bytes. */
-} netif_tx_request_t;
+};
+typedef struct netif_tx_request netif_tx_request_t;
-typedef struct netif_tx_response {
+struct netif_tx_response {
uint16_t id;
int16_t status; /* NETIF_RSP_* */
-} netif_tx_response_t;
+};
+typedef struct netif_tx_response netif_tx_response_t;
-typedef struct {
+struct netif_rx_request {
uint16_t id; /* Echoed in response message. */
grant_ref_t gref; /* Reference to incoming granted frame */
-} netif_rx_request_t;
+};
+typedef struct netif_rx_request netif_rx_request_t;
/* Packet data has been validated against protocol checksum. */
#define _NETRXF_data_validated (0)
@@ -53,19 +56,20 @@ typedef struct {
#define _NETRXF_csum_blank (1)
#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)
-typedef struct {
+struct netif_rx_response {
uint16_t id;
uint16_t offset; /* Offset in page of start of received packet */
uint16_t flags; /* NETRXF_* */
    int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */
-} netif_rx_response_t;
+};
+typedef struct netif_rx_response netif_rx_response_t;
/*
* Generate netif ring structures and types.
*/
-DEFINE_RING_TYPES(netif_tx, netif_tx_request_t, netif_tx_response_t);
-DEFINE_RING_TYPES(netif_rx, netif_rx_request_t, netif_rx_response_t);
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
#define NETIF_RSP_DROPPED -2
#define NETIF_RSP_ERROR -1
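DEFINE_RING_TYPES(netif_tx, ...) above generates netif_tx_front_ring_t and friends, used with the RING_* accessors from io/ring.h. A sketch of a frontend queuing one transmit request, per the notification rule in the comment near the top of this file (the wrapper and its parameters are illustrative):

    static void example_queue_tx(netif_tx_front_ring_t *tx, grant_ref_t gref,
                                 uint16_t offset, uint16_t id, uint16_t size,
                                 int *notify)
    {
        struct netif_tx_request *req = RING_GET_REQUEST(tx, tx->req_prod_pvt);
        req->gref   = gref;               /* grant covering the packet page */
        req->offset = offset;
        req->flags  = NETTXF_csum_blank;  /* illustrative: checksum offload */
        req->id     = id;                 /* echoed back in the response */
        req->size   = size;
        tx->req_prod_pvt++;
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(tx, *notify);
    }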
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/tpmif.h
--- a/xen/include/public/io/tpmif.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/tpmif.h Tue May 30 14:30:34 2006 -0500
@@ -18,12 +18,13 @@
#include "../grant_table.h"
-typedef struct {
+struct tpmif_tx_request {
unsigned long addr; /* Machine address of packet. */
grant_ref_t ref; /* grant table access reference */
uint16_t unused;
uint16_t size; /* Packet size in bytes. */
-} tpmif_tx_request_t;
+};
+typedef struct tpmif_tx_request tpmif_tx_request_t;
/*
* The TPMIF_TX_RING_SIZE defines the number of pages the
@@ -35,13 +36,15 @@ typedef uint32_t TPMIF_RING_IDX;
/* This structure must fit in a memory page. */
-typedef struct {
- tpmif_tx_request_t req;
-} tpmif_ring_t;
+struct tpmif_ring {
+ struct tpmif_tx_request req;
+};
+typedef struct tpmif_ring tpmif_ring_t;
-typedef struct {
- tpmif_ring_t ring[TPMIF_TX_RING_SIZE];
-} tpmif_tx_interface_t;
+struct tpmif_tx_interface {
+ struct tpmif_ring ring[TPMIF_TX_RING_SIZE];
+};
+typedef struct tpmif_tx_interface tpmif_tx_interface_t;
#endif
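
Because struct tpmif_ring carries exactly one request, the shared interface page is a plain fixed-size array of slots. A small sketch (helper name is hypothetical; tx is assumed to point at the mapped shared page):

    static struct tpmif_tx_request *slot_req(struct tpmif_tx_interface *tx,
                                             TPMIF_RING_IDX idx)
    {
        /* One request per ring entry; wrap at the ring size. */
        return &tx->ring[idx % TPMIF_TX_RING_SIZE].req;
    }
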
diff -r e74246451527 -r f54d38cea8ac xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/io/xenbus.h Tue May 30 14:30:34 2006 -0500
@@ -9,34 +9,37 @@
#ifndef _XEN_PUBLIC_IO_XENBUS_H
#define _XEN_PUBLIC_IO_XENBUS_H
-/* The state of either end of the Xenbus, i.e. the current communication
- status of initialisation across the bus. States here imply nothing about
- the state of the connection between the driver and the kernel's device
- layers. */
-typedef enum
-{
- XenbusStateUnknown = 0,
- XenbusStateInitialising = 1,
- XenbusStateInitWait = 2, /* Finished early initialisation, but waiting
- for information from the peer or hotplug
- scripts. */
- XenbusStateInitialised = 3, /* Initialised and waiting for a connection
- from the peer. */
- XenbusStateConnected = 4,
- XenbusStateClosing = 5, /* The device is being closed due to an error
- or an unplug event. */
- XenbusStateClosed = 6
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus. States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+ XenbusStateUnknown = 0,
-} XenbusState;
+ XenbusStateInitialising = 1,
+
+ /*
+ * InitWait: Finished early initialisation but waiting for information
+ * from the peer or hotplug scripts.
+ */
+ XenbusStateInitWait = 2,
+
+ /*
+ * Initialised: Waiting for a connection from the peer.
+ */
+ XenbusStateInitialised = 3,
+
+ XenbusStateConnected = 4,
+
+ /*
+ * Closing: The device is being closed due to an error or an unplug event.
+ */
+ XenbusStateClosing = 5,
+
+ XenbusStateClosed = 6
+};
+typedef enum xenbus_state XenbusState;
#endif /* _XEN_PUBLIC_IO_XENBUS_H */
-
-/*
- * Local variables:
- * c-file-style: "linux"
- * indent-tabs-mode: t
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
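
With the states now a named enum, a driver can switch over them directly. A hypothetical sketch of a frontend reacting to its peer (the function name and the actions taken are illustrative, not from the changeset):

    static void backend_changed(enum xenbus_state backend_state)
    {
        switch (backend_state) {
        case XenbusStateInitWait:   /* backend finished early init */
            /* publish our ring details */
            break;
        case XenbusStateConnected:  /* both ends ready: start I/O */
            break;
        case XenbusStateClosing:    /* error or unplug: tear down */
        case XenbusStateClosed:
            break;
        default:
            break;
        }
    }
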
diff -r e74246451527 -r f54d38cea8ac xen/include/public/memory.h
--- a/xen/include/public/memory.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/memory.h Tue May 30 14:30:34 2006 -0500
@@ -17,7 +17,7 @@
#define XENMEM_increase_reservation 0
#define XENMEM_decrease_reservation 1
#define XENMEM_populate_physmap 6
-typedef struct xen_memory_reservation {
+struct xen_memory_reservation {
/*
* XENMEM_increase_reservation:
@@ -49,7 +49,8 @@ typedef struct xen_memory_reservation {
*/
domid_t domid;
-} xen_memory_reservation_t;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
/*
@@ -74,7 +75,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_reser
* arg == addr of xen_machphys_mfn_list_t.
*/
#define XENMEM_machphys_mfn_list 5
-typedef struct xen_machphys_mfn_list {
+struct xen_machphys_mfn_list {
/*
* Size of the 'extent_start' array. Fewer entries will be filled if the
* machphys table is smaller than max_extents * 2MB.
@@ -93,7 +94,8 @@ typedef struct xen_machphys_mfn_list {
* than 'max_extents' if the machphys table is smaller than max_e * 2MB.
*/
unsigned int nr_extents;
-} xen_machphys_mfn_list_t;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
/*
@@ -102,7 +104,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn
* arg == addr of xen_add_to_physmap_t.
*/
#define XENMEM_add_to_physmap 7
-typedef struct xen_add_to_physmap {
+struct xen_add_to_physmap {
/* Which domain to change the mapping for. */
domid_t domid;
@@ -116,7 +118,8 @@ typedef struct xen_add_to_physmap {
/* GPFN where the source mapping page should appear. */
unsigned long gpfn;
-} xen_add_to_physmap_t;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
/*
@@ -124,7 +127,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_add_to_physm
* code on failure. This call only works for auto-translated guests.
*/
#define XENMEM_translate_gpfn_list 8
-typedef struct xen_translate_gpfn_list {
+struct xen_translate_gpfn_list {
/* Which domain to translate for? */
domid_t domid;
@@ -139,8 +142,37 @@ typedef struct xen_translate_gpfn_list {
* list (in which case each input GPFN is overwritten with the output MFN).
*/
XEN_GUEST_HANDLE(ulong) mfn_list;
-} xen_translate_gpfn_list_t;
+};
+typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started.
+ */
+#define XENMEM_memory_map 9
+struct xen_memory_map {
+ /*
+ * On call the number of entries which can be stored in buffer. On
+ * return the number of entries which have been stored in
+ * buffer.
+ */
+ unsigned int nr_entries;
+
+ /*
+ * Entries in the buffer are in the same format as returned by the
+ * BIOS INT 0x15 EAX=0xE820 call.
+ */
+ XEN_GUEST_HANDLE(void) buffer;
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ */
+#define XENMEM_machine_memory_map 10
#endif /* __XEN_PUBLIC_MEMORY_H__ */
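
A sketch of calling the new XENMEM_memory_map from a guest, assuming the usual HYPERVISOR_memory_op(cmd, arg) wrapper; the buffer entries follow the E820 layout (the local struct below mirrors struct e820entry from the new e820.h later in this changeset):

    struct e820_entry { uint64_t addr, size; uint32_t type; } __attribute__((packed));

    static int fetch_boot_memory_map(struct e820_entry *buf, unsigned int max)
    {
        struct xen_memory_map memmap = { .nr_entries = max };

        set_xen_guest_handle(memmap.buffer, buf);
        if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap) < 0)
            return -1;
        return memmap.nr_entries;   /* entries actually stored by Xen */
    }
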
diff -r e74246451527 -r f54d38cea8ac xen/include/public/nmi.h
--- a/xen/include/public/nmi.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/nmi.h Tue May 30 14:30:34 2006 -0500
@@ -34,10 +34,11 @@
* arg == pointer to xennmi_callback structure.
*/
#define XENNMI_register_callback 0
-typedef struct xennmi_callback {
+struct xennmi_callback {
unsigned long handler_address;
unsigned long pad;
-} xennmi_callback_t;
+};
+typedef struct xennmi_callback xennmi_callback_t;
DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t);
/*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/physdev.h
--- a/xen/include/public/physdev.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/physdev.h Tue May 30 14:30:34 2006 -0500
@@ -14,10 +14,11 @@
* @arg == pointer to physdev_eoi structure.
*/
#define PHYSDEVOP_eoi 12
-typedef struct physdev_eoi {
+struct physdev_eoi {
/* IN */
uint32_t irq;
-} physdev_eoi_t;
+};
+typedef struct physdev_eoi physdev_eoi_t;
DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
/*
@@ -25,12 +26,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
* @arg == pointer to physdev_irq_status_query structure.
*/
#define PHYSDEVOP_irq_status_query 5
-typedef struct physdev_irq_status_query {
+struct physdev_irq_status_query {
/* IN */
uint32_t irq;
/* OUT */
uint32_t flags; /* XENIRQSTAT_* */
-} physdev_irq_status_query_t;
+};
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
@@ -42,10 +44,11 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_stat
* @arg == pointer to physdev_set_iopl structure.
*/
#define PHYSDEVOP_set_iopl 6
-typedef struct physdev_set_iopl {
+struct physdev_set_iopl {
/* IN */
uint32_t iopl;
-} physdev_set_iopl_t;
+};
+typedef struct physdev_set_iopl physdev_set_iopl_t;
DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
/*
@@ -53,11 +56,12 @@ DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl
* @arg == pointer to physdev_set_iobitmap structure.
*/
#define PHYSDEVOP_set_iobitmap 7
-typedef struct physdev_set_iobitmap {
+struct physdev_set_iobitmap {
/* IN */
uint8_t *bitmap;
uint32_t nr_ports;
-} physdev_set_iobitmap_t;
+};
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
/*
@@ -66,13 +70,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_set_iobi
*/
#define PHYSDEVOP_apic_read 8
#define PHYSDEVOP_apic_write 9
-typedef struct physdev_apic {
+struct physdev_apic {
/* IN */
unsigned long apic_physbase;
uint32_t reg;
/* IN or OUT */
uint32_t value;
-} physdev_apic_t;
+};
+typedef struct physdev_apic physdev_apic_t;
DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
/*
@@ -81,28 +86,30 @@ DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
*/
#define PHYSDEVOP_alloc_irq_vector 10
#define PHYSDEVOP_free_irq_vector 11
-typedef struct physdev_irq {
+struct physdev_irq {
/* IN */
uint32_t irq;
/* IN or OUT */
uint32_t vector;
-} physdev_irq_t;
+};
+typedef struct physdev_irq physdev_irq_t;
DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
/*
 * Argument to physdev_op_compat() hypercall. Superseded by new physdev_op()
* hypercall since 0x00030202.
*/
-typedef struct physdev_op {
+struct physdev_op {
uint32_t cmd;
union {
- physdev_irq_status_query_t irq_status_query;
- physdev_set_iopl_t set_iopl;
- physdev_set_iobitmap_t set_iobitmap;
- physdev_apic_t apic_op;
- physdev_irq_t irq_op;
+ struct physdev_irq_status_query irq_status_query;
+ struct physdev_set_iopl set_iopl;
+ struct physdev_set_iobitmap set_iobitmap;
+ struct physdev_apic apic_op;
+ struct physdev_irq irq_op;
} u;
-} physdev_op_t;
+};
+typedef struct physdev_op physdev_op_t;
DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
/*
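
A sketch of the new-style interface in use, assuming the guest's HYPERVISOR_physdev_op(cmd, arg) wrapper (the new hypercall form available since interface version 0x00030202, per the comment above):

    static void ack_irq(uint32_t irq)
    {
        struct physdev_eoi eoi = { .irq = irq };

        /* Tell Xen the IRQ has been serviced (cf. the XENIRQSTAT_* flags). */
        HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
    }
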
diff -r e74246451527 -r f54d38cea8ac xen/include/public/sched.h
--- a/xen/include/public/sched.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/sched.h Tue May 30 14:30:34 2006 -0500
@@ -46,9 +46,10 @@
* @arg == pointer to sched_shutdown structure.
*/
#define SCHEDOP_shutdown 2
-typedef struct sched_shutdown {
+struct sched_shutdown {
unsigned int reason; /* SHUTDOWN_* */
-} sched_shutdown_t;
+};
+typedef struct sched_shutdown sched_shutdown_t;
DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
/*
@@ -57,11 +58,12 @@ DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t
* @arg == pointer to sched_poll structure.
*/
#define SCHEDOP_poll 3
-typedef struct sched_poll {
+struct sched_poll {
XEN_GUEST_HANDLE(evtchn_port_t) ports;
unsigned int nr_ports;
uint64_t timeout;
-} sched_poll_t;
+};
+typedef struct sched_poll sched_poll_t;
DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
/*
@@ -71,10 +73,11 @@ DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
* @arg == pointer to sched_remote_shutdown structure.
*/
#define SCHEDOP_remote_shutdown 4
-typedef struct sched_remote_shutdown {
+struct sched_remote_shutdown {
domid_t domain_id; /* Remote domain ID */
unsigned int reason; /* SHUTDOWN_xxx reason */
-} sched_remote_shutdown_t;
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
/*
diff -r e74246451527 -r f54d38cea8ac xen/include/public/sched_ctl.h
--- a/xen/include/public/sched_ctl.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/sched_ctl.h Tue May 30 14:30:34 2006 -0500
@@ -10,6 +10,7 @@
/* Scheduler types. */
#define SCHED_BVT 0
#define SCHED_SEDF 4
+#define SCHED_CREDIT 5
/* Set or get info? */
#define SCHED_INFO_PUT 0
@@ -48,6 +49,10 @@ struct sched_adjdom_cmd {
uint32_t extratime;
uint32_t weight;
} sedf;
+ struct sched_credit_adjdom {
+ uint16_t weight;
+ uint16_t cap;
+ } credit;
} u;
};
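
A minimal sketch of filling the new union member; weight is a relative share (sched_credit.c below defaults it to 256), and cap is assumed to be a percentage of one physical CPU with 0 meaning uncapped:

    static void fill_credit_params(struct sched_adjdom_cmd *cmd)
    {
        cmd->u.credit.weight = 512;  /* twice the default share */
        cmd->u.credit.cap    = 50;   /* assumed: percent of one physical CPU */
    }

The new tools/libxc/xc_csched.c later in this changeset wraps exactly this through DOM0_ADJUSTDOM.
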
diff -r e74246451527 -r f54d38cea8ac xen/include/public/vcpu.h
--- a/xen/include/public/vcpu.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/vcpu.h Tue May 30 14:30:34 2006 -0500
@@ -56,7 +56,7 @@
* @extra_arg == pointer to vcpu_runstate_info structure.
*/
#define VCPUOP_get_runstate_info 4
-typedef struct vcpu_runstate_info {
+struct vcpu_runstate_info {
/* VCPU's current state (RUNSTATE_*). */
int state;
/* When was current state entered (system time, ns)? */
@@ -66,7 +66,8 @@ typedef struct vcpu_runstate_info {
* guaranteed not to drift from system time.
*/
uint64_t time[4];
-} vcpu_runstate_info_t;
+};
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
/* VCPU is currently running on a physical CPU. */
#define RUNSTATE_running 0
@@ -99,12 +100,13 @@ typedef struct vcpu_runstate_info {
* @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
*/
#define VCPUOP_register_runstate_memory_area 5
-typedef struct vcpu_register_runstate_memory_area {
+struct vcpu_register_runstate_memory_area {
union {
struct vcpu_runstate_info *v;
uint64_t p;
} addr;
-} vcpu_register_runstate_memory_area_t;
+};
+typedef struct vcpu_register_runstate_memory_area
+    vcpu_register_runstate_memory_area_t;
#endif /* __XEN_PUBLIC_VCPU_H__ */
diff -r e74246451527 -r f54d38cea8ac xen/include/public/version.h
--- a/xen/include/public/version.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/version.h Tue May 30 14:30:34 2006 -0500
@@ -22,12 +22,13 @@ typedef char xen_extraversion_t[16];
/* arg == xen_compile_info_t. */
#define XENVER_compile_info 2
-typedef struct xen_compile_info {
+struct xen_compile_info {
char compiler[64];
char compile_by[16];
char compile_domain[32];
char compile_date[32];
-} xen_compile_info_t;
+};
+typedef struct xen_compile_info xen_compile_info_t;
#define XENVER_capabilities 3
typedef char xen_capabilities_info_t[1024];
@@ -38,15 +39,17 @@ typedef char xen_changeset_info_t[64];
#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
#define XENVER_platform_parameters 5
-typedef struct xen_platform_parameters {
+struct xen_platform_parameters {
unsigned long virt_start;
-} xen_platform_parameters_t;
+};
+typedef struct xen_platform_parameters xen_platform_parameters_t;
#define XENVER_get_features 6
-typedef struct xen_feature_info {
+struct xen_feature_info {
unsigned int submap_idx; /* IN: which 32-bit submap to return */
uint32_t submap; /* OUT: 32-bit submap */
-} xen_feature_info_t;
+};
+typedef struct xen_feature_info xen_feature_info_t;
/* Declares the features reported by XENVER_get_features. */
#include "features.h"
diff -r e74246451527 -r f54d38cea8ac xen/include/public/xen.h
--- a/xen/include/public/xen.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/xen.h Tue May 30 14:30:34 2006 -0500
@@ -195,7 +195,7 @@
#define MMUEXT_NEW_USER_BASEPTR 15
#ifndef __ASSEMBLY__
-typedef struct mmuext_op {
+struct mmuext_op {
unsigned int cmd;
union {
/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
@@ -209,7 +209,8 @@ typedef struct mmuext_op {
/* TLB_FLUSH_MULTI, INVLPG_MULTI */
void *vcpumask;
} arg2;
-} mmuext_op_t;
+};
+typedef struct mmuext_op mmuext_op_t;
DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
#endif
@@ -273,20 +274,22 @@ typedef uint16_t domid_t;
* Send an array of these to HYPERVISOR_mmu_update().
* NB. The fields are natural pointer/address size for this architecture.
*/
-typedef struct mmu_update {
+struct mmu_update {
uint64_t ptr; /* Machine address of PTE. */
uint64_t val; /* New contents of PTE. */
-} mmu_update_t;
+};
+typedef struct mmu_update mmu_update_t;
DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
/*
* Send an array of these to HYPERVISOR_multicall().
* NB. The fields are natural register size for this architecture.
*/
-typedef struct multicall_entry {
+struct multicall_entry {
unsigned long op, result;
unsigned long args[6];
-} multicall_entry_t;
+};
+typedef struct multicall_entry multicall_entry_t;
DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
/*
@@ -295,7 +298,7 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_
*/
#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
-typedef struct vcpu_time_info {
+struct vcpu_time_info {
/*
* Updates to the following values are preceded and followed by an
* increment of 'version'. The guest can therefore detect updates by
@@ -319,9 +322,10 @@ typedef struct vcpu_time_info {
uint32_t tsc_to_system_mul;
int8_t tsc_shift;
int8_t pad1[3];
-} vcpu_time_info_t; /* 32 bytes */
-
-typedef struct vcpu_info {
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
+
+struct vcpu_info {
/*
* 'evtchn_upcall_pending' is written non-zero by Xen to indicate
* a pending notification for a particular VCPU. It is then cleared
@@ -354,16 +358,17 @@ typedef struct vcpu_info {
#endif
uint8_t evtchn_upcall_mask;
unsigned long evtchn_pending_sel;
- arch_vcpu_info_t arch;
- vcpu_time_info_t time;
-} vcpu_info_t; /* 64 bytes (x86) */
+ struct arch_vcpu_info arch;
+ struct vcpu_time_info time;
+}; /* 64 bytes (x86) */
+typedef struct vcpu_info vcpu_info_t;
/*
* Xen/kernel shared data -- pointer provided in start_info.
* NB. We expect that this struct is smaller than a page.
*/
-typedef struct shared_info {
- vcpu_info_t vcpu_info[MAX_VIRT_CPUS];
+struct shared_info {
+ struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
/*
* A domain can create "event channels" on which it can send and receive
@@ -407,9 +412,10 @@ typedef struct shared_info {
uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
- arch_shared_info_t arch;
-
-} shared_info_t;
+ struct arch_shared_info arch;
+
+};
+typedef struct shared_info shared_info_t;
/*
* Start-of-day memory layout for the initial domain (DOM0):
@@ -437,7 +443,7 @@ typedef struct shared_info {
*/
#define MAX_GUEST_CMDLINE 1024
-typedef struct start_info {
+struct start_info {
/* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
char magic[32]; /* "xen-<version>-<platform>". */
unsigned long nr_pages; /* Total pages allocated to this domain. */
@@ -454,7 +460,8 @@ typedef struct start_info {
unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
int8_t cmd_line[MAX_GUEST_CMDLINE];
-} start_info_t;
+};
+typedef struct start_info start_info_t;
/* These flags are passed in the 'flags' field of start_info_t. */
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
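
The vcpu_time_info comment above describes a version-based protocol for lockless reads; a sketch of the consumer side (the version and system_time fields are elided from this hunk but present in the full header; rmb() stands in for an arch-appropriate read barrier):

    static uint64_t read_system_time(volatile struct vcpu_time_info *t)
    {
        uint32_t version;
        uint64_t stamp;

        do {
            version = t->version;   /* odd value => update in progress */
            rmb();
            stamp = t->system_time;
            rmb();
        } while ((version & 1) || (version != t->version));

        return stamp;
    }
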
diff -r e74246451527 -r f54d38cea8ac xen/include/public/xenoprof.h
--- a/xen/include/public/xenoprof.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/public/xenoprof.h Tue May 30 14:30:34 2006 -0500
@@ -41,7 +41,7 @@ struct event_log {
};
/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
-typedef struct xenoprof_buf {
+struct xenoprof_buf {
uint32_t event_head;
uint32_t event_tail;
uint32_t event_size;
@@ -51,10 +51,11 @@ typedef struct xenoprof_buf {
uint64_t user_samples;
uint64_t lost_samples;
struct event_log event_log[1];
-} xenoprof_buf_t;
+};
+typedef struct xenoprof_buf xenoprof_buf_t;
DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t);
-typedef struct xenoprof_init {
+struct xenoprof_init {
int32_t max_samples;
int32_t num_events;
int32_t is_primary;
@@ -62,10 +63,11 @@ typedef struct xenoprof_init {
int32_t bufsize;
uint64_t buf_maddr;
char cpu_type[XENOPROF_CPU_TYPE_SIZE];
-} xenoprof_init_t;
+};
+typedef struct xenoprof_init xenoprof_init_t;
DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t);
-typedef struct xenoprof_counter {
+struct xenoprof_counter {
uint32_t ind;
uint64_t count;
uint32_t enabled;
@@ -74,7 +76,8 @@ typedef struct xenoprof_counter {
uint32_t kernel;
uint32_t user;
uint64_t unit_mask;
-} xenoprof_counter_t;
+};
+typedef struct xenoprof_counter xenoprof_counter_t;
DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t);
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/hypercall.h
--- a/xen/include/xen/hypercall.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/hypercall.h Tue May 30 14:30:34 2006 -0500
@@ -80,7 +80,7 @@ do_vcpu_op(
extern long
do_acm_op(
- XEN_GUEST_HANDLE(acm_op_t) u_acm_op);
+ int cmd, XEN_GUEST_HANDLE(void) arg);
extern long
do_nmi_op(
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/sched-if.h Tue May 30 14:30:34 2006 -0500
@@ -58,6 +58,8 @@ struct scheduler {
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
+ void (*init) (void);
+ void (*tick) (unsigned int cpu);
int (*alloc_task) (struct vcpu *);
void (*add_task) (struct vcpu *);
void (*free_task) (struct domain *);
diff -r e74246451527 -r f54d38cea8ac xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Tue May 30 12:52:02 2006 -0500
+++ b/xen/include/xen/softirq.h Tue May 30 14:30:34 2006 -0500
@@ -26,6 +26,19 @@ asmlinkage void do_softirq(void);
asmlinkage void do_softirq(void);
extern void open_softirq(int nr, softirq_handler handler);
+static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr)
+{
+ int cpu;
+
+ for_each_cpu_mask(cpu, mask)
+ {
+ if ( test_and_set_bit(nr, &softirq_pending(cpu)) )
+ cpu_clear(cpu, mask);
+ }
+
+ smp_send_event_check_mask(mask);
+}
+
static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
{
if ( !test_and_set_bit(nr, &softirq_pending(cpu)) )
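
Note that cpumask_raise_softirq drops from its (by-value) mask any CPU whose softirq bit was already set, so only CPUs that actually transitioned receive an event-check IPI. A fragment mirroring its first caller, __runq_tickle in the new sched_credit.c below:

    cpumask_t mask = csched_priv.idlers;   /* idle-CPU mask kept by the scheduler */
    cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
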
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/util.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/util.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,115 @@
+/******************************************************************************
+ * arch/ia64/xen/util.c
+ * This file is the ia64 counterpart of drivers/xen/util.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/uaccess.h>
+#include <xen/driver_util.h>
+
+struct vm_struct *alloc_vm_area(unsigned long size)
+{
+ int order;
+ unsigned long virt;
+ unsigned long nr_pages;
+ struct vm_struct* area;
+
+ order = get_order(size);
+ virt = __get_free_pages(GFP_KERNEL, order);
+ if (virt == 0) {
+ goto err0;
+ }
+ nr_pages = 1 << order;
+ scrub_pages(virt, nr_pages);
+
+ area = kmalloc(sizeof(*area), GFP_KERNEL);
+ if (area == NULL) {
+ goto err1;
+ }
+
+ area->flags = VM_IOREMAP;//XXX
+ area->addr = (void*)virt;
+ area->size = size;
+ area->pages = NULL; //XXX
+ area->nr_pages = nr_pages;
+ area->phys_addr = __pa(virt);
+
+ return area;
+
+err1:
+ free_pages(virt, order);
+err0:
+ return NULL;
+
+}
+EXPORT_SYMBOL_GPL(alloc_vm_area);
+
+void free_vm_area(struct vm_struct *area)
+{
+ unsigned int order = get_order(area->size);
+ unsigned long i;
+
+ // This area is used for foreign page mapping.
+ // So the underlying machine pages may not be assigned.
+ for (i = 0; i < (1 << order); i++) {
+ unsigned long ret;
+ unsigned long gpfn = (area->phys_addr >> PAGE_SHIFT) + i;
+ struct xen_memory_reservation reservation = {
+ .nr_extents = 1,
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ set_xen_guest_handle(reservation.extent_start, &gpfn);
+ ret = HYPERVISOR_memory_op(XENMEM_populate_physmap,
+ &reservation);
+ BUG_ON(ret != 1);
+ }
+ free_pages((unsigned long)area->addr, order);
+ kfree(area);
+}
+EXPORT_SYMBOL_GPL(free_vm_area);
+
+void lock_vm_area(struct vm_struct *area)
+{
+ // nothing
+}
+EXPORT_SYMBOL_GPL(lock_vm_area);
+
+void unlock_vm_area(struct vm_struct *area)
+{
+ // nothing
+}
+EXPORT_SYMBOL_GPL(unlock_vm_area);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,185 @@
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+
+/*
+ * Set of CPUs that remote admin software will allow us to bring online.
+ * Notified to us via xenbus.
+ */
+static cpumask_t xenbus_allowed_cpumask;
+
+/* Set of CPUs that local admin will allow us to bring online. */
+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
+
+static int local_cpu_hotplug_request(void)
+{
+ /*
+ * We assume a CPU hotplug request comes from local admin if it is made
+ * via a userspace process (i.e., one with a real mm_struct).
+ */
+ return (current->mm != NULL);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+ int err;
+ char dir[32], state[32];
+
+ if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
+ return;
+
+ sprintf(dir, "cpu/%d", cpu);
+ err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
+ if (err != 1) {
+ printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+ return;
+ }
+
+ if (strcmp(state, "online") == 0) {
+ cpu_set(cpu, xenbus_allowed_cpumask);
+ (void)cpu_up(cpu);
+ } else if (strcmp(state, "offline") == 0) {
+ cpu_clear(cpu, xenbus_allowed_cpumask);
+ (void)cpu_down(cpu);
+ } else {
+ printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+ state, cpu);
+ }
+}
+
+static void handle_vcpu_hotplug_event(
+ struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+ int cpu;
+ char *cpustr;
+ const char *node = vec[XS_WATCH_PATH];
+
+ if ((cpustr = strstr(node, "cpu/")) != NULL) {
+ sscanf(cpustr, "cpu/%d", &cpu);
+ vcpu_hotplug(cpu);
+ }
+}
+
+static int smpboot_cpu_notify(struct notifier_block *notifier,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+
+ /*
+ * We do this in a callback notifier rather than __cpu_disable()
+ * because local_cpu_hotplug_request() does not work in the latter
+ * as it's always executed from within a stopmachine kthread.
+ */
+ if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
+ cpu_clear(cpu, local_allowed_cpumask);
+
+ return NOTIFY_OK;
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+ unsigned long event, void *data)
+{
+ int i;
+
+ static struct xenbus_watch cpu_watch = {
+ .node = "cpu",
+ .callback = handle_vcpu_hotplug_event,
+ .flags = XBWF_new_thread };
+ (void)register_xenbus_watch(&cpu_watch);
+
+ if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+ for_each_cpu(i)
+ vcpu_hotplug(i);
+ printk(KERN_INFO "Brought up %ld CPUs\n",
+ (long)num_online_cpus());
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+ static struct notifier_block hotplug_cpu = {
+ .notifier_call = smpboot_cpu_notify };
+ static struct notifier_block xsn_cpu = {
+ .notifier_call = setup_cpu_watcher };
+
+ register_cpu_notifier(&hotplug_cpu);
+ register_xenstore_notifier(&xsn_cpu);
+
+ return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
+int smp_suspend(void)
+{
+ int i, err;
+
+ lock_cpu_hotplug();
+
+ /*
+ * Take all other CPUs offline. We hold the hotplug mutex to
+ * avoid other processes bringing up CPUs under our feet.
+ */
+ while (num_online_cpus() > 1) {
+ unlock_cpu_hotplug();
+ for_each_online_cpu(i) {
+ if (i == 0)
+ continue;
+ err = cpu_down(i);
+ if (err) {
+ printk(KERN_CRIT "Failed to take all CPUs "
+ "down: %d.\n", err);
+ for_each_cpu(i)
+ vcpu_hotplug(i);
+ return err;
+ }
+ }
+ lock_cpu_hotplug();
+ }
+
+ return 0;
+}
+
+void smp_resume(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu)
+ cpu_initialize_context(cpu);
+
+ unlock_cpu_hotplug();
+
+ for_each_cpu(cpu)
+ vcpu_hotplug(cpu);
+}
+
+int cpu_up_check(unsigned int cpu)
+{
+ int rc = 0;
+
+ if (local_cpu_hotplug_request()) {
+ cpu_set(cpu, local_allowed_cpumask);
+ if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
+ printk("%s: attempt to bring up CPU %u disallowed by "
+ "remote admin.\n", __FUNCTION__, cpu);
+ rc = -EBUSY;
+ }
+ } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
+ !cpu_isset(cpu, xenbus_allowed_cpumask)) {
+ rc = -EBUSY;
+ }
+
+ return rc;
+}
+
+void init_xenbus_allowed_cpumask(void)
+{
+ xenbus_allowed_cpumask = cpu_present_map;
+}

diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,63 @@
+/*
+ * structures and definitions for the int 15, ax=e820 memory map
+ * scheme.
+ *
+ * In a nutshell, setup.S populates a scratch table in the
+ * empty_zero_block that contains a list of usable address/size
+ * duples. In setup.c, this information is transferred into the e820map,
+ * and in init.c/numa.c, that new information is used to mark pages
+ * reserved or not.
+ */
+#ifndef __E820_HEADER
+#define __E820_HEADER
+
+#include <linux/mmzone.h>
+
+#define E820MAP 0x2d0 /* our map */
+#define E820MAX 128 /* number of entries in E820MAP */
+#define E820NR 0x1e8 /* # entries in E820MAP */
+
+#define E820_RAM 1
+#define E820_RESERVED 2
+#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS 4
+
+#define HIGH_MEMORY (1024*1024)
+
+#define LOWMEMSIZE() (0x9f000)
+
+#ifndef __ASSEMBLY__
+struct e820entry {
+ u64 addr; /* start of memory segment */
+ u64 size; /* size of memory segment */
+ u32 type; /* type of memory segment */
+} __attribute__((packed));
+
+struct e820map {
+ int nr_map;
+ struct e820entry map[E820MAX];
+};
+
+extern unsigned long find_e820_area(unsigned long start, unsigned long end,
+ unsigned size);
+extern void add_memory_region(unsigned long start, unsigned long size,
+ int type);
+extern void setup_memory_region(void);
+extern void contig_e820_setup(void);
+extern unsigned long e820_end_of_ram(void);
+extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
+extern void e820_print_map(char *who);
+extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+
+extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start, unsigned long end);
+extern void e820_setup_gap(struct e820entry *e820, int nr_map);
+extern unsigned long e820_hole_size(unsigned long start_pfn,
+ unsigned long end_pfn);
+
+extern void __init parse_memopt(char *p, char **end);
+extern void __init parse_memmapopt(char *p, char **end);
+
+extern struct e820map e820;
+#endif/*!__ASSEMBLY__*/
+
+#endif/*__E820_HEADER*/
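
A sketch of walking the map this header describes, e.g. to total usable RAM (the helper name is illustrative):

    static u64 e820_total_ram(const struct e820map *m)
    {
        u64 total = 0;
        int i;

        for (i = 0; i < m->nr_map; i++)
            if (m->map[i].type == E820_RAM)
                total += m->map[i].size;
        return total;
    }
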
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/xen/cpu_hotplug.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,42 @@
+#ifndef __XEN_CPU_HOTPLUG_H__
+#define __XEN_CPU_HOTPLUG_H__
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+
+#if defined(CONFIG_HOTPLUG_CPU)
+
+#if defined(CONFIG_X86)
+void cpu_initialize_context(unsigned int cpu);
+#else
+#define cpu_initialize_context(cpu) ((void)0)
+#endif
+
+int cpu_up_check(unsigned int cpu);
+void init_xenbus_allowed_cpumask(void);
+int smp_suspend(void);
+void smp_resume(void);
+
+#else /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#define cpu_up_check(cpu) (0)
+#define init_xenbus_allowed_cpumask() ((void)0)
+
+static inline int smp_suspend(void)
+{
+ if (num_online_cpus() > 1) {
+ printk(KERN_WARNING "Can't suspend SMP guests "
+ "without CONFIG_HOTPLUG_CPU\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static inline void smp_resume(void)
+{
+}
+
+#endif /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#endif /* __XEN_CPU_HOTPLUG_H__ */
diff -r e74246451527 -r f54d38cea8ac patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,18 @@
+diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c
+--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c 2006-05-02 22:38:44.000000000 +0100
++++ ./drivers/ide/ide-lib.c 2006-05-24 18:37:05.000000000 +0100
+@@ -410,10 +410,10 @@
+ {
+ u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
+
+- if (!PCI_DMA_BUS_IS_PHYS) {
+- addr = BLK_BOUNCE_ANY;
+- } else if (on && drive->media == ide_disk) {
+- if (HWIF(drive)->pci_dev)
++ if (on && drive->media == ide_disk) {
++ if (!PCI_DMA_BUS_IS_PHYS)
++ addr = BLK_BOUNCE_ANY;
++ else if (HWIF(drive)->pci_dev)
+ addr = HWIF(drive)->pci_dev->dma_mask;
+ }
+
diff -r e74246451527 -r f54d38cea8ac patches/linux-2.6.16.13/xen-hotplug.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/xen-hotplug.patch Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,11 @@
+--- ../pristine-linux-2.6.16.13/fs/proc/proc_misc.c 2006-05-02 22:38:44.000000000 +0100
++++ ./fs/proc/proc_misc.c 2006-05-22 15:29:34.000000000 +0100
+@@ -433,7 +433,7 @@ static int show_stat(struct seq_file *p,
+ (unsigned long long)cputime64_to_clock_t(irq),
+ (unsigned long long)cputime64_to_clock_t(softirq),
+ (unsigned long long)cputime64_to_clock_t(steal));
+- for_each_online_cpu(i) {
++ for_each_cpu(i) {
+
+ /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
+ user = kstat_cpu(i).cpustat.user;
diff -r e74246451527 -r f54d38cea8ac tools/libxc/xc_csched.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_csched.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * (C) 2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ * File: xc_csched.c
+ * Author: Emmanuel Ackaouy
+ *
+ * Description: XC Interface to the credit scheduler
+ *
+ */
+#include "xc_private.h"
+
+
+int
+xc_sched_credit_domain_set(
+ int xc_handle,
+ uint32_t domid,
+ struct sched_credit_adjdom *sdom)
+{
+ DECLARE_DOM0_OP;
+
+ op.cmd = DOM0_ADJUSTDOM;
+ op.u.adjustdom.domain = (domid_t) domid;
+ op.u.adjustdom.sched_id = SCHED_CREDIT;
+ op.u.adjustdom.direction = SCHED_INFO_PUT;
+ op.u.adjustdom.u.credit = *sdom;
+
+ return do_dom0_op(xc_handle, &op);
+}
+
+int
+xc_sched_credit_domain_get(
+ int xc_handle,
+ uint32_t domid,
+ struct sched_credit_adjdom *sdom)
+{
+ DECLARE_DOM0_OP;
+ int err;
+
+ op.cmd = DOM0_ADJUSTDOM;
+ op.u.adjustdom.domain = (domid_t) domid;
+ op.u.adjustdom.sched_id = SCHED_CREDIT;
+ op.u.adjustdom.direction = SCHED_INFO_GET;
+
+ err = do_dom0_op(xc_handle, &op);
+ if ( err == 0 )
+ *sdom = op.u.adjustdom.u.credit;
+
+ return err;
+}
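
A sketch of the intended read-modify-write usage (the caller is hypothetical; xc_handle and domid come from the usual libxc setup):

    static int cap_domain_at_half_a_cpu(int xc_handle, uint32_t domid)
    {
        struct sched_credit_adjdom params;

        if (xc_sched_credit_domain_get(xc_handle, domid, &params))
            return -1;
        params.cap = 50;    /* assumed: percent of one physical CPU */
        return xc_sched_credit_domain_set(xc_handle, domid, &params);
    }
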
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_linux.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenstore/xenstored_linux.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "xenstored_core.h"
+
+#define XENSTORED_PROC_KVA "/proc/xen/xsd_kva"
+#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
+
+evtchn_port_t xenbus_evtchn(void)
+{
+ int fd;
+ int rc;
+ evtchn_port_t port;
+ char str[20];
+
+ fd = open(XENSTORED_PROC_PORT, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ rc = read(fd, str, sizeof(str));
+ if (rc == -1)
+ {
+ int err = errno;
+ close(fd);
+ errno = err;
+ return -1;
+ }
+
+ str[rc] = '\0';
+ port = strtoul(str, NULL, 0);
+
+ close(fd);
+ return port;
+}
+
+void *xenbus_map(void)
+{
+ int fd;
+ void *addr;
+
+ fd = open(XENSTORED_PROC_KVA, O_RDWR);
+ if (fd == -1)
+ return NULL;
+
+ addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
+ MAP_SHARED, fd, 0);
+
+ if (addr == MAP_FAILED)
+ addr = NULL;
+
+ close(fd);
+
+ return addr;
+}
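
A sketch of how the daemon's core might consume these two helpers (the function and its error handling are illustrative only; both helpers signal failure with a sentinel):

    static int setup_domain_comms(void)
    {
        evtchn_port_t port = xenbus_evtchn();
        void *interface = xenbus_map();

        if (port == (evtchn_port_t)-1 || interface == NULL)
            return -1;
        /* bind the event channel and hand the page to the comms loop */
        return 0;
    }
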
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/block-integrity/01_block_device_read_verify.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+
+# Copyright (C) International Business Machines Corp., 2006
+# Author: Harry Butterworth <butterwo@xxxxxxxxxx>
+
+# This test initialises a ram disk in dom0 with data from /dev/urandom and
+# then imports the ram disk device as a physical device into a domU. The md5
+# checksum of the data in the ramdisk is calculated in dom0 and also
+# calculated by the domU reading the data through the blk frontend and
+# backend drivers. The test succeeds if the checksums match indicating that
+# the domU successfully read all the correct data from the device.
+
+import re
+
+from XmTestLib import *
+from XmTestLib.block_utils import *
+
+if ENABLE_HVM_SUPPORT:
+ SKIP("Block-attach not supported for HVM domains")
+
+domain = XmTestDomain()
+
+try:
+ console = domain.start()
+except DomainError, e:
+ FAIL(str(e))
+
+console.setHistorySaveCmds(value=True)
+
+traceCommand("cat /dev/urandom > /dev/ram1")
+
+s, o = traceCommand("md5sum /dev/ram1")
+
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+
+block_attach(domain, "phy:ram1", "hda1")
+
+try:
+ run = console.runCmd("md5sum /dev/hda1")
+except ConsoleError, e:
+ FAIL(str(e))
+
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+
+domain.closeConsole()
+
+domain.stop()
+
+if dom0_md5sum_match == None:
+ FAIL("Failed to get md5sum of test ram disk in dom0.")
+
+if domU_md5sum_match == None:
+ FAIL("Failed to get md5sum of test ram disk in domU.")
+
+if verbose:
+ print "md5sum dom0:"
+ print dom0_md5sum_match.group()
+ print "md5sum domU:"
+ print domU_md5sum_match.group()
+
+if dom0_md5sum_match.group() != domU_md5sum_match.group():
+ FAIL("MISCOMPARE: data read in domU did not match data provided by dom0.")
diff -r e74246451527 -r f54d38cea8ac tools/xm-test/tests/block-integrity/Makefile.am
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xm-test/tests/block-integrity/Makefile.am Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,21 @@
+
+SUBDIRS =
+
+TESTS = 01_block_device_read_verify.test
+
+XFAIL_TESTS =
+
+EXTRA_DIST = $(TESTS) $(XFAIL_TESTS)
+
+TESTS_ENVIRONMENT=@TENV@
+
+%.test: %.py
+ cp $< $@
+ chmod +x $@
+
+clean-local: am_config_clean-local
+
+am_config_clean-local:
+ rm -f *test
+ rm -f *log
+ rm -f *~
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/tools/sparse-merge
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/tools/sparse-merge Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,144 @@
+#!/bin/bash
+# Generate a patch for each of the ia64 files in the linux-2.6-xen-sparse tree
+
+# Path to mercurial tree of upstream Linux
+# WARNING: This will do an 'hg up -C' on the upstream Linux tree, you
+# will lose data if there's anything there you care about.
+: ${LINUXPATH:=/tmp/linux-2.6}
+# Tag of current base upstream image for Xen files
+: ${OLDTAG:=v$(awk '/^LINUX_VER/{print $NF}' buildconfigs/mk.linux-2.6-xen)}
+# Tag of new upstream base to go to
+: ${NEWTAG:=v$(wget -O- -o/dev/null http://kernel.org/kdist/finger_banner \
+ | awk '/latest stable/{print $NF}')}
+# Restrict merge to specific arch (set to . for all)
+: ${ARCH:=ia64}
+
+SPARSEDIR=linux-2.6-xen-sparse
+WD=$PWD
+
+if [ ! -d $SPARSEDIR ]; then
+ echo "Can't find $SPARSEDIR directory."
+ exit
+fi
+
+# Check for modified files in the sparse tree before starting
+if hg st $SPARSEDIR | head | grep .; then
+ echo
+ echo "$SPARSEDIR contains modifications, please clean it up first"
+ exit
+fi
+
+# We want the linux upstream tree to be at the OLDTAG to get the OLDTAG-Xen diff.
+# Save current revision to restore when done
+cd $LINUXPATH || exit 1
+OLDCSET=$(hg parents | awk '/^changeset:/{print($2)}' | cut -f 1 -d :)
+for t in $OLDTAG $NEWTAG; do
+ if ! hg tags | cut -f1 -d' ' | grep -Fx $t; then
+ echo "Tag $t not found, ketching up"
+ hg up -C ${t%.*} || exit 1
+ ketchup ${t#v} || exit 1
+ hg addremove
+ hg ci -m $t
+ hg tag -l $t
+ fi
+done
+hg up -C $OLDTAG || exit 1
+
+cd $WD
+for i in $(hg manifest | awk '{print($3)}' | grep $SPARSEDIR | grep "$ARCH"); do
+ cd $WD
+
+ FILENAME=$(basename $i)
+ DIRNAME=$(dirname $i)
+ DIFFPATH=$(echo $i | sed -e "s,^$SPARSEDIR,$LINUXPATH,")
+
+ if [ ! -d $DIRNAME ]; then
+ echo "Hmm, something bad happened parsing directory name: $i"
+ continue
+ fi
+
+ if [ ! -e $DIFFPATH ]; then
+ continue
+ fi
+
+ echo -n "$i ... "
+
+ cd $DIRNAME
+ XENDIR=$(pwd)
+
+ ORIGPATH=$(echo $i | sed -e "s/^$SPARSEDIR/./")
+ APATH=$(echo $i | sed -e "s/^$SPARSEDIR/a/")
+ BPATH=$(echo $i | sed -e "s/^$SPARSEDIR/b/")
+ cd $LINUXPATH
+ hg diff -r $OLDTAG -r $NEWTAG $ORIGPATH | \
+ sed -e "s,^--- $APATH,--- $FILENAME," \
+ -e "s,^+++ $BPATH,+++ $FILENAME," \
+ > $XENDIR/$FILENAME-$OLDTAG-$NEWTAG.diff
+ cd $XENDIR
+
+ # Do we have a diff file? Did anything change?
+ if [ ! -s $FILENAME-$OLDTAG-$NEWTAG.diff ]; then
+ echo "SUCCESS (Upstream unchanged)"
+ continue
+ fi
+
+ if ! patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1; then
+ # It failed, how badly?
+ if [ ! -e ${FILENAME}.rej ]; then
+ echo "ERROR, Hmm, no .rej file, but diff failed, fix manually"
+ continue
+ fi
+ TONEWREJ=$(wc -l ${FILENAME}.rej | \
+ awk '{print($1)}')
+ hg st $FILENAME | grep -q . && hg revert $FILENAME
+ rm -f ${FILENAME}.rej ${FILENAME}.orig
+ diff -uN $DIFFPATH $FILENAME | \
+ sed -e "s,^--- $DIFFPATH,--- $FILENAME," \
+ > $FILENAME-$OLDTAG-Xen.diff
+
+ if [ ! -e $FILENAME-$OLDTAG-Xen.diff ]; then
+ echo "ERROR, failed to create patch file"
+ continue
+ fi
+
+ if ! patch -R -i $FILENAME-$OLDTAG-Xen.diff > /dev/null 2>&1; then
+ echo "ERROR, reverting Xen changes failed"
+ hg revert $FILENAME
+ continue
+ fi
+
+ if ! patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1; then
+ echo "ERROR, new upstream patch failed on reverted file"
+ hg revert $FILENAME
+ continue
+ fi
+
+ if ! patch -f -i $FILENAME-$OLDTAG-Xen.diff > /dev/null 2>&1; then
+ if [ ! -e ${FILENAME}.rej ]; then
+ echo "ERROR, Hmm, no .rej file, but diff failed, fix manually"
+ continue
+ fi
+ TOXENREJ=$(wc -l ${FILENAME}.rej | \
+ awk '{print($1)}')
+
+ if [ $TOXENREJ -gt $TONEWREJ ]; then
+ hg revert $FILENAME
+ rm -f ${FILENAME}.rej ${FILENAME}.orig
+ patch -f -i $FILENAME-$OLDTAG-$NEWTAG.diff > /dev/null 2>&1
+ echo "MANUAL MERGE REQUIRED (Upstream reject)"
+ else
+ echo "MANUAL MERGE REQUIRED (Xen reject)"
+ fi
+
+ else
+ rm -f ${FILENAME}.rej ${FILENAME}.orig
+ echo "SUCCESS (Re-applied Xen patch)"
+ fi
+ else
+ rm -f ${FILENAME}.rej ${FILENAME}.orig
+ echo "SUCCESS (Upstream applied)"
+ fi
+done
+find $SPARSEDIR -name \*.diff -empty | xargs -r rm -f
+cd $LINUXPATH
+hg up -C $OLDCSET
diff -r e74246451527 -r f54d38cea8ac xen/arch/ia64/xen/efi_emul.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/xen/efi_emul.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,180 @@
+/*
+ * efi_emul.c:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/compile.h>
+#include <asm/pgalloc.h>
+#include <asm/vcpu.h>
+#include <asm/dom_fw.h>
+#include <public/sched.h>
+
+extern unsigned long translate_domain_mpaddr(unsigned long);
+extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr);
+
+// given a current domain (virtual or metaphysical) address, return the virtual address
+static unsigned long
+efi_translate_domain_addr(unsigned long domain_addr, IA64FAULT *fault)
+{
+ struct vcpu *v = current;
+ unsigned long mpaddr = domain_addr;
+ *fault = IA64_NO_FAULT;
+
+ if (v->domain->arch.efi_virt_mode) {
+ *fault = vcpu_tpa(v, domain_addr, &mpaddr);
+ if (*fault != IA64_NO_FAULT) return 0;
+ }
+
+ return ((unsigned long) __va(translate_domain_mpaddr(mpaddr)));
+}
+
+static efi_status_t
+efi_emulate_get_time(
+ unsigned long tv_addr, unsigned long tc_addr,
+ IA64FAULT *fault)
+{
+ unsigned long tv = 0, tc = 0;
+ efi_status_t status;
+
+ //printf("efi_get_time(%016lx,%016lx) called\n", tv_addr, tc_addr);
+ tv = efi_translate_domain_addr(tv_addr, fault);
+ if (*fault != IA64_NO_FAULT) return 0;
+ if (tc_addr) {
+ tc = efi_translate_domain_addr(tc_addr, fault);
+ if (*fault != IA64_NO_FAULT) return 0;
+ }
+ //printf("efi_get_time(%016lx,%016lx) translated to xen virtual address\n", tv, tc);
+ status = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t *) tc);
+ //printf("efi_get_time returns %lx\n", status);
+ return status;
+}
+
+static efi_status_t
+efi_emulate_set_virtual_address_map(
+ unsigned long memory_map_size, unsigned long descriptor_size,
+ u32 descriptor_version, efi_memory_desc_t *virtual_map)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t entry, *md = &entry;
+ u64 efi_desc_size;
+
+ unsigned long *vfn;
+ struct domain *d = current->domain;
+ efi_runtime_services_t *efi_runtime = d->arch.efi_runtime;
+
+ if (descriptor_version != EFI_MEMDESC_VERSION) {
+ printf ("efi_emulate_set_virtual_address_map: memory descriptor version unmatched\n");
+ return EFI_INVALID_PARAMETER;
+ }
+
+ if (descriptor_size != sizeof(efi_memory_desc_t)) {
+ printf ("efi_emulate_set_virtual_address_map: memory descriptor size unmatched\n");
+ return EFI_INVALID_PARAMETER;
+ }
+
+ if (d->arch.efi_virt_mode) return EFI_UNSUPPORTED;
+
+ efi_map_start = virtual_map;
+ efi_map_end = efi_map_start + memory_map_size;
+ efi_desc_size = sizeof(efi_memory_desc_t);
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ if (copy_from_user(&entry, p, sizeof(efi_memory_desc_t))) {
+ printf ("efi_emulate_set_virtual_address_map: copy_from_user() fault. addr=0x%p\n", p);
+ return EFI_UNSUPPORTED;
+ }
+
+ /* skip over non-PAL_CODE memory descriptors; EFI_RUNTIME is included in PAL_CODE. */
+ if (md->type != EFI_PAL_CODE)
+ continue;
+
+#define EFI_HYPERCALL_PATCH_TO_VIRT(tgt,call) \
+ do { \
+ vfn = (unsigned long *) domain_mpa_to_imva(d, tgt); \
+ *vfn++ = FW_HYPERCALL_##call##_INDEX * 16UL + md->virt_addr; \
+ *vfn++ = 0; \
+ } while (0)
+
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_time,EFI_GET_TIME);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_time,EFI_SET_TIME);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_variable,EFI_GET_VARIABLE);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->set_variable,EFI_SET_VARIABLE);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT);
+ EFI_HYPERCALL_PATCH_TO_VIRT(efi_runtime->reset_system,EFI_RESET_SYSTEM);
+ }
+
+ /* The virtual address map has been applied. */
+ d->arch.efi_virt_mode = 1;
+
+ return EFI_SUCCESS;
+}
+
+efi_status_t
+efi_emulator (struct pt_regs *regs, IA64FAULT *fault)
+{
+ struct vcpu *v = current;
+ efi_status_t status;
+
+ *fault = IA64_NO_FAULT;
+
+ switch (regs->r2) {
+ case FW_HYPERCALL_EFI_RESET_SYSTEM:
+ printf("efi.reset_system called ");
+ if (current->domain == dom0) {
+ printf("(by dom0)\n ");
+ (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+ }
+ else
+ domain_shutdown (current->domain, SHUTDOWN_reboot);
+ status = EFI_UNSUPPORTED;
+ break;
+ case FW_HYPERCALL_EFI_GET_TIME:
+ status = efi_emulate_get_time (
+ vcpu_get_gr(v,32),
+ vcpu_get_gr(v,33),
+ fault);
+ break;
+ case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+ status = efi_emulate_set_virtual_address_map (
+ vcpu_get_gr(v,32),
+ vcpu_get_gr(v,33),
+ (u32) vcpu_get_gr(v,34),
+ (efi_memory_desc_t *) vcpu_get_gr(v,35));
+ break;
+ case FW_HYPERCALL_EFI_SET_TIME:
+ case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+ case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+ // FIXME: need fixes in efi.h from 2.6.9
+ case FW_HYPERCALL_EFI_GET_VARIABLE:
+ // FIXME: need fixes in efi.h from 2.6.9
+ case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+ case FW_HYPERCALL_EFI_SET_VARIABLE:
+ case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+ // FIXME: need fixes in efi.h from 2.6.9
+ status = EFI_UNSUPPORTED;
+ break;
+ default:
+ printf("unknown ia64 fw hypercall %lx\n", regs->r2);
+ status = EFI_UNSUPPORTED;
+ }
+
+ return status;
+}
diff -r e74246451527 -r f54d38cea8ac xen/common/sched_credit.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/sched_credit.c Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,1233 @@
+/****************************************************************************
+ * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ * File: common/csched_credit.c
+ * Author: Emmanuel Ackaouy
+ *
+ * Description: Credit-based SMP CPU scheduler
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/time.h>
+#include <xen/perfc.h>
+#include <xen/sched-if.h>
+#include <xen/softirq.h>
+#include <asm/atomic.h>
+
+
+/*
+ * CSCHED_STATS
+ *
+ * Manage very basic counters and stats.
+ *
+ * Useful for debugging live systems. The stats are displayed
+ * with runq dumps ('r' on the Xen console).
+ */
+#define CSCHED_STATS
+
+
+/*
+ * Basic constants
+ */
+#define CSCHED_TICK 10 /* milliseconds */
+#define CSCHED_TSLICE 30 /* milliseconds */
+#define CSCHED_ACCT_NTICKS 3
+#define CSCHED_ACCT_PERIOD (CSCHED_ACCT_NTICKS * CSCHED_TICK)
+#define CSCHED_DEFAULT_WEIGHT 256
+
+
+/*
+ * Priorities
+ */
+#define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */
+#define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */
+#define CSCHED_PRI_IDLE -64 /* idle */
+#define CSCHED_PRI_TS_PARKED -65 /* time-share w/ capped credits */
+
+
+/*
+ * Useful macros
+ */
+#define CSCHED_PCPU(_c) ((struct csched_pcpu *)schedule_data[_c].sched_priv)
+#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
+#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
+#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
+
+
+/*
+ * Stats
+ */
+#ifdef CSCHED_STATS
+
+#define CSCHED_STAT(_X) (csched_priv.stats._X)
+#define CSCHED_STAT_DEFINE(_X) uint32_t _X;
+#define CSCHED_STAT_PRINTK(_X) \
+ do \
+ { \
+ printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X)); \
+ } while ( 0 );
+
+#define CSCHED_STATS_EXPAND_SCHED(_MACRO) \
+ _MACRO(vcpu_alloc) \
+ _MACRO(vcpu_add) \
+ _MACRO(vcpu_sleep) \
+ _MACRO(vcpu_wake_running) \
+ _MACRO(vcpu_wake_onrunq) \
+ _MACRO(vcpu_wake_runnable) \
+ _MACRO(vcpu_wake_not_runnable) \
+ _MACRO(dom_free) \
+ _MACRO(schedule) \
+ _MACRO(tickle_local_idler) \
+ _MACRO(tickle_local_over) \
+ _MACRO(tickle_local_under) \
+ _MACRO(tickle_local_other) \
+ _MACRO(acct_run) \
+ _MACRO(acct_no_work) \
+ _MACRO(acct_balance) \
+ _MACRO(acct_reorder) \
+ _MACRO(acct_min_credit) \
+ _MACRO(acct_vcpu_active) \
+ _MACRO(acct_vcpu_idle) \
+ _MACRO(acct_vcpu_credit_min)
+
+#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO) \
+ _MACRO(vcpu_migrate) \
+ _MACRO(load_balance_idle) \
+ _MACRO(load_balance_over) \
+ _MACRO(load_balance_other) \
+ _MACRO(steal_trylock_failed) \
+ _MACRO(steal_peer_down) \
+ _MACRO(steal_peer_idle) \
+ _MACRO(steal_peer_running) \
+ _MACRO(steal_peer_pinned) \
+ _MACRO(tickle_idlers_none) \
+ _MACRO(tickle_idlers_some)
+
+#ifndef NDEBUG
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO) \
+ _MACRO(vcpu_check)
+#else
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+#endif
+
+#define CSCHED_STATS_EXPAND(_MACRO) \
+ CSCHED_STATS_EXPAND_SCHED(_MACRO) \
+ CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO) \
+ CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+
+#define CSCHED_STATS_RESET() \
+ do \
+ { \
+ memset(&csched_priv.stats, 0, sizeof(csched_priv.stats)); \
+ } while ( 0 )
+
+#define CSCHED_STATS_DEFINE() \
+ struct \
+ { \
+ CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
+ } stats
+
+#define CSCHED_STATS_PRINTK() \
+ do \
+ { \
+ printk("stats:\n"); \
+ CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
+ } while ( 0 )
+
+#define CSCHED_STAT_CRANK(_X) (CSCHED_STAT(_X)++)
+
+#else /* CSCHED_STATS */
+
+#define CSCHED_STATS_RESET() do {} while ( 0 )
+#define CSCHED_STATS_DEFINE() do {} while ( 0 )
+#define CSCHED_STATS_PRINTK() do {} while ( 0 )
+#define CSCHED_STAT_CRANK(_X) do {} while ( 0 )
+
+#endif /* CSCHED_STATS */
+
+
+/*
+ * Physical CPU
+ */
+struct csched_pcpu {
+ struct list_head runq;
+ uint32_t runq_sort_last;
+};
+
+/*
+ * Virtual CPU
+ */
+struct csched_vcpu {
+ struct list_head runq_elem;
+ struct list_head active_vcpu_elem;
+ struct csched_dom *sdom;
+ struct vcpu *vcpu;
+ atomic_t credit;
+ int credit_last;
+ uint32_t credit_incr;
+ uint32_t state_active;
+ uint32_t state_idle;
+ int16_t pri;
+};
+
+/*
+ * Domain
+ */
+struct csched_dom {
+ struct list_head active_vcpu;
+ struct list_head active_sdom_elem;
+ struct domain *dom;
+ uint16_t active_vcpu_count;
+ uint16_t weight;
+ uint16_t cap;
+};
+
+/*
+ * System-wide private data
+ */
+struct csched_private {
+ spinlock_t lock;
+ struct list_head active_sdom;
+ uint32_t ncpus;
+ unsigned int master;
+ cpumask_t idlers;
+ uint32_t weight;
+ uint32_t credit;
+ int credit_balance;
+ uint32_t runq_sort;
+ CSCHED_STATS_DEFINE();
+};
+
+
+/*
+ * Global variables
+ */
+static struct csched_private csched_priv;
+
+
+
+static inline int
+__vcpu_on_runq(struct csched_vcpu *svc)
+{
+ return !list_empty(&svc->runq_elem);
+}
+
+static inline struct csched_vcpu *
+__runq_elem(struct list_head *elem)
+{
+ return list_entry(elem, struct csched_vcpu, runq_elem);
+}
+
+static inline void
+__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
+{
+ const struct list_head * const runq = RUNQ(cpu);
+ struct list_head *iter;
+
+ BUG_ON( __vcpu_on_runq(svc) );
+ BUG_ON( cpu != svc->vcpu->processor );
+
+ list_for_each( iter, runq )
+ {
+ const struct csched_vcpu * const iter_svc = __runq_elem(iter);
+ if ( svc->pri > iter_svc->pri )
+ break;
+ }
+
+ list_add_tail(&svc->runq_elem, iter);
+}
+
+static inline void
+__runq_remove(struct csched_vcpu *svc)
+{
+ BUG_ON( !__vcpu_on_runq(svc) );
+ list_del_init(&svc->runq_elem);
+}
+
+static inline void
+__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
+{
+ struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr);
+ cpumask_t mask;
+
+ ASSERT(cur);
+ cpus_clear(mask);
+
+ /* If strictly higher priority than current VCPU, signal the CPU */
+ if ( new->pri > cur->pri )
+ {
+ if ( cur->pri == CSCHED_PRI_IDLE )
+ CSCHED_STAT_CRANK(tickle_local_idler);
+ else if ( cur->pri == CSCHED_PRI_TS_OVER )
+ CSCHED_STAT_CRANK(tickle_local_over);
+ else if ( cur->pri == CSCHED_PRI_TS_UNDER )
+ CSCHED_STAT_CRANK(tickle_local_under);
+ else
+ CSCHED_STAT_CRANK(tickle_local_other);
+
+ cpu_set(cpu, mask);
+ }
+
+ /*
+ * If this CPU has at least two runnable VCPUs, we tickle any idlers to
+ * let them know there is runnable work in the system...
+ */
+ if ( cur->pri > CSCHED_PRI_IDLE )
+ {
+ if ( cpus_empty(csched_priv.idlers) )
+ {
+ CSCHED_STAT_CRANK(tickle_idlers_none);
+ }
+ else
+ {
+ CSCHED_STAT_CRANK(tickle_idlers_some);
+ cpus_or(mask, mask, csched_priv.idlers);
+ }
+ }
+
+ /* Send scheduler interrupts to designated CPUs */
+ if ( !cpus_empty(mask) )
+ cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
+}
+
+static void
+csched_pcpu_init(int cpu)
+{
+ struct csched_pcpu *spc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ /* Initialize/update system-wide config */
+ csched_priv.credit += CSCHED_ACCT_PERIOD;
+ if ( csched_priv.ncpus <= cpu )
+ csched_priv.ncpus = cpu + 1;
+ if ( csched_priv.master >= csched_priv.ncpus )
+ csched_priv.master = cpu;
+
+ /* Allocate per-PCPU info */
+ spc = xmalloc(struct csched_pcpu);
+ BUG_ON( spc == NULL );
+ INIT_LIST_HEAD(&spc->runq);
+ spc->runq_sort_last = csched_priv.runq_sort;
+ schedule_data[cpu].sched_priv = spc;
+
+ /* Start off idling... */
+ BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) );
+ cpu_set(cpu, csched_priv.idlers);
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+}
+
+#ifndef NDEBUG
+static inline void
+__csched_vcpu_check(struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ struct csched_dom * const sdom = svc->sdom;
+
+ BUG_ON( svc->vcpu != vc );
+ BUG_ON( sdom != CSCHED_DOM(vc->domain) );
+ if ( sdom )
+ {
+ BUG_ON( is_idle_vcpu(vc) );
+ BUG_ON( sdom->dom != vc->domain );
+ }
+ else
+ {
+ BUG_ON( !is_idle_vcpu(vc) );
+ }
+
+ CSCHED_STAT_CRANK(vcpu_check);
+}
+#define CSCHED_VCPU_CHECK(_vc) (__csched_vcpu_check(_vc))
+#else
+#define CSCHED_VCPU_CHECK(_vc)
+#endif
+
+static inline int
+__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+{
+ /*
+ * Don't pick up work that's in the peer's scheduling tail. Also only pick
+ * up work that's allowed to run on our CPU.
+ */
+ if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
+ {
+ CSCHED_STAT_CRANK(steal_peer_running);
+ return 0;
+ }
+
+ if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
+ {
+ CSCHED_STAT_CRANK(steal_peer_pinned);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void
+csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
+{
+ struct csched_dom * const sdom = svc->sdom;
+ unsigned long flags;
+
+ /* Update credits */
+ atomic_sub(credit_dec, &svc->credit);
+
+ /* Put this VCPU and domain back on the active list if it was idling */
+ if ( list_empty(&svc->active_vcpu_elem) )
+ {
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ if ( list_empty(&svc->active_vcpu_elem) )
+ {
+ CSCHED_STAT_CRANK(acct_vcpu_active);
+ svc->state_active++;
+
+ sdom->active_vcpu_count++;
+ list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+ if ( list_empty(&sdom->active_sdom_elem) )
+ {
+ list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+ csched_priv.weight += sdom->weight;
+ }
+ }
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+ }
+}
+
+static inline void
+__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
+{
+ struct csched_dom * const sdom = svc->sdom;
+
+ BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+ CSCHED_STAT_CRANK(acct_vcpu_idle);
+ svc->state_idle++;
+
+ sdom->active_vcpu_count--;
+ list_del_init(&svc->active_vcpu_elem);
+ if ( list_empty(&sdom->active_vcpu) )
+ {
+ BUG_ON( csched_priv.weight < sdom->weight );
+ list_del_init(&sdom->active_sdom_elem);
+ csched_priv.weight -= sdom->weight;
+ }
+
+ atomic_set(&svc->credit, 0);
+}
+
+static int
+csched_vcpu_alloc(struct vcpu *vc)
+{
+ struct domain * const dom = vc->domain;
+ struct csched_dom *sdom;
+ struct csched_vcpu *svc;
+ int16_t pri;
+
+ CSCHED_STAT_CRANK(vcpu_alloc);
+
+ /* Allocate, if appropriate, per-domain info */
+ if ( is_idle_vcpu(vc) )
+ {
+ sdom = NULL;
+ pri = CSCHED_PRI_IDLE;
+ }
+ else if ( CSCHED_DOM(dom) )
+ {
+ sdom = CSCHED_DOM(dom);
+ pri = CSCHED_PRI_TS_UNDER;
+ }
+ else
+ {
+ sdom = xmalloc(struct csched_dom);
+ if ( !sdom )
+ return -1;
+
+ /* Initialize credit and weight */
+ INIT_LIST_HEAD(&sdom->active_vcpu);
+ sdom->active_vcpu_count = 0;
+ INIT_LIST_HEAD(&sdom->active_sdom_elem);
+ sdom->dom = dom;
+ sdom->weight = CSCHED_DEFAULT_WEIGHT;
+ sdom->cap = 0U;
+ dom->sched_priv = sdom;
+ pri = CSCHED_PRI_TS_UNDER;
+ }
+
+ /* Allocate per-VCPU info */
+ svc = xmalloc(struct csched_vcpu);
+ if ( !svc )
+ return -1;
+
+ INIT_LIST_HEAD(&svc->runq_elem);
+ INIT_LIST_HEAD(&svc->active_vcpu_elem);
+ svc->sdom = sdom;
+ svc->vcpu = vc;
+ atomic_set(&svc->credit, 0);
+ svc->credit_last = 0;
+ svc->credit_incr = 0U;
+ svc->state_active = 0U;
+ svc->state_idle = 0U;
+ svc->pri = pri;
+ vc->sched_priv = svc;
+
+ CSCHED_VCPU_CHECK(vc);
+
+ /* Attach fair-share VCPUs to the accounting list */
+ if ( likely(sdom != NULL) )
+ csched_vcpu_acct(svc, 0);
+
+ return 0;
+}
+
+static void
+csched_vcpu_add(struct vcpu *vc)
+{
+ CSCHED_STAT_CRANK(vcpu_add);
+
+ /* Allocate per-PCPU info */
+ if ( unlikely(!CSCHED_PCPU(vc->processor)) )
+ csched_pcpu_init(vc->processor);
+
+ CSCHED_VCPU_CHECK(vc);
+}
+
+static void
+csched_vcpu_free(struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ struct csched_dom * const sdom = svc->sdom;
+ unsigned long flags;
+
+ BUG_ON( sdom == NULL );
+ BUG_ON( !list_empty(&svc->runq_elem) );
+
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ if ( !list_empty(&svc->active_vcpu_elem) )
+ __csched_vcpu_acct_idle_locked(svc);
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+ xfree(svc);
+}
+
+static void
+csched_vcpu_sleep(struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+
+ CSCHED_STAT_CRANK(vcpu_sleep);
+
+ BUG_ON( is_idle_vcpu(vc) );
+
+ if ( schedule_data[vc->processor].curr == vc )
+ cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
+ else if ( __vcpu_on_runq(svc) )
+ __runq_remove(svc);
+}
+
+static void
+csched_vcpu_wake(struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ const unsigned int cpu = vc->processor;
+
+ BUG_ON( is_idle_vcpu(vc) );
+
+ if ( unlikely(schedule_data[cpu].curr == vc) )
+ {
+ CSCHED_STAT_CRANK(vcpu_wake_running);
+ return;
+ }
+ if ( unlikely(__vcpu_on_runq(svc)) )
+ {
+ CSCHED_STAT_CRANK(vcpu_wake_onrunq);
+ return;
+ }
+
+ if ( likely(vcpu_runnable(vc)) )
+ CSCHED_STAT_CRANK(vcpu_wake_runnable);
+ else
+ CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
+
+ /* Put the VCPU on the runq and tickle CPUs */
+ __runq_insert(cpu, svc);
+ __runq_tickle(cpu, svc);
+}
+
+static int
+csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity)
+{
+ unsigned long flags;
+ int lcpu;
+
+ if ( vc == current )
+ {
+ /* No locking needed but also can't move on the spot... */
+ if ( !cpu_isset(vc->processor, *affinity) )
+ return -EBUSY;
+
+ vc->cpu_affinity = *affinity;
+ }
+ else
+ {
+ /* Pause, modify, and unpause. */
+ vcpu_pause(vc);
+
+ vc->cpu_affinity = *affinity;
+ if ( !cpu_isset(vc->processor, vc->cpu_affinity) )
+ {
+ /*
+ * We must grab the scheduler lock for the CPU currently owning
+ * this VCPU before changing its ownership.
+ */
+ vcpu_schedule_lock_irqsave(vc, flags);
+ lcpu = vc->processor;
+
+ vc->processor = first_cpu(vc->cpu_affinity);
+
+ spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags);
+ }
+
+ vcpu_unpause(vc);
+ }
+
+ return 0;
+}
+
+static int
+csched_dom_cntl(
+ struct domain *d,
+ struct sched_adjdom_cmd *cmd)
+{
+ struct csched_dom * const sdom = CSCHED_DOM(d);
+ unsigned long flags;
+
+ if ( cmd->direction == SCHED_INFO_GET )
+ {
+ cmd->u.credit.weight = sdom->weight;
+ cmd->u.credit.cap = sdom->cap;
+ }
+ else
+ {
+ ASSERT( cmd->direction == SCHED_INFO_PUT );
+
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ if ( cmd->u.credit.weight != 0 )
+ {
+ csched_priv.weight -= sdom->weight;
+ sdom->weight = cmd->u.credit.weight;
+ csched_priv.weight += sdom->weight;
+ }
+
+ if ( cmd->u.credit.cap != (uint16_t)~0U )
+ sdom->cap = cmd->u.credit.cap;
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+ }
+
+ return 0;
+}
+
+static void
+csched_dom_free(struct domain *dom)
+{
+ struct csched_dom * const sdom = CSCHED_DOM(dom);
+ int i;
+
+ CSCHED_STAT_CRANK(dom_free);
+
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ {
+ if ( dom->vcpu[i] )
+ csched_vcpu_free(dom->vcpu[i]);
+ }
+
+ xfree(sdom);
+}
+
+/*
+ * This is an O(n) optimized sort of the runq.
+ *
+ * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
+ * through the runq and move up any UNDERs that are preceded by OVERs. We
+ * remember the last UNDER to make the move-up operation O(1).
+ */
+static void
+csched_runq_sort(unsigned int cpu)
+{
+ struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
+ struct list_head *runq, *elem, *next, *last_under;
+ struct csched_vcpu *svc_elem;
+ unsigned long flags;
+ int sort_epoch;
+
+ sort_epoch = csched_priv.runq_sort;
+ if ( sort_epoch == spc->runq_sort_last )
+ return;
+
+ spc->runq_sort_last = sort_epoch;
+
+ spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+
+ runq = &spc->runq;
+ elem = runq->next;
+ last_under = runq;
+
+ while ( elem != runq )
+ {
+ next = elem->next;
+ svc_elem = __runq_elem(elem);
+
+ if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
+ {
+ /* does elem need to move up the runq? */
+ if ( elem->prev != last_under )
+ {
+ list_del(elem);
+ list_add(elem, last_under);
+ }
+ last_under = elem;
+ }
+
+ elem = next;
+ }
+
+ spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
+}
+
+static void
+csched_acct(void)
+{
+ unsigned long flags;
+ struct list_head *iter_vcpu, *next_vcpu;
+ struct list_head *iter_sdom, *next_sdom;
+ struct csched_vcpu *svc;
+ struct csched_dom *sdom;
+ uint32_t credit_total;
+ uint32_t weight_total;
+ uint32_t weight_left;
+ uint32_t credit_fair;
+ uint32_t credit_peak;
+ int credit_balance;
+ int credit_xtra;
+ int credit;
+
+
+ spin_lock_irqsave(&csched_priv.lock, flags);
+
+ weight_total = csched_priv.weight;
+ credit_total = csched_priv.credit;
+
+ /* Converge balance towards 0 when it drops negative */
+ if ( csched_priv.credit_balance < 0 )
+ {
+ credit_total -= csched_priv.credit_balance;
+ CSCHED_STAT_CRANK(acct_balance);
+ }
+
+ if ( unlikely(weight_total == 0) )
+ {
+ csched_priv.credit_balance = 0;
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+ CSCHED_STAT_CRANK(acct_no_work);
+ return;
+ }
+
+ CSCHED_STAT_CRANK(acct_run);
+
+ weight_left = weight_total;
+ credit_balance = 0;
+ credit_xtra = 0;
+
+ list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+ {
+ sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+ BUG_ON( is_idle_domain(sdom->dom) );
+ BUG_ON( sdom->active_vcpu_count == 0 );
+ BUG_ON( sdom->weight == 0 );
+ BUG_ON( sdom->weight > weight_left );
+
+ weight_left -= sdom->weight;
+
+ /*
+ * A domain's fair share is computed using its weight in competition
+ * with that of all other active domains.
+ *
+ * At most, a domain can use credits to run all its active VCPUs
+ * for one full accounting period. We allow a domain to earn more
+ * only when the system-wide credit balance is negative.
+ */
+ credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD;
+ if ( csched_priv.credit_balance < 0 )
+ {
+ credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+ (weight_total - 1)
+ ) / weight_total;
+ }
+ if ( sdom->cap != 0U )
+ {
+ uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 100;
+ if ( credit_cap < credit_peak )
+ credit_peak = credit_cap;
+ }
+
+ credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
+ ) / weight_total;
+
+ if ( credit_fair < credit_peak )
+ {
+ credit_xtra = 1;
+ }
+ else
+ {
+ if ( weight_left != 0U )
+ {
+ /* Give other domains a chance at unused credits */
+ credit_total += ( ( ( credit_fair - credit_peak
+ ) * weight_total
+ ) + ( weight_left - 1 )
+ ) / weight_left;
+ }
+
+ if ( credit_xtra )
+ {
+ /*
+ * Lazily keep domains with extra credits at the head of
+ * the queue to give others a chance at them in future
+ * accounting periods.
+ */
+ CSCHED_STAT_CRANK(acct_reorder);
+ list_del(&sdom->active_sdom_elem);
+ list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+ }
+
+ credit_fair = credit_peak;
+ }
+
+ /* Compute fair share per VCPU */
+ credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
+ ) / sdom->active_vcpu_count;
+
+
+ list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+ {
+ svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+ BUG_ON( sdom != svc->sdom );
+
+ /* Increment credit */
+ atomic_add(credit_fair, &svc->credit);
+ credit = atomic_read(&svc->credit);
+
+ /*
+ * Recompute priority or, if VCPU is idling, remove it from
+ * the active list.
+ */
+ if ( credit < 0 )
+ {
+ if ( sdom->cap == 0U )
+ svc->pri = CSCHED_PRI_TS_OVER;
+ else
+ svc->pri = CSCHED_PRI_TS_PARKED;
+
+ if ( credit < -CSCHED_TSLICE )
+ {
+ CSCHED_STAT_CRANK(acct_min_credit);
+ credit = -CSCHED_TSLICE;
+ atomic_set(&svc->credit, credit);
+ }
+ }
+ else
+ {
+ svc->pri = CSCHED_PRI_TS_UNDER;
+
+ if ( credit > CSCHED_TSLICE )
+ __csched_vcpu_acct_idle_locked(svc);
+ }
+
+ svc->credit_last = credit;
+ svc->credit_incr = credit_fair;
+ credit_balance += credit;
+ }
+ }
+
+ csched_priv.credit_balance = credit_balance;
+
+ spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+ /* Inform each CPU that its runq needs to be sorted */
+ csched_priv.runq_sort++;
+}
+
+static void
+csched_tick(unsigned int cpu)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(current);
+ struct csched_dom * const sdom = svc->sdom;
+
+ /*
+ * Accounting for running VCPU
+ *
+ * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
+ */
+ if ( likely(sdom != NULL) )
+ {
+ csched_vcpu_acct(svc, CSCHED_TICK);
+ }
+
+ /*
+ * Accounting duty
+ *
+ * Note: Currently, this is always done by the master boot CPU. Eventually,
+ * we could distribute or at the very least cycle the duty.
+ */
+ if ( (csched_priv.master == cpu) &&
+ (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 )
+ {
+ csched_acct();
+ }
+
+ /*
+ * Check if runq needs to be sorted
+ *
+ * Every physical CPU resorts the runq after the accounting master has
+ * modified priorities. This is a special O(n) sort and runs at most
+ * once per accounting period (currently 30 milliseconds).
+ */
+ csched_runq_sort(cpu);
+}
+
+static struct csched_vcpu *
+csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
+{
+ struct list_head *iter;
+ struct csched_vcpu *speer;
+ struct vcpu *vc;
+
+ list_for_each( iter, &spc->runq )
+ {
+ speer = __runq_elem(iter);
+
+ /*
+ * If the next available VCPU here is not of higher priority than ours,
+ * this PCPU is useless to us.
+ */
+ if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
+ {
+ CSCHED_STAT_CRANK(steal_peer_idle);
+ break;
+ }
+
+ /* Is this VCPU runnable on our PCPU? */
+ vc = speer->vcpu;
+ BUG_ON( is_idle_vcpu(vc) );
+
+ if ( __csched_vcpu_is_stealable(cpu, vc) )
+ {
+ /* We got a candidate. Grab it! */
+ __runq_remove(speer);
+ vc->processor = cpu;
+
+ return speer;
+ }
+ }
+
+ return NULL;
+}
+
+static struct csched_vcpu *
+csched_load_balance(int cpu, struct csched_vcpu *snext)
+{
+ struct csched_pcpu *spc;
+ struct csched_vcpu *speer;
+ int peer_cpu;
+
+ if ( snext->pri == CSCHED_PRI_IDLE )
+ CSCHED_STAT_CRANK(load_balance_idle);
+ else if ( snext->pri == CSCHED_PRI_TS_OVER )
+ CSCHED_STAT_CRANK(load_balance_over);
+ else
+ CSCHED_STAT_CRANK(load_balance_other);
+
+ peer_cpu = cpu;
+ BUG_ON( peer_cpu != snext->vcpu->processor );
+
+ while ( 1 )
+ {
+ /* For each PCPU in the system starting with our neighbour... */
+ peer_cpu = (peer_cpu + 1) % csched_priv.ncpus;
+ if ( peer_cpu == cpu )
+ break;
+
+ BUG_ON( peer_cpu >= csched_priv.ncpus );
+ BUG_ON( peer_cpu == cpu );
+
+ /*
+ * Get ahold of the scheduler lock for this peer CPU.
+ *
+ * Note: We don't spin on this lock but simply try it. Spinning could
+ * cause a deadlock if the peer CPU is also load balancing and trying
+ * to lock this CPU.
+ */
+ if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) )
+ {
+
+ spc = CSCHED_PCPU(peer_cpu);
+ if ( unlikely(spc == NULL) )
+ {
+ CSCHED_STAT_CRANK(steal_peer_down);
+ speer = NULL;
+ }
+ else
+ {
+ speer = csched_runq_steal(spc, cpu, snext->pri);
+ }
+
+ spin_unlock(&schedule_data[peer_cpu].schedule_lock);
+
+ /* Got one! */
+ if ( speer )
+ {
+ CSCHED_STAT_CRANK(vcpu_migrate);
+ return speer;
+ }
+ }
+ else
+ {
+ CSCHED_STAT_CRANK(steal_trylock_failed);
+ }
+ }
+
+
+ /* Failed to find more important work */
+ __runq_remove(snext);
+ return snext;
+}
+
+/*
+ * This function is in the critical path. It is designed to be simple and
+ * fast for the common case.
+ */
+static struct task_slice
+csched_schedule(s_time_t now)
+{
+ const int cpu = smp_processor_id();
+ struct list_head * const runq = RUNQ(cpu);
+ struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+ struct csched_vcpu *snext;
+ struct task_slice ret;
+
+ CSCHED_STAT_CRANK(schedule);
+ CSCHED_VCPU_CHECK(current);
+
+ /*
+ * Select next runnable local VCPU (ie top of local runq)
+ */
+ if ( vcpu_runnable(current) )
+ __runq_insert(cpu, scurr);
+ else
+ BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
+
+ snext = __runq_elem(runq->next);
+
+ /*
+ * SMP Load balance:
+ *
+ * If the next highest priority local runnable VCPU has already eaten
+ * through its credits, look on other PCPUs to see if we have more
+ * urgent work... If not, csched_load_balance() will return snext, but
+ * already removed from the runq.
+ */
+ if ( snext->pri > CSCHED_PRI_TS_OVER )
+ __runq_remove(snext);
+ else
+ snext = csched_load_balance(cpu, snext);
+
+ /*
+ * Update idlers mask if necessary. When we're idling, other CPUs
+ * will tickle us when they get extra work.
+ */
+ if ( snext->pri == CSCHED_PRI_IDLE )
+ {
+ if ( !cpu_isset(cpu, csched_priv.idlers) )
+ cpu_set(cpu, csched_priv.idlers);
+ }
+ else if ( cpu_isset(cpu, csched_priv.idlers) )
+ {
+ cpu_clear(cpu, csched_priv.idlers);
+ }
+
+ /*
+ * Return task to run next...
+ */
+ ret.time = MILLISECS(CSCHED_TSLICE);
+ ret.task = snext->vcpu;
+
+ CSCHED_VCPU_CHECK(ret.task);
+ BUG_ON( !vcpu_runnable(ret.task) );
+
+ return ret;
+}
+
+static void
+csched_dump_vcpu(struct csched_vcpu *svc)
+{
+ struct csched_dom * const sdom = svc->sdom;
+
+ printk("[%i.%i] pri=%i cpu=%i",
+ svc->vcpu->domain->domain_id,
+ svc->vcpu->vcpu_id,
+ svc->pri,
+ svc->vcpu->processor);
+
+ if ( sdom )
+ {
+ printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}",
+ atomic_read(&svc->credit),
+ svc->credit_last,
+ svc->credit_incr,
+ svc->state_active,
+ svc->state_idle,
+ sdom->weight);
+ }
+
+ printk("\n");
+}
+
+static void
+csched_dump_pcpu(int cpu)
+{
+ struct list_head *runq, *iter;
+ struct csched_pcpu *spc;
+ struct csched_vcpu *svc;
+ int loop;
+
+ spc = CSCHED_PCPU(cpu);
+ runq = &spc->runq;
+
+ printk(" tick=%lu, sort=%d\n",
+ schedule_data[cpu].tick,
+ spc->runq_sort_last);
+
+ /* current VCPU */
+ svc = CSCHED_VCPU(schedule_data[cpu].curr);
+ if ( svc )
+ {
+ printk("\trun: ");
+ csched_dump_vcpu(svc);
+ }
+
+ loop = 0;
+ list_for_each( iter, runq )
+ {
+ svc = __runq_elem(iter);
+ if ( svc )
+ {
+ printk("\t%3d: ", ++loop);
+ csched_dump_vcpu(svc);
+ }
+ }
+}
+
+static void
+csched_dump(void)
+{
+ struct list_head *iter_sdom, *iter_svc;
+ int loop;
+
+ printk("info:\n"
+ "\tncpus = %u\n"
+ "\tmaster = %u\n"
+ "\tcredit = %u\n"
+ "\tcredit balance = %d\n"
+ "\tweight = %u\n"
+ "\trunq_sort = %u\n"
+ "\ttick = %dms\n"
+ "\ttslice = %dms\n"
+ "\taccounting period = %dms\n"
+ "\tdefault-weight = %d\n",
+ csched_priv.ncpus,
+ csched_priv.master,
+ csched_priv.credit,
+ csched_priv.credit_balance,
+ csched_priv.weight,
+ csched_priv.runq_sort,
+ CSCHED_TICK,
+ CSCHED_TSLICE,
+ CSCHED_ACCT_PERIOD,
+ CSCHED_DEFAULT_WEIGHT);
+
+ printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
+
+ CSCHED_STATS_PRINTK();
+
+ printk("active vcpus:\n");
+ loop = 0;
+ list_for_each( iter_sdom, &csched_priv.active_sdom )
+ {
+ struct csched_dom *sdom;
+ sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+ list_for_each( iter_svc, &sdom->active_vcpu )
+ {
+ struct csched_vcpu *svc;
+ svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
+
+ printk("\t%3d: ", ++loop);
+ csched_dump_vcpu(svc);
+ }
+ }
+}
+
+static void
+csched_init(void)
+{
+ spin_lock_init(&csched_priv.lock);
+ INIT_LIST_HEAD(&csched_priv.active_sdom);
+ csched_priv.ncpus = 0;
+ csched_priv.master = UINT_MAX;
+ cpus_clear(csched_priv.idlers);
+ csched_priv.weight = 0U;
+ csched_priv.credit = 0U;
+ csched_priv.credit_balance = 0;
+ csched_priv.runq_sort = 0U;
+ CSCHED_STATS_RESET();
+}
+
+
+struct scheduler sched_credit_def = {
+ .name = "SMP Credit Scheduler",
+ .opt_name = "credit",
+ .sched_id = SCHED_CREDIT,
+
+ .alloc_task = csched_vcpu_alloc,
+ .add_task = csched_vcpu_add,
+ .sleep = csched_vcpu_sleep,
+ .wake = csched_vcpu_wake,
+ .set_affinity = csched_vcpu_set_affinity,
+
+ .adjdom = csched_dom_cntl,
+ .free_task = csched_dom_free,
+
+ .tick = csched_tick,
+ .do_schedule = csched_schedule,
+
+ .dump_cpu_state = csched_dump_pcpu,
+ .dump_settings = csched_dump,
+ .init = csched_init,
+};
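
The csched_runq_sort() hunk above relies on each relink via list_del/list_add
being O(1), so a single walk suffices. Below is a minimal standalone sketch of
the same two-priority partition, with a hypothetical node type standing in for
Xen's struct list_head and the priorities reduced to 1 (UNDER) and 0 (OVER);
this is an illustration only, not part of the patch:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for Xen's struct list_head, so each
     * relink stays O(1) and the whole pass O(n). */
    struct node {
        struct node *prev, *next;
        int pri;                      /* 1 = UNDER, 0 = OVER */
    };

    static void list_del(struct node *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }

    static void list_add(struct node *n, struct node *after)
    {
        n->next = after->next;
        n->prev = after;
        after->next->prev = n;
        after->next = n;
    }

    /* Same walk as csched_runq_sort(): any UNDER found behind an
     * OVER is relinked right after the last UNDER seen so far. */
    static void sort_demo(struct node *head)
    {
        struct node *last_under = head;
        struct node *elem = head->next, *next;

        while (elem != head) {
            next = elem->next;
            if (elem->pri == 1) {
                if (elem->prev != last_under) {
                    list_del(elem);
                    list_add(elem, last_under);
                }
                last_under = elem;
            }
            elem = next;
        }
    }

    int main(void)
    {
        static const int pris[] = { 0, 1, 0, 0, 1, 1 };
        struct node head = { &head, &head, -1 };

        for (int i = 0; i < 6; i++) {
            struct node *n = malloc(sizeof(*n));
            n->pri = pris[i];
            list_add(n, head.prev);            /* append at tail */
        }
        sort_demo(&head);
        for (struct node *p = head.next; p != &head; p = p->next)
            printf("%d ", p->pri);             /* 1 1 1 0 0 0 */
        printf("\n");
        return 0;
    }

Note that entries keep their relative order within each priority class, so
VCPUs queued earlier still run earlier after the sort.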
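
The fair-share arithmetic in csched_acct() is easier to check with concrete
numbers. The sketch below replays the credit_peak/credit_cap/credit_fair
computation for one capped domain, under assumed example inputs (two PCPUs,
two active domains with weights 256 and 512, and the 30ms accounting period
quoted in the csched_tick() comment); the surplus-redistribution step is
omitted for brevity:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for CSCHED_ACCT_PERIOD; 30 per the csched_tick()
     * comment above. All inputs here are assumed example values. */
    #define ACCT_PERIOD 30U

    int main(void)
    {
        uint32_t credit_total = 2 * ACCT_PERIOD;   /* two PCPUs   */
        uint32_t weight_total = 256 + 512;         /* two domains */

        /* The capped domain: weight 512, one active VCPU, cap 25%. */
        uint32_t weight = 512, active_vcpus = 1, cap = 25;

        /* Peak: at most one full period's worth per active VCPU... */
        uint32_t credit_peak = active_vcpus * ACCT_PERIOD;

        /* ...clipped by the cap, rounded up exactly as in the patch. */
        uint32_t credit_cap = ((cap * ACCT_PERIOD) + 99) / 100;
        if (credit_cap < credit_peak)
            credit_peak = credit_cap;

        /* Fair share: weighted slice of the total, rounded up. */
        uint32_t credit_fair =
            ((credit_total * weight) + (weight_total - 1)) / weight_total;

        if (credit_fair > credit_peak)
            credit_fair = credit_peak; /* surplus is redistributed */

        printf("peak=%u fair=%u\n", credit_peak, credit_fair);
        /* -> peak=8 fair=8: the 25% cap clips this domain's
         * 40-credit weighted share down to 8 credits per period. */
        return 0;
    }

This is exactly the path that later parks a capped VCPU at
CSCHED_PRI_TS_PARKED once its credit drains below zero.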
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/tlbflush.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/tlbflush.h Tue May 30 14:30:34 2006 -0500
@@ -0,0 +1,37 @@
+#ifndef __FLUSHTLB_H__
+#define __FLUSHTLB_H__
+
+#include <xen/sched.h>
+
+/* TLB flushes can be either local (current vcpu only) or domain wide (on
+ all vcpus).
+ TLB flushes can be either all-flush or range only.
+
+ vTLB flushing means flushing VCPU virtual TLB + machine TLB + machine VHPT.
+*/
+
+/* Local all flush of vTLB. */
+void vcpu_flush_vtlb_all (void);
+
+/* Local range flush of machine TLB only (not full VCPU virtual TLB!!!) */
+void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range);
+
+/* Global all flush of vTLB */
+void domain_flush_vtlb_all (void);
+
+/* Global range-flush of vTLB. */
+void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
+
+/* Final vTLB flush on every dirty CPU. */
+void domain_flush_destroy (struct domain *d);
+
+/* Flush v-tlb on cpus set in mask for current domain. */
+void flush_tlb_mask(cpumask_t mask);
+
+/* Flush local machine TLB. */
+void local_flush_tlb_all (void);
+
+#define tlbflush_current_time() 0
+#define tlbflush_filter(x,y) ((void)0)
+
+#endif
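
Since this header only carries declarations, a hedged usage sketch may help
map the local/domain-wide split onto callers. The helper below is purely
illustrative; the function name, the 16KB page-size assumption, and the
single-VCPU test are stand-ins, not part of the patch:

    #include <xen/sched.h>
    #include <asm/tlbflush.h>

    /* Illustrative caller only, not part of the patch: flush after a
     * guest PTE change, assuming a 16KB page (log2 = 14). */
    static void demo_pte_changed(struct domain *d, u64 vadr)
    {
        if (d == current->domain && d->vcpu[1] == NULL)
            /* UP guest running right here: purging the machine
             * TLB/VHPT entry locally is enough. */
            vcpu_flush_tlb_vhpt_range(vadr, 14);
        else
            /* Stale entries may live on other VCPUs: do the
             * domain-wide range flush. */
            domain_flush_vtlb_range(d, vadr, 1UL << 14);
    }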
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xen_ksyms.c Tue May 30 12:52:02 2006 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-/*
- * Architecture-specific kernel symbols
- *
- * Don't put any exports here unless it's defined in an assembler file.
- * All other exports should be put directly after the definition.
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-extern int is_running_on_xen(void);
-EXPORT_SYMBOL(is_running_on_xen);
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
--- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c Tue May 30 12:52:02 2006 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/if_ether.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <xen/net_driver_util.h>
-
-
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-{
- char *s;
- int i;
- char *e;
- char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
- if (IS_ERR(macstr))
- return PTR_ERR(macstr);
- s = macstr;
- for (i = 0; i < ETH_ALEN; i++) {
- mac[i] = simple_strtoul(s, &e, 16);
- if (s == e || (e[0] != ':' && e[0] != 0)) {
- kfree(macstr);
- return -ENOENT;
- }
- s = &e[1];
- }
- kfree(macstr);
- return 0;
-}
-EXPORT_SYMBOL_GPL(xen_net_read_mac);
diff -r e74246451527 -r f54d38cea8ac linux-2.6-xen-sparse/include/xen/net_driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h Tue May 30 12:52:02 2006 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _ASM_XEN_NET_DRIVER_UTIL_H
-#define _ASM_XEN_NET_DRIVER_UTIL_H
-
-
-#include <xen/xenbus.h>
-
-
-/**
- * Read the 'mac' node at the given device's node in the store, and parse that
- * as colon-separated octets, placing the result in the given mac array. mac must be
- * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h).
- * Return 0 on success, or -errno on error.
- */
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
-
-
-#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */
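
For reference, a typical caller of the helper being removed looked roughly
like the sketch below (the function name is hypothetical; the real call sites
are in netfront/netback, which this changeset also touches):

    #include <linux/if_ether.h>
    #include <xen/net_driver_util.h>

    /* Illustrative caller only: probe-time MAC fetch as frontends
     * did it before this removal. */
    static int demo_read_mac(struct xenbus_device *dev)
    {
        u8 mac[ETH_ALEN];
        int err = xen_net_read_mac(dev, mac);

        if (err) {
            /* Report the error against the device's xenstore node. */
            xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
            return err;
        }

        /* mac[] now holds the six octets parsed from a store entry
         * such as "00:16:3e:01:02:03". */
        return 0;
    }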
diff -r e74246451527 -r f54d38cea8ac tools/xenstore/xenstored_proc.h
--- a/tools/xenstore/xenstored_proc.h Tue May 30 12:52:02 2006 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/*
- Copyright (C) 2005 XenSource Ltd
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-*/
-
-#ifndef _XENSTORED_PROC_H
-#define _XENSTORED_PROC_H
-
-#define XENSTORED_PROC_KVA "/proc/xen/xsd_kva"
-#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
-
-
-#endif /* _XENSTORED_PROC_H */
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/flushtlb.h
--- a/xen/include/asm-ia64/flushtlb.h Tue May 30 12:52:02 2006 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-#ifndef __FLUSHTLB_H__
-#define __FLUSHTLB_H__
-
-#include <asm/tlbflush.h>
-
-#define tlbflush_current_time() 0
-#define tlbflush_filter(x,y) ((void)0)
-
-#endif
diff -r e74246451527 -r f54d38cea8ac xen/include/asm-ia64/linux-xen/asm/tlbflush.h
--- a/xen/include/asm-ia64/linux-xen/asm/tlbflush.h Tue May 30 12:52:02 2006 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-#ifndef _ASM_IA64_TLBFLUSH_H
-#define _ASM_IA64_TLBFLUSH_H
-
-/*
- * Copyright (C) 2002 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@xxxxxxxxxx>
- */
-
-#include <linux/config.h>
-
-#include <linux/mm.h>
-
-#include <asm/intrinsics.h>
-#include <asm/mmu_context.h>
-#include <asm/page.h>
-
-/*
- * Now for some TLB flushing routines. This is the kind of stuff that
- * can be very expensive, so try to avoid them whenever possible.
- */
-
-/*
- * Flush everything (kernel mapping may also have changed due to
- * vmalloc/vfree).
- */
-extern void local_flush_tlb_all (void);
-
-#ifdef CONFIG_SMP
- extern void smp_flush_tlb_all (void);
- extern void smp_flush_tlb_mm (struct mm_struct *mm);
-# define flush_tlb_all() smp_flush_tlb_all()
-#else
-# define flush_tlb_all() local_flush_tlb_all()
-#endif
-
-#ifndef XEN
-static inline void
-local_finish_flush_tlb_mm (struct mm_struct *mm)
-{
-#ifndef XEN
-// FIXME SMP?
- if (mm == current->active_mm)
- activate_context(mm);
-#endif
-}
-
-/*
- * Flush a specified user mapping. This is called, e.g., as a result of fork() and
- * exit(). fork() ends up here because the copy-on-write mechanism needs to write-protect
- * the PTEs of the parent task.
- */
-static inline void
-flush_tlb_mm (struct mm_struct *mm)
-{
- if (!mm)
- return;
-
-#ifndef XEN
-// FIXME SMP?
- mm->context = 0;
-#endif
-
- if (atomic_read(&mm->mm_users) == 0)
- return; /* happens as a result of exit_mmap() */
-
-#ifdef CONFIG_SMP
- smp_flush_tlb_mm(mm);
-#else
- local_finish_flush_tlb_mm(mm);
-#endif
-}
-
-extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end);
-
-/*
- * Page-granular tlb flush.
- */
-static inline void
-flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
-{
-#ifdef CONFIG_SMP
- flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE);
-#else
-#ifdef XEN
- if (vma->vm_mm == current->domain->arch.mm)
-#else
- if (vma->vm_mm == current->active_mm)
-#endif
- ia64_ptcl(addr, (PAGE_SHIFT << 2));
-#ifndef XEN
-// FIXME SMP?
- else
- vma->vm_mm->context = 0;
-#endif
-#endif
-}
-
-/*
- * Flush the TLB entries mapping the virtually mapped linear page
- * table corresponding to address range [START-END).
- */
-static inline void
-flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long end)
-{
- /*
- * Deprecated. The virtual page table is now flushed via the normal gather/flush
- * interface (see tlb.h).
- */
-}
-
-
-#define flush_tlb_kernel_range(start, end) flush_tlb_all() /* XXX fix me */
-#endif /* XEN */
-
-#ifdef XEN
-extern void flush_tlb_mask(cpumask_t mask);
-#endif
-
-#endif /* _ASM_IA64_TLBFLUSH_H */
_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel