WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 17 Sep 2008 09:50:33 -0700
Delivery-date: Wed, 17 Sep 2008 09:52:26 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1221198460 -32400
# Node ID ec8eaab557d867dca3e8cbb3e0384d797929102a
# Parent  4ddd63b4be9be2440d213da60b10c20327e5c515
# Parent  346c073ed6a4f0debca36588039d649e2efd93c3
merge with xen-unstable.hg
---
 .hgignore                                             |    1 
 Config.mk                                             |    4 
 docs/misc/vtd.txt                                     |   27 
 docs/src/user.tex                                     |    4 
 stubdom/README                                        |    8 
 tools/examples/init.d/xendomains                      |    6 
 tools/examples/xend-config.sxp                        |    4 
 tools/examples/xmexample.hvm                          |    2 
 tools/examples/xmexample.hvm-stubdom                  |    2 
 tools/flask/policy/Makefile                           |  234 +++++
 tools/flask/policy/Rules.modular                      |  166 +++
 tools/flask/policy/Rules.monolithic                   |  196 ++++
 tools/flask/policy/policy/constraints                 |   27 
 tools/flask/policy/policy/flask/Makefile              |   41 
 tools/flask/policy/policy/flask/access_vectors        |  166 +++
 tools/flask/policy/policy/flask/initial_sids          |   17 
 tools/flask/policy/policy/flask/mkaccess_vector.sh    |  227 +++++
 tools/flask/policy/policy/flask/mkflask.sh            |   95 ++
 tools/flask/policy/policy/flask/security_classes      |   20 
 tools/flask/policy/policy/global_booleans             |    5 
 tools/flask/policy/policy/global_tunables             |    6 
 tools/flask/policy/policy/mcs                         |  324 +++++++
 tools/flask/policy/policy/mls                         |  354 ++++++++
 tools/flask/policy/policy/modules.conf                |   21 
 tools/flask/policy/policy/modules/xen/xen.if          |    1 
 tools/flask/policy/policy/modules/xen/xen.te          |  135 +++
 tools/flask/policy/policy/support/loadable_module.spt |  166 +++
 tools/flask/policy/policy/support/misc_macros.spt     |   32 
 tools/flask/policy/policy/systemuser                  |   19 
 tools/flask/policy/policy/users                       |   39 
 tools/ioemu/hw/cirrus_vga.c                           |    3 
 tools/ioemu/hw/pass-through.c                         |  146 +++
 tools/ioemu/hw/pass-through.h                         |   15 
 tools/ioemu/hw/pci.c                                  |    5 
 tools/ioemu/hw/pt-msi.c                               |    2 
 tools/ioemu/hw/vga.c                                  |    8 
 tools/ioemu/hw/xen_machine_fv.c                       |    4 
 tools/ioemu/vl.h                                      |    2 
 tools/libxc/ia64/xc_ia64_linux_save.c                 |    6 
 tools/libxc/xc_domain_save.c                          |   65 -
 tools/libxc/xc_evtchn.c                               |   15 
 tools/libxc/xc_private.c                              |   10 
 tools/libxc/xenctrl.h                                 |    6 
 tools/libxc/xenguest.h                                |    2 
 tools/python/Makefile                                 |   26 
 tools/python/xen/util/xsconstants.py                  |    6 
 tools/python/xen/util/xsm/flask/flask.py              |    8 
 tools/python/xen/util/xsm/xsm.py                      |   20 
 tools/python/xen/xend/XendConfig.py                   |    2 
 tools/python/xen/xend/XendDomainInfo.py               |    6 
 tools/python/xen/xend/XendOptions.py                  |    8 
 tools/python/xen/xend/server/blkif.py                 |    2 
 tools/python/xen/xend/server/netif.py                 |    2 
 tools/python/xen/xend/server/pciif.py                 |    2 
 tools/python/xen/xm/create.py                         |    6 
 tools/python/xen/xm/main.py                           |    2 
 tools/xcutils/lsevtchn.c                              |   48 -
 tools/xcutils/xc_save.c                               |  117 +-
 tools/xenstore/xs.c                                   |    7 
 tools/xentrace/formats                                |  149 ++-
 tools/xentrace/xentrace.c                             |  399 ++++++++-
 xen/arch/x86/acpi/Makefile                            |    2 
 xen/arch/x86/acpi/cpu_idle.c                          |  434 ++-------
 xen/arch/x86/acpi/cpufreq/cpufreq.c                   |   26 
 xen/arch/x86/acpi/cpufreq/powernow.c                  |    4 
 xen/arch/x86/acpi/cpuidle_menu.c                      |  132 +++
 xen/arch/x86/domain.c                                 |   24 
 xen/arch/x86/domain_build.c                           |    1 
 xen/arch/x86/domctl.c                                 |   47 -
 xen/arch/x86/hpet.c                                   |   30 
 xen/arch/x86/hvm/hvm.c                                |    5 
 xen/arch/x86/hvm/svm/intr.c                           |    4 
 xen/arch/x86/hvm/svm/svm.c                            |   36 
 xen/arch/x86/hvm/vmx/intr.c                           |    2 
 xen/arch/x86/hvm/vmx/vmx.c                            |   49 -
 xen/arch/x86/io_apic.c                                |   13 
 xen/arch/x86/irq.c                                    |   23 
 xen/arch/x86/mm.c                                     |  783 +++++++++++-------
 xen/arch/x86/mm/hap/hap.c                             |    1 
 xen/arch/x86/mm/shadow/common.c                       |   71 +
 xen/arch/x86/mm/shadow/multi.c                        |  210 ++++
 xen/arch/x86/mm/shadow/private.h                      |   43 
 xen/arch/x86/physdev.c                                |   80 -
 xen/arch/x86/platform_hypercall.c                     |   16 
 xen/arch/x86/smpboot.c                                |   40 
 xen/arch/x86/time.c                                   |    7 
 xen/arch/x86/traps.c                                  |   45 +
 xen/common/domain.c                                   |    4 
 xen/common/domctl.c                                   |   19 
 xen/common/event_channel.c                            |   21 
 xen/common/rangeset.c                                 |    9 
 xen/common/sched_credit.c                             |    5 
 xen/common/schedule.c                                 |  123 ++
 xen/common/sysctl.c                                   |   12 
 xen/common/trace.c                                    |   45 -
 xen/drivers/acpi/hwregs.c                             |    2 
 xen/drivers/passthrough/iommu.c                       |    4 
 xen/drivers/passthrough/vtd/iommu.c                   |   22 
 xen/include/asm-ia64/shadow.h                         |    2 
 xen/include/asm-x86/bitops.h                          |    4 
 xen/include/asm-x86/guest_access.h                    |    6 
 xen/include/asm-x86/hvm/trace.h                       |   49 -
 xen/include/asm-x86/io_apic.h                         |    2 
 xen/include/asm-x86/mm.h                              |   38 
 xen/include/asm-x86/msr-index.h                       |   12 
 xen/include/asm-x86/shadow.h                          |    2 
 xen/include/public/trace.h                            |   51 -
 xen/include/xen/cpuidle.h                             |   82 +
 xen/include/xen/iommu.h                               |    1 
 xen/include/xen/sched.h                               |   22 
 xen/include/xen/trace.h                               |    2 
 xen/include/xsm/xsm.h                                 |  148 ++-
 xen/xsm/dummy.c                                       |  130 ++
 xen/xsm/flask/hooks.c                                 |  318 ++++++-
 xen/xsm/flask/include/av_perm_to_string.h             |   21 
 xen/xsm/flask/include/av_permissions.h                |   63 -
 xen/xsm/flask/include/flask.h                         |   11 
 xen/xsm/flask/include/initial_sid_to_string.h         |    3 
 xen/xsm/flask/include/security.h                      |    6 
 xen/xsm/flask/ss/policydb.h                           |   13 
 xen/xsm/flask/ss/services.c                           |   40 
 121 files changed, 5439 insertions(+), 1429 deletions(-)

diff -r 4ddd63b4be9b -r ec8eaab557d8 .hgignore
--- a/.hgignore Fri Sep 12 14:32:45 2008 +0900
+++ b/.hgignore Fri Sep 12 14:47:40 2008 +0900
@@ -185,7 +185,6 @@
 ^tools/misc/xenperf$
 ^tools/pygrub/build/.*$
 ^tools/python/build/.*$
-^tools/python/xen/util/xsm/xsm\.py$
 ^tools/security/secpol_tool$
 ^tools/security/xen/.*$
 ^tools/security/xensec_tool$
diff -r 4ddd63b4be9b -r ec8eaab557d8 Config.mk
--- a/Config.mk Fri Sep 12 14:32:45 2008 +0900
+++ b/Config.mk Fri Sep 12 14:47:40 2008 +0900
@@ -86,11 +86,7 @@ QEMU_REMOTE=http://xenbits.xensource.com
 # Mercurial in-tree version, or a local directory, or a git URL.
 # CONFIG_QEMU   ?= ioemu
 # CONFIG_QEMU   ?= ../qemu-xen.git
-ifeq ($(XEN_TARGET_ARCH),ia64)
-CONFIG_QEMU   ?= ioemu
-else
 CONFIG_QEMU   ?= $(QEMU_REMOTE)
-endif
 
 # Optional components
 XENSTAT_XENTOP     ?= y
diff -r 4ddd63b4be9b -r ec8eaab557d8 docs/misc/vtd.txt
--- a/docs/misc/vtd.txt Fri Sep 12 14:32:45 2008 +0900
+++ b/docs/misc/vtd.txt Fri Sep 12 14:47:40 2008 +0900
@@ -1,8 +1,9 @@ Title   : How to do PCI Passthrough with
 Title   : How to do PCI Passthrough with VT-d
 Authors : Allen Kay    <allen.m.kay@xxxxxxxxx>
           Weidong Han  <weidong.han@xxxxxxxxx>
+          Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>
 Created : October-24-2007
-Updated : August-06-2008
+Updated : September-09-2008
 
 How to turn on VT-d in Xen
 --------------------------
@@ -106,3 +107,27 @@ http://h10010.www1.hp.com/wwpc/us/en/en/
 
 For more information, pls refer to http://wiki.xensource.com/xenwiki/VTdHowTo.
 
+
+Assigning devices to HVM domains
+--------------------------------
+
+Most device types such as NIC, HBA, EHCI and UHCI can be assigned to
+an HVM domain.
+
+But some devices have design features which make them unsuitable for
+assignment to an HVM domain. Examples include:
+
+ * Device has an internal resource, such as private memory, which is
+   mapped to memory address space with BAR (Base Address Register).
+ * Driver submits command with a pointer to a buffer within internal
+   resource. Device decodes the pointer (address), and accesses the
+   buffer.
+
+In an HVM domain, the BAR is virtualized, and host-BAR value and
+guest-BAR value are different. The addresses of internal resource from
+device's view and driver's view are different. Similarly, the
+addresses of buffer within internal resource from device's view and
+driver's view are different. As a result, the device can't access the
+buffer specified by the driver.
+
+Such devices assigned to HVM domain currently do not work.
diff -r 4ddd63b4be9b -r ec8eaab557d8 docs/src/user.tex
--- a/docs/src/user.tex Fri Sep 12 14:32:45 2008 +0900
+++ b/docs/src/user.tex Fri Sep 12 14:47:40 2008 +0900
@@ -4252,7 +4252,7 @@ directory of the Xen source distribution
 \section{Online References}
 
 The official Xen web site can be found at:
-\begin{quote} {\tt http://www.xensource.com}
+\begin{quote} {\tt http://www.xen.org}
 \end{quote}
 
 This contains links to the latest versions of all online
@@ -4282,7 +4282,7 @@ mailing lists and subscription informati
   Subscribe at: \\
   {\small {\tt http://lists.xensource.com/xen-announce}}
 \item[xen-changelog@xxxxxxxxxxxxxxxxxxx] Changelog feed
-  from the unstable and 2.0 trees - developer oriented.  Subscribe at: \\
+  from the unstable and 3.x trees - developer oriented.  Subscribe at: \\
   {\small {\tt http://lists.xensource.com/xen-changelog}}
 \end{description}
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 stubdom/README
--- a/stubdom/README    Fri Sep 12 14:32:45 2008 +0900
+++ b/stubdom/README    Fri Sep 12 14:47:40 2008 +0900
@@ -27,7 +27,7 @@ device_model = '/usr/lib/xen/bin/stubdom
 - disable anything related to dom0, like pty serial assignments
 
 
-Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is the name of your HVM
+Create /etc/xen/hvmconfig-dm (where "hvmconfig" is the name of your HVM
 guest) with
 
 kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz"
@@ -52,7 +52,7 @@ vnc = 0
 vnc = 0
 sdl = 0
 
-  - In stubdom-hvmconfig, set an sdl vfb:
+  - In hvmconfig-dm, set an sdl vfb:
 
 vfb = [ 'type=sdl' ]
 
@@ -65,7 +65,7 @@ vnc = 1
 vnc = 1
 vnclisten = "172.30.206.1"
 
-  - In stubdom-hvmconfig, fill the reserved vif with the same IP, for instance:
+  - In hvmconfig-dm, fill the reserved vif with the same IP, for instance:
 
 vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
 
@@ -76,7 +76,7 @@ vnc = 0
 vnc = 0
 sdl = 0
 
-  - In stubdom-hvmconfig, set a vnc vfb:
+  - In hvmconfig-dm, set a vnc vfb:
 
 vfb = [ 'type=vnc' ]
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains  Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/examples/init.d/xendomains  Fri Sep 12 14:47:40 2008 +0900
@@ -327,15 +327,17 @@ stop()
        if test $id = 0; then continue; fi
        echo -n " $name"
        if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
-           case $name in
+           eval "
+           case \"\$name\" in
                ($NAMES)
                    # nothing
                    ;;
                (*)
-                   echo -n "(skip)"
+                   echo -n '(skip)'
                    continue
                    ;;
            esac
+           "
        fi
        # XENDOMAINS_SYSRQ chould be something like just "s" 
        # or "s e i u" or even "s e s i u o"
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp    Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/examples/xend-config.sxp    Fri Sep 12 14:47:40 2008 +0900
@@ -14,6 +14,10 @@
 #(logfile /var/log/xen/xend.log)
 #(loglevel DEBUG)
 
+# Uncomment the line below.  Set the value to flask, acm, or dummy to 
+# select a security module.
+
+#(xsm_module_name dummy)
 
 # The Xen-API server configuration.
 #
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/examples/xmexample.hvm      Fri Sep 12 14:47:40 2008 +0900
@@ -220,7 +220,7 @@ serial='pty'
 #   Configure guest CPUID responses:
 #
 #cpuid=[ '1:ecx=xxxxxxxxxxx00xxxxxxxxxxxxxxxxxxx,
-#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
+#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
 # - Unset the SSE4 features (CPUID.1[ECX][20-19])
 # - Default behaviour for all other bits in ECX And EAX registers.
 # 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/examples/xmexample.hvm-stubdom
--- a/tools/examples/xmexample.hvm-stubdom      Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/examples/xmexample.hvm-stubdom      Fri Sep 12 14:47:40 2008 +0900
@@ -236,7 +236,7 @@ stdvga=0
 #   Configure guest CPUID responses:
 #
 #cpuid=[ '1:ecx=xxxxxxxxxxx00xxxxxxxxxxxxxxxxxxx,
-#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
+#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
 # - Unset the SSE4 features (CPUID.1[ECX][20-19])
 # - Default behaviour for all other bits in ECX And EAX registers.
 # 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/Makefile       Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,234 @@
+#
+# Makefile for the security policy.
+#
+# Targets:
+# 
+# install       - compile and install the policy configuration, and context 
files.
+# load          - compile, install, and load the policy configuration.
+# reload        - compile, install, and load/reload the policy configuration.
+# policy        - compile the policy configuration locally for 
testing/development.
+#
+# The default target is 'policy'.
+#
+
+########################################
+#
+# Configurable portions of the Makefile
+#
+
+# Policy version
+# By default, checkpolicy will create the highest
+# version policy it supports.  Setting this will
+# override the version.
+OUTPUT_POLICY = 20
+
+# Policy Type
+# strict, targeted,
+# strict-mls, targeted-mls,
+# strict-mcs, targeted-mcs
+TYPE = strict
+
+# Policy Name
+# If set, this will be used as the policy
+# name.  Otherwise the policy type will be
+# used for the name.
+NAME = xenrefpolicy
+
+# Distribution
+# Some distributions have portions of policy
+# for programs or configurations specific to the
+# distribution.  Setting this will enable options
+# for the distribution.
+# redhat, gentoo, debian, and suse are current options.
+# Fedora users should enable redhat.
+#DISTRO = 
+
+# Build monolithic policy.  Putting n here
+# will build a loadable module policy.
+MONOLITHIC=y
+
+# Uncomment this to disable command echoing
+#QUIET:=@
+
+########################################
+#
+# NO OPTIONS BELOW HERE
+#
+
+# executable paths
+PREFIX := /usr
+BINDIR := $(PREFIX)/bin
+SBINDIR := $(PREFIX)/sbin
+CHECKPOLICY := $(BINDIR)/checkpolicy
+CHECKMODULE := $(BINDIR)/checkmodule
+SEMOD_PKG := $(BINDIR)/semodule_package
+LOADPOLICY := $(SBINDIR)/flask-loadpolicy
+
+CFLAGS := -Wall
+
+# policy source layout
+POLDIR := policy
+MODDIR := $(POLDIR)/modules
+FLASKDIR := $(POLDIR)/flask
+SECCLASS := $(FLASKDIR)/security_classes
+ISIDS := $(FLASKDIR)/initial_sids
+AVS := $(FLASKDIR)/access_vectors
+
+#policy building support tools
+SUPPORT := support
+FCSORT := tmp/fc_sort
+
+# config file paths
+GLOBALTUN := $(POLDIR)/global_tunables
+GLOBALBOOL := $(POLDIR)/global_booleans
+MOD_CONF := $(POLDIR)/modules.conf
+TUNABLES := $(POLDIR)/tunables.conf
+BOOLEANS := $(POLDIR)/booleans.conf
+
+# install paths
+TOPDIR = $(DESTDIR)/etc/xen/
+INSTALLDIR = $(TOPDIR)/$(NAME)
+SRCPATH = $(INSTALLDIR)/src
+USERPATH = $(INSTALLDIR)/users
+CONTEXTPATH = $(INSTALLDIR)/contexts
+
+# enable MLS if requested.
+ifneq ($(findstring -mls,$(TYPE)),)
+       override M4PARAM += -D enable_mls
+       CHECKPOLICY += -M
+       CHECKMODULE += -M
+endif
+
+# enable MLS if MCS requested.
+ifneq ($(findstring -mcs,$(TYPE)),)
+       override M4PARAM += -D enable_mcs
+       CHECKPOLICY += -M
+       CHECKMODULE += -M
+endif
+
+# compile targeted policy if requested.
+ifneq ($(findstring targeted,$(TYPE)),)
+       override M4PARAM += -D targeted_policy
+endif
+
+# enable distribution-specific policy
+ifneq ($(DISTRO),)
+       override M4PARAM += -D distro_$(DISTRO)
+endif
+
+ifneq ($(OUTPUT_POLICY),)
+       CHECKPOLICY += -c $(OUTPUT_POLICY)
+endif
+
+ifeq ($(NAME),)
+       NAME := $(TYPE)
+endif
+
+# determine the policy version and current kernel version if possible
+PV := $(shell $(CHECKPOLICY) -V |cut -f 1 -d ' ')
+KV := $(shell cat /selinux/policyvers)
+
+# dont print version warnings if we are unable to determine
+# the currently running kernel's policy version
+ifeq ($(KV),)
+       KV := $(PV)
+endif
+
+FC := file_contexts
+POLVER := policy.$(PV)
+
+M4SUPPORT = $(wildcard $(POLDIR)/support/*.spt)
+
+APPCONF := config/appconfig-$(TYPE)
+APPDIR := $(CONTEXTPATH)
+APPFILES := $(INSTALLDIR)/booleans
+CONTEXTFILES += $(wildcard $(APPCONF)/*_context*) $(APPCONF)/media
+USER_FILES := $(POLDIR)/systemuser $(POLDIR)/users
+
+ALL_LAYERS := $(filter-out $(MODDIR)/CVS,$(shell find $(wildcard $(MODDIR)/*) 
-maxdepth 0 -type d))
+
+GENERATED_TE := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard 
$(dir)/*.te.in)))
+GENERATED_IF := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard 
$(dir)/*.if.in)))
+GENERATED_FC := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard 
$(dir)/*.fc.in)))
+
+# sort here since it removes duplicates, which can happen
+# when a generated file is already generated
+DETECTED_MODS := $(sort $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.te)) 
$(GENERATED_TE))
+
+# modules.conf setting for base module
+MODBASE := base
+
+# modules.conf setting for module
+MODMOD := module
+
+# extract settings from modules.conf
+BASE_MODS := $(foreach mod,$(shell awk '/^[[:blank:]]*[[:alpha:]]/{ if ($$3 == 
"$(MODBASE)") print $$1 }' $(MOD_CONF) 2> /dev/null),$(subst ./,,$(shell find 
-iname $(mod).te)))
+MOD_MODS := $(foreach mod,$(shell awk '/^[[:blank:]]*[[:alpha:]]/{ if ($$3 == 
"$(MODMOD)") print $$1 }' $(MOD_CONF) 2> /dev/null),$(subst ./,,$(shell find 
-iname $(mod).te)))
+
+HOMEDIR_TEMPLATE = tmp/homedir_template
+
+########################################
+#
+# Load appropriate rules
+#
+
+ifeq ($(MONOLITHIC),y)
+       include Rules.monolithic
+else
+       include Rules.modular
+endif
+
+########################################
+#
+# Create config files
+#
+conf: $(MOD_CONF) $(BOOLEANS) $(GENERATED_TE) $(GENERATED_IF) $(GENERATED_FC)
+
+$(MOD_CONF) $(BOOLEANS): $(POLXML)
+       @echo "Updating $(MOD_CONF) and $(BOOLEANS)"
+       $(QUIET) cd $(DOCS) && ../$(GENDOC) -t ../$(BOOLEANS) -m ../$(MOD_CONF) 
-x ../$(POLXML)
+
+########################################
+#
+# Appconfig files
+#
+install-appconfig: $(APPFILES)
+
+$(INSTALLDIR)/booleans: $(BOOLEANS)
+       @mkdir -p $(INSTALLDIR)
+       $(QUIET) egrep '^[[:blank:]]*[[:alpha:]]' $(BOOLEANS) \
+               | sed -e 's/false/0/g' -e 's/true/1/g' > tmp/booleans
+       $(QUIET) install -m 644 tmp/booleans $@
+
+########################################
+#
+# Install policy sources
+#
+install-src:
+       rm -rf $(SRCPATH)/policy.old
+       -mv $(SRCPATH)/policy $(SRCPATH)/policy.old
+       mkdir -p $(SRCPATH)/policy
+       cp -R . $(SRCPATH)/policy
+
+########################################
+#
+# Clean everything
+#
+bare: clean
+       rm -f $(POLXML)
+       rm -f $(SUPPORT)/*.pyc
+       rm -f $(FCSORT)
+       rm -f $(MOD_CONF)
+       rm -f $(BOOLEANS)
+       rm -fR $(HTMLDIR)
+ifneq ($(GENERATED_TE),)
+       rm -f $(GENERATED_TE)
+endif
+ifneq ($(GENERATED_IF),)
+       rm -f $(GENERATED_IF)
+endif
+ifneq ($(GENERATED_FC),)
+       rm -f $(GENERATED_FC)
+endif
+
+.PHONY: install-src install-appconfig conf html bare
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/Rules.modular
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/Rules.modular  Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,166 @@
+########################################
+#
+# Rules and Targets for building modular policies
+#
+
+ALL_MODULES := $(filter $(BASE_MODS) $(MOD_MODS),$(DETECTED_MODS))
+ALL_INTERFACES := $(ALL_MODULES:.te=.if)
+
+BASE_PKG := base.pp
+BASE_FC := base.fc
+
+BASE_SECTIONS := tmp/pre_te_files.conf tmp/generated_definitions.conf 
tmp/all_interfaces.conf tmp/all_attrs_types.conf $(GLOBALBOOL) $(GLOBALTUN) 
tmp/only_te_rules.conf tmp/all_post.conf
+
+BASE_PRE_TE_FILES := $(SECCLASS) $(ISIDS) $(AVS) $(M4SUPPORT) $(POLDIR)/mls 
$(POLDIR)/mcs
+BASE_TE_FILES := $(BASE_MODS)
+BASE_POST_TE_FILES := $(POLDIR)/systemuser $(POLDIR)/constraints
+BASE_FC_FILES := $(BASE_MODS:.te=.fc)
+
+MOD_MODULES := $(MOD_MODS:.te=.mod)
+MOD_PKGS := $(notdir $(MOD_MODS:.te=.pp))
+
+# search layer dirs for source files
+vpath %.te $(ALL_LAYERS)
+vpath %.if $(ALL_LAYERS)
+vpath %.fc $(ALL_LAYERS)
+
+########################################
+#
+# default action: create all module packages
+#
+default: base
+
+base: $(BASE_PKG)
+
+modules: $(MOD_PKGS)
+
+#policy: $(POLVER)
+#install: $(LOADPATH) $(FCPATH) $(APPFILES) $(USERPATH)/local.users
+#load: tmp/load
+
+########################################
+#
+# Create a base module package
+#
+$(BASE_PKG): tmp/base.mod $(BASE_FC)
+       @echo "Creating $(NAME) base module package"
+       $(QUIET) $(SEMOD_PKG) $@ $^
+
+########################################
+#
+# Compile a base module
+#
+tmp/base.mod: base.conf
+       @echo "Compiling $(NAME) base module"
+       $(QUIET) $(CHECKMODULE) $^ -o $@
+
+########################################
+#
+# Construct a base module policy.conf
+#
+base.conf: $(BASE_SECTIONS)
+       @echo "Creating $(NAME) base module policy.conf"
+# checkpolicy can use the #line directives provided by -s for error reporting:
+       $(QUIET) m4 -D self_contained_policy $(M4PARAM) -s $^ > tmp/$@.tmp
+       $(QUIET) sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/$@.tmp 
> $@
+# the ordering of these ocontexts matters:
+       $(QUIET) grep ^portcon tmp/$@.tmp >> $@ || true
+       $(QUIET) grep ^netifcon tmp/$@.tmp >> $@ || true
+       $(QUIET) grep ^nodecon tmp/$@.tmp >> $@ || true
+
+tmp/pre_te_files.conf: $(BASE_PRE_TE_FILES)
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) cat $^ > $@
+
+tmp/generated_definitions.conf: $(ALL_LAYERS) $(BASE_TE_FILES)
+       @test -d tmp || mkdir -p tmp
+# define all available object classes
+       $(QUIET) $(GENPERM) $(AVS) $(SECCLASS) > $@
+# per-userdomain templates
+       $(QUIET) echo "define(\`per_userdomain_templates',\`" >> $@
+       $(QUIET) for i in $(patsubst %.te,%,$(notdir $(ALL_MODULES))); do \
+               echo 
"ifdef(\`""$$i""_per_userdomain_template',\`""$$i""_per_userdomain_template("'$$*'")')"
 \
+                       >> $@ ;\
+       done
+       $(QUIET) echo "')" >> $@
+# define foo.te
+       $(QUIET) for i in $(notdir $(BASE_TE_FILES)); do \
+               echo "define(\`$$i')" >> $@ ;\
+       done
+       $(QUIET) $(SETTUN) $(BOOLEANS) >> $@
+
+tmp/all_interfaces.conf: $(M4SUPPORT) $(ALL_INTERFACES)
+ifeq ($(ALL_INTERFACES),)
+       $(error No enabled modules! $(notdir $(MOD_CONF)) may need to be 
generated by using "make conf")
+endif
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) m4 $^ | sed -e s/dollarsstar/\$$\*/g > $@
+
+tmp/all_te_files.conf: $(BASE_TE_FILES)
+ifeq ($(BASE_TE_FILES),)
+       $(error No enabled modules! $(notdir $(MOD_CONF)) may need to be 
generated by using "make conf")
+endif
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) cat $^ > $@
+
+tmp/post_te_files.conf: $(BASE_POST_TE_FILES)
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) cat $^ > $@
+
+# extract attributes and put them first. extract post te stuff
+# like genfscon and put last.  portcon, nodecon, and netifcon
+# is delayed since they are generated by m4
+tmp/all_attrs_types.conf tmp/only_te_rules.conf tmp/all_post.conf: 
tmp/all_te_files.conf tmp/post_te_files.conf
+       $(QUIET) grep ^attribute tmp/all_te_files.conf > 
tmp/all_attrs_types.conf || true
+       $(QUIET) grep '^type ' tmp/all_te_files.conf >> tmp/all_attrs_types.conf
+       $(QUIET) cat tmp/post_te_files.conf > tmp/all_post.conf
+       $(QUIET) grep '^sid ' tmp/all_te_files.conf >> tmp/all_post.conf || true
+       $(QUIET) egrep '^fs_use_(xattr|task|trans)' tmp/all_te_files.conf >> 
tmp/all_post.conf || true
+       $(QUIET) grep ^genfscon tmp/all_te_files.conf >> tmp/all_post.conf || 
true
+       $(QUIET) sed -r -e /^attribute/d -e '/^type /d' -e /^genfscon/d \
+                       -e '/^sid /d' -e '/^fs_use_(xattr|task|trans)/d' \
+                       < tmp/all_te_files.conf > tmp/only_te_rules.conf
+
+########################################
+#
+# Construct base module file contexts
+#
+$(BASE_FC): $(M4SUPPORT) tmp/generated_definitions.conf $(BASE_FC_FILES) 
$(FCSORT)
+ifeq ($(BASE_FC_FILES),)
+       $(error No enabled modules! $(notdir $(MOD_CONF)) may need to be 
generated by using "make conf")
+endif
+       @echo "Creating $(NAME) base module file contexts."
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) m4 $(M4PARAM) $(M4SUPPORT) tmp/generated_definitions.conf 
$(BASE_FC_FILES) > tmp/$@.tmp
+       $(QUIET) grep -e HOME -e ROLE tmp/$@.tmp > $(HOMEDIR_TEMPLATE)
+       $(QUIET) sed -i -e /HOME/d -e /ROLE/d tmp/$@.tmp
+       $(QUIET) $(FCSORT) tmp/$@.tmp $@
+
+########################################
+#
+# Build module packages
+#
+tmp/%.mod: $(M4SUPPORT) tmp/generated_definitions.conf tmp/all_interfaces.conf 
%.te
+       @if test -z "$(filter $^,$(MOD_MODS))"; then \
+               echo "The $(notdir $(basename $@)) module is not configured to 
be compiled as a loadable module." ;\
+               false ;\
+       fi
+       @echo "Compiling $(NAME) $(@F) module"
+       $(QUIET) m4 $(M4PARAM) -s $^ > $(@:.mod=.tmp)
+       $(QUIET) $(CHECKMODULE) -m $(@:.mod=.tmp) -o $@
+
+%.pp: tmp/%.mod %.fc
+       @echo "Creating $(NAME) $(@F) policy package"
+       $(QUIET) $(SEMOD_PKG) $@ $^
+
+########################################
+#
+# Clean the sources
+#
+clean:
+       rm -fR tmp
+       rm -f base.conf
+       rm -f *.pp
+       rm -f $(BASE_FC)
+
+.PHONY: default base modules clean
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/Rules.monolithic
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/Rules.monolithic       Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,196 @@
+########################################
+#
+# Rules and Targets for building monolithic policies
+#
+
+# install paths
+POLICYPATH = $(INSTALLDIR)/policy
+LOADPATH = $(POLICYPATH)/$(POLVER)
+FCPATH = $(CONTEXTPATH)/files/file_contexts
+HOMEDIRPATH = $(CONTEXTPATH)/files/homedir_template
+
+# for monolithic policy use all base and module to create policy
+ENABLEMOD := $(BASE_MODS) $(MOD_MODS)
+
+ALL_MODULES := $(filter $(ENABLEMOD),$(DETECTED_MODS))
+
+ALL_INTERFACES := $(ALL_MODULES:.te=.if)
+ALL_TE_FILES := $(ALL_MODULES)
+ALL_FC_FILES := $(ALL_MODULES:.te=.fc)
+
+PRE_TE_FILES := $(SECCLASS) $(ISIDS) $(AVS) $(M4SUPPORT) $(POLDIR)/mls 
$(POLDIR)/mcs
+POST_TE_FILES := $(POLDIR)/systemuser $(POLDIR)/users $(POLDIR)/constraints
+
+POLICY_SECTIONS := tmp/pre_te_files.conf tmp/generated_definitions.conf 
tmp/all_interfaces.conf tmp/all_attrs_types.conf $(GLOBALBOOL) $(GLOBALTUN) 
tmp/only_te_rules.conf tmp/all_post.conf
+
+########################################
+#
+# default action: build policy locally
+#
+default: policy
+
+policy: $(POLVER)
+
+install: $(LOADPATH) $(FCPATH) $(APPFILES) $(USERPATH)/local.users
+
+load: tmp/load
+
+########################################
+#
+# Build a binary policy locally
+#
+$(POLVER): policy.conf
+       @echo "Compiling $(NAME) $(POLVER)"
+ifneq ($(PV),$(KV))
+       @echo
+       @echo "WARNING: Policy version mismatch!  Is your OUTPUT_POLICY set 
correctly?"
+       @echo
+endif
+       $(QUIET) $(CHECKPOLICY) $^ -o $@
+
+########################################
+#
+# Install a binary policy
+#
+$(LOADPATH): policy.conf
+       @mkdir -p $(POLICYPATH)
+       @echo "Compiling and installing $(NAME) $(LOADPATH)"
+ifneq ($(PV),$(KV))
+       @echo
+       @echo "WARNING: Policy version mismatch!  Is your OUTPUT_POLICY set 
correctly?"
+       @echo
+endif
+       $(QUIET) $(CHECKPOLICY) $^ -o $@
+
+########################################
+#
+# Load the binary policy
+#
+reload tmp/load: $(LOADPATH) $(FCPATH)
+       @echo "Loading $(NAME) $(LOADPATH)"
+       $(QUIET) $(LOADPOLICY) -q $(LOADPATH)
+       @touch tmp/load
+
+########################################
+#
+# Construct a monolithic policy.conf
+#
+policy.conf: $(POLICY_SECTIONS)
+       @echo "Creating $(NAME) policy.conf"
+# checkpolicy can use the #line directives provided by -s for error reporting:
+       $(QUIET) m4 -D self_contained_policy $(M4PARAM) -s $^ > tmp/$@.tmp
+       $(QUIET) sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/$@.tmp 
> $@
+
+tmp/pre_te_files.conf: $(PRE_TE_FILES)
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) cat $^ > $@
+
+tmp/generated_definitions.conf: $(ALL_LAYERS) $(ALL_TE_FILES)
+# per-userdomain templates:
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) echo "define(\`per_userdomain_templates',\`" > $@
+       $(QUIET) for i in $(patsubst %.te,%,$(notdir $(ALL_MODULES))); do \
+               echo 
"ifdef(\`""$$i""_per_userdomain_template',\`""$$i""_per_userdomain_template("'$$*'")')"
 \
+                       >> $@ ;\
+       done
+       $(QUIET) echo "')" >> $@
+# define foo.te
+       $(QUIET) for i in $(notdir $(ALL_MODULES)); do \
+               echo "define(\`$$i')" >> $@ ;\
+       done
+#      $(QUIET) $(SETTUN) $(BOOLEANS) >> $@
+
+tmp/all_interfaces.conf: $(M4SUPPORT) $(ALL_INTERFACES)
+ifeq ($(ALL_INTERFACES),)
+       $(error No enabled modules! $(notdir $(MOD_CONF)) may need to be 
generated by using "make conf")
+endif
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) m4 $^ | sed -e s/dollarsstar/\$$\*/g > $@
+
+tmp/all_te_files.conf: $(ALL_TE_FILES)
+ifeq ($(ALL_TE_FILES),)
+       $(error No enabled modules! $(notdir $(MOD_CONF)) may need to be 
generated by using "make conf")
+endif
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) cat $^ > $@
+
+tmp/post_te_files.conf: $(POST_TE_FILES)
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) cat $^ > $@
+
+# extract attributes and put them first. extract post te stuff
+# like genfscon and put last.  portcon, nodecon, and netifcon
+# are delayed since they are generated by m4
+tmp/all_attrs_types.conf tmp/only_te_rules.conf tmp/all_post.conf: 
tmp/all_te_files.conf tmp/post_te_files.conf
+       $(QUIET) grep ^attribute tmp/all_te_files.conf > 
tmp/all_attrs_types.conf || true
+       $(QUIET) grep '^type ' tmp/all_te_files.conf >> tmp/all_attrs_types.conf
+       $(QUIET) cat tmp/post_te_files.conf > tmp/all_post.conf
+       $(QUIET) grep '^sid ' tmp/all_te_files.conf >> tmp/all_post.conf || true
+       $(QUIET) egrep '^fs_use_(xattr|task|trans)' tmp/all_te_files.conf >> 
tmp/all_post.conf || true
+       $(QUIET) grep ^genfscon tmp/all_te_files.conf >> tmp/all_post.conf || 
true
+       $(QUIET) sed -r -e /^attribute/d -e '/^type /d' -e /^genfscon/d \
+                       -e '/^sid /d' -e '/^fs_use_(xattr|task|trans)/d' \
+                       < tmp/all_te_files.conf > tmp/only_te_rules.conf
+
+########################################
+#
+# Remove the dontaudit rules from the policy.conf
+#
+enableaudit: policy.conf
+       @test -d tmp || mkdir -p tmp
+       @echo "Removing dontaudit rules from policy.conf"
+       $(QUIET) grep -v dontaudit policy.conf > tmp/policy.audit
+       $(QUIET) mv tmp/policy.audit policy.conf
+
+########################################
+#
+# Construct file_contexts
+#
+$(FC): $(M4SUPPORT) tmp/generated_definitions.conf $(ALL_FC_FILES)
+ifeq ($(ALL_FC_FILES),)
+       $(error No enabled modules! $(notdir $(MOD_CONF)) may need to be 
generated by using "make conf")
+endif
+       @echo "Creating $(NAME) file_contexts."
+       @test -d tmp || mkdir -p tmp
+       $(QUIET) m4 $(M4PARAM) $(M4SUPPORT) tmp/generated_definitions.conf 
$(ALL_FC_FILES) > tmp/$@.tmp
+#      $(QUIET) grep -e HOME -e ROLE tmp/$@.tmp > $(HOMEDIR_TEMPLATE)
+#      $(QUIET) sed -i -e /HOME/d -e /ROLE/d tmp/$@.tmp
+#      $(QUIET) $(FCSORT) tmp/$@.tmp $@
+       $(QUIET) touch $(HOMEDIR_TEMPLATE)
+       $(QUIET) touch $@
+
+########################################
+#
+# Install file_contexts
+#
+$(FCPATH): $(FC) $(LOADPATH) $(USERPATH)/system.users
+       @echo "Validating $(NAME) file_contexts."
+#      $(QUIET) $(SETFILES) -q -c $(LOADPATH) $(FC)
+       @echo "Installing file_contexts."
+       @mkdir -p $(CONTEXTPATH)/files
+       $(QUIET) install -m 644 $(FC) $(FCPATH)
+       $(QUIET) install -m 644 $(HOMEDIR_TEMPLATE) $(HOMEDIRPATH)
+#      $(QUIET) $(GENHOMEDIRCON) -d $(TOPDIR) -t $(NAME) $(USEPWD)
+
+########################################
+#
+# Run policy source checks
+#
+check: policy.conf $(FC)
+       $(SECHECK) -s --profile=development --policy=policy.conf --fcfile=$(FC) 
> $@.res
+
+longcheck: policy.conf $(FC)
+       $(SECHECK) -s --profile=all --policy=policy.conf --fcfile=$(FC) > $@.res
+
+########################################
+#
+# Clean the sources
+#
+clean:
+       rm -fR tmp
+       rm -f policy.conf
+       rm -f policy.$(PV)
+       rm -f $(FC)
+       rm -f *.res
+
+.PHONY: default policy install load reload enableaudit checklabels 
restorelabels relabel check longcheck clean
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/constraints
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/constraints     Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,27 @@
+
+#
+# Define the constraints
+#
+# constrain class_set perm_set expression ;
+#
+# expression : ( expression ) 
+#           | not expression
+#           | expression and expression
+#           | expression or expression
+#           | u1 op u2
+#           | r1 role_op r2
+#           | t1 op t2
+#           | u1 op names
+#           | u2 op names
+#           | r1 op names
+#           | r2 op names
+#           | t1 op names
+#           | t2 op names
+#
+# op : == | != 
+# role_op : == | != | eq | dom | domby | incomp
+#
+# names : name | { name_list }
+# name_list : name | name_list name            
+#
+
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/flask/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/Makefile  Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,41 @@
+# flask needs to know where to export the libselinux headers.
+LIBSEL ?= ../../libselinux
+
+# flask needs to know where to export the kernel headers.
+LINUXDIR ?= ../../../linux-2.6
+
+AWK = awk
+
+CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
+          else if [ -x /bin/bash ]; then echo /bin/bash; \
+          else echo sh; fi ; fi)
+
+FLASK_H_DEPEND = security_classes initial_sids
+AV_H_DEPEND = access_vectors
+
+FLASK_H_FILES = class_to_string.h flask.h initial_sid_to_string.h
+AV_H_FILES = av_inherit.h common_perm_to_string.h av_perm_to_string.h 
av_permissions.h
+ALL_H_FILES = $(FLASK_H_FILES) $(AV_H_FILES)
+
+all:  $(ALL_H_FILES)
+
+$(FLASK_H_FILES): $(FLASK_H_DEPEND)
+       $(CONFIG_SHELL) mkflask.sh $(AWK) $(FLASK_H_DEPEND)
+
+$(AV_H_FILES): $(AV_H_DEPEND)
+       $(CONFIG_SHELL) mkaccess_vector.sh $(AWK) $(AV_H_DEPEND)
+
+tolib: all
+       install -m 644 flask.h av_permissions.h $(LIBSEL)/include/selinux
+       install -m 644 class_to_string.h av_inherit.h common_perm_to_string.h 
av_perm_to_string.h $(LIBSEL)/src
+
+tokern: all
+       install -m 644 $(ALL_H_FILES) $(LINUXDIR)/security/selinux/include
+
+install: all
+
+relabel:
+
+clean:  
+       rm -f $(FLASK_H_FILES)
+       rm -f $(AV_H_FILES)
diff -r 4ddd63b4be9b -r ec8eaab557d8 
tools/flask/policy/policy/flask/access_vectors
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/access_vectors    Fri Sep 12 14:47:40 
2008 +0900
@@ -0,0 +1,166 @@
+#
+# Define common prefixes for access vectors
+#
+# common common_name { permission_name ... }
+
+#
+# Define a common prefix for file access vectors.
+#
+
+
+#
+# Define the access vectors.
+#
+# class class_name [ inherits common_name ] { permission_name ... }
+
+
+#
+# Define the access vector interpretation for file-related objects.
+#
+
+class xen
+{
+       scheduler
+       settime
+       tbufcontrol
+       readconsole
+       clearconsole
+       perfcontrol
+       mtrr_add
+       mtrr_del
+       mtrr_read
+       microcode
+       physinfo
+       quirk
+    writeconsole
+    readapic
+    writeapic
+    privprofile
+    nonprivprofile
+    kexec
+       firmware
+       sleep
+       frequency
+       getidle
+       debug
+       getcpuinfo
+       heap
+}
+
+class domain
+{
+       setvcpucontext
+       pause
+       unpause
+    resume
+    create
+    transition
+    max_vcpus
+    destroy
+    setvcpuaffinity
+       getvcpuaffinity
+       scheduler
+       getdomaininfo
+       getvcpuinfo
+       getvcpucontext
+       setdomainmaxmem
+       setdomainhandle
+       setdebugging
+       hypercall
+    settime
+    set_target
+    shutdown
+    setaddrsize
+    getaddrsize
+       trigger
+       getextvcpucontext
+       setextvcpucontext
+}
+
+class hvm
+{
+    sethvmc
+    gethvmc
+    setparam
+    getparam
+    pcilevel
+    irqlevel
+    pciroute
+       bind_irq
+       cacheattr
+}
+
+class event
+{
+       bind
+       send
+       status
+       notify
+       create
+    vector
+    reset
+}
+
+class grant
+{
+       map_read
+       map_write
+       unmap
+       transfer
+       setup
+    copy
+    query
+}
+
+class mmu
+{
+       map_read
+       map_write
+       pageinfo
+       pagelist
+    adjust
+    stat
+    translategp
+       updatemp
+    physmap
+    pinpage
+    mfnlist
+    memorymap
+}
+
+class shadow
+{
+       disable
+       enable
+    logdirty
+}
+
+class resource
+{
+       add
+       remove
+       use
+       add_irq
+       remove_irq
+       add_ioport
+       remove_ioport
+       add_iomem
+       remove_iomem
+       stat_device
+       add_device
+       remove_device
+}
+
+class security
+{
+       compute_av
+       compute_create
+       compute_member
+       check_context
+       load_policy
+       compute_relabel
+       compute_user
+       setenforce
+       setbool
+       setsecparam
+}
diff -r 4ddd63b4be9b -r ec8eaab557d8 
tools/flask/policy/policy/flask/initial_sids
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/initial_sids      Fri Sep 12 14:47:40 
2008 +0900
@@ -0,0 +1,17 @@
+# FLASK
+
+#
+# Define initial security identifiers 
+#
+sid xen
+sid dom0
+sid domU
+sid domio
+sid domxen
+sid unlabeled
+sid security
+sid ioport
+sid iomem
+sid pirq
+sid device
+# FLASK
diff -r 4ddd63b4be9b -r ec8eaab557d8 
tools/flask/policy/policy/flask/mkaccess_vector.sh
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/mkaccess_vector.sh        Fri Sep 12 
14:47:40 2008 +0900
@@ -0,0 +1,227 @@
+#!/bin/sh -
+#
+
+# FLASK
+
+set -e
+
+awk=$1
+shift
+
+# output files
+av_permissions="av_permissions.h"
+av_inherit="av_inherit.h"
+common_perm_to_string="common_perm_to_string.h"
+av_perm_to_string="av_perm_to_string.h"
+
+cat $* | $awk "
+BEGIN  {
+               outfile = \"$av_permissions\"
+               inheritfile = \"$av_inherit\"
+               cpermfile = \"$common_perm_to_string\"
+               avpermfile = \"$av_perm_to_string\"
+               "'
+               nextstate = "COMMON_OR_AV";
+               printf("/* This file is automatically generated.  Do not edit. 
*/\n") > outfile;
+               printf("/* This file is automatically generated.  Do not edit. 
*/\n") > inheritfile;
+               printf("/* This file is automatically generated.  Do not edit. 
*/\n") > cpermfile;
+               printf("/* This file is automatically generated.  Do not edit. 
*/\n") > avpermfile;
+;
+       }
+/^[ \t]*#/     { 
+                       next;
+               }
+$1 == "common" { 
+                       if (nextstate != "COMMON_OR_AV")
+                       {
+                               printf("Parse error:  Unexpected COMMON 
definition on line %d\n", NR);
+                               next;   
+                       }
+
+                       if ($2 in common_defined)
+                       {
+                               printf("Duplicate COMMON definition for %s on 
line %d.\n", $2, NR);
+                               next;
+                       }       
+                       common_defined[$2] = 1;
+
+                       tclass = $2;
+                       common_name = $2; 
+                       permission = 1;
+
+                       printf("TB_(common_%s_perm_to_string)\n", $2) > 
cpermfile;
+
+                       nextstate = "COMMON-OPENBRACKET";
+                       next;
+               }
+$1 == "class"  {
+                       if (nextstate != "COMMON_OR_AV" &&
+                           nextstate != "CLASS_OR_CLASS-OPENBRACKET")
+                       {
+                               printf("Parse error:  Unexpected class 
definition on line %d\n", NR);
+                               next;   
+                       }
+
+                       tclass = $2;
+
+                       if (tclass in av_defined)
+                       {
+                               printf("Duplicate access vector definition for 
%s on line %d\n", tclass, NR);
+                               next;
+                       } 
+                       av_defined[tclass] = 1;
+
+                       inherits = "";
+                       permission = 1;
+
+                       nextstate = "INHERITS_OR_CLASS-OPENBRACKET";
+                       next;
+               }
+$1 == "inherits" {                     
+                       if (nextstate != "INHERITS_OR_CLASS-OPENBRACKET")
+                       {
+                               printf("Parse error:  Unexpected INHERITS 
definition on line %d\n", NR);
+                               next;   
+                       }
+
+                       if (!($2 in common_defined))
+                       {
+                               printf("COMMON %s is not defined (line %d).\n", 
$2, NR);
+                               next;
+                       }
+
+                       inherits = $2;
+                       permission = common_base[$2];
+
+                       for (combined in common_perms)
+                       {
+                               split(combined,separate, SUBSEP);
+                               if (separate[1] == inherits)
+                               {
+                                       inherited_perms[common_perms[combined]] 
= separate[2];
+                               }
+                       }
+
+                        j = 1;
+                        for (i in inherited_perms) {
+                            ind[j] = i + 0;
+                            j++;
+                        }
+                        n = asort(ind);
+                       for (i = 1; i <= n; i++) {
+                               perm = inherited_perms[ind[i]];
+                               printf("#define %s__%s", toupper(tclass), 
toupper(perm)) > outfile; 
+                               spaces = 40 - (length(perm) + length(tclass));
+                               if (spaces < 1)
+                                     spaces = 1;
+                               for (j = 0; j < spaces; j++) 
+                                       printf(" ") > outfile; 
+                               printf("0x%08xUL\n", ind[i]) > outfile; 
+                       }
+                       printf("\n") > outfile;
+                        for (i in ind) delete ind[i];
+                        for (i in inherited_perms) delete inherited_perms[i];
+
+                       printf("   S_(SECCLASS_%s, %s, 0x%08xUL)\n", 
toupper(tclass), inherits, permission) > inheritfile; 
+
+                       nextstate = "CLASS_OR_CLASS-OPENBRACKET";
+                       next;
+               }
+$1 == "{"      { 
+                       if (nextstate != "INHERITS_OR_CLASS-OPENBRACKET" &&
+                           nextstate != "CLASS_OR_CLASS-OPENBRACKET" &&
+                           nextstate != "COMMON-OPENBRACKET")
+                       {
+                               printf("Parse error:  Unexpected { on line 
%d\n", NR);
+                               next;
+                       }
+
+                       if (nextstate == "INHERITS_OR_CLASS-OPENBRACKET")
+                               nextstate = "CLASS-CLOSEBRACKET";
+
+                       if (nextstate == "CLASS_OR_CLASS-OPENBRACKET")
+                               nextstate = "CLASS-CLOSEBRACKET";
+
+                       if (nextstate == "COMMON-OPENBRACKET")
+                               nextstate = "COMMON-CLOSEBRACKET";
+               }
+/[a-z][a-z_]*/ {
+                       if (nextstate != "COMMON-CLOSEBRACKET" &&
+                           nextstate != "CLASS-CLOSEBRACKET")
+                       {
+                               printf("Parse error:  Unexpected symbol %s on 
line %d\n", $1, NR);              
+                               next;
+                       }
+
+                       if (nextstate == "COMMON-CLOSEBRACKET")
+                       {
+                               if ((common_name,$1) in common_perms)
+                               {
+                                       printf("Duplicate permission %s for 
common %s on line %d.\n", $1, common_name, NR);
+                                       next;
+                               }
+
+                               common_perms[common_name,$1] = permission;
+
+                               printf("#define COMMON_%s__%s", 
toupper(common_name), toupper($1)) > outfile; 
+
+                               printf("    S_(\"%s\")\n", $1) > cpermfile;
+                       }
+                       else
+                       {
+                               if ((tclass,$1) in av_perms)
+                               {
+                                       printf("Duplicate permission %s for %s 
on line %d.\n", $1, tclass, NR);
+                                       next;
+                               }
+
+                               av_perms[tclass,$1] = permission;
+               
+                               if (inherits != "")
+                               {
+                                       if ((inherits,$1) in common_perms)
+                                       {
+                                               printf("Permission %s in %s inherited from %s on 
line %d conflicts with common permission.\n", $1, tclass, inherits, NR);
+                                               next;
+                                       }
+                               }
+
+                               printf("#define %s__%s", toupper(tclass), 
toupper($1)) > outfile; 
+
+                               printf("   S_(SECCLASS_%s, %s__%s, \"%s\")\n", 
toupper(tclass), toupper(tclass), toupper($1), $1) > avpermfile; 
+                       }
+
+                       spaces = 40 - (length($1) + length(tclass));
+                       if (spaces < 1)
+                             spaces = 1;
+
+                       for (i = 0; i < spaces; i++) 
+                               printf(" ") > outfile; 
+                       printf("0x%08xUL\n", permission) > outfile; 
+                       permission = permission * 2;
+               }
+$1 == "}"      {
+                       if (nextstate != "CLASS-CLOSEBRACKET" && 
+                           nextstate != "COMMON-CLOSEBRACKET")
+                       {
+                               printf("Parse error:  Unexpected } on line 
%d\n", NR);
+                               next;
+                       }
+
+                       if (nextstate == "COMMON-CLOSEBRACKET")
+                       {
+                               common_base[common_name] = permission;
+                               printf("TE_(common_%s_perm_to_string)\n\n", 
common_name) > cpermfile; 
+                       }
+
+                       printf("\n") > outfile;
+
+                       nextstate = "COMMON_OR_AV";
+               }
+END    {
+               if (nextstate != "COMMON_OR_AV" && nextstate != 
"CLASS_OR_CLASS-OPENBRACKET")
+                       printf("Parse error:  Unexpected end of file\n");
+
+       }'
+
+# FLASK
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/flask/mkflask.sh
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/mkflask.sh        Fri Sep 12 14:47:40 
2008 +0900
@@ -0,0 +1,95 @@
+#!/bin/sh -
+#
+
+# FLASK
+
+set -e
+
+awk=$1
+shift 1
+
+# output file
+output_file="flask.h"
+debug_file="class_to_string.h"
+debug_file2="initial_sid_to_string.h"
+
+cat $* | $awk "
+BEGIN  {
+               outfile = \"$output_file\"
+               debugfile = \"$debug_file\"
+               debugfile2 = \"$debug_file2\"
+               "'
+               nextstate = "CLASS";
+
+               printf("/* This file is automatically generated.  Do not edit. 
*/\n") > outfile;
+
+               printf("#ifndef _SELINUX_FLASK_H_\n") > outfile;
+               printf("#define _SELINUX_FLASK_H_\n") > outfile;
+               printf("\n/*\n * Security object class definitions\n */\n") > 
outfile;
+               printf("/* This file is automatically generated.  Do not edit. 
*/\n") > debugfile;
+               printf("/*\n * Security object class definitions\n */\n") > 
debugfile;
+               printf("    S_(\"null\")\n") > debugfile;
+               printf("/* This file is automatically generated.  Do not edit. 
*/\n") > debugfile2;
+               printf("static char *initial_sid_to_string[] =\n{\n") > 
debugfile2;
+               printf("    \"null\",\n") > debugfile2;
+       }
+/^[ \t]*#/     { 
+                       next;
+               }
+$1 == "class"  { 
+                       if (nextstate != "CLASS")
+                       {
+                               printf("Parse error:  Unexpected class 
definition on line %d\n", NR);
+                               next;   
+                       }
+
+                       if ($2 in class_found)
+                       {
+                               printf("Duplicate class definition for %s on 
line %d.\n", $2, NR);
+                               next;
+                       }       
+                       class_found[$2] = 1;
+
+                       class_value++;
+
+                       printf("#define SECCLASS_%s", toupper($2)) > outfile;
+                       for (i = 0; i < 40 - length($2); i++) 
+                               printf(" ") > outfile; 
+                       printf("%d\n", class_value) > outfile; 
+
+                       printf("    S_(\"%s\")\n", $2) > debugfile;
+               }
+$1 == "sid"    { 
+                       if (nextstate == "CLASS")
+                       {
+                           nextstate = "SID";
+                           printf("\n/*\n * Security identifier indices for 
initial entities\n */\n") > outfile;                           
+                       }
+
+                       if ($2 in sid_found)
+                       {
+                               printf("Duplicate SID definition for %s on line 
%d.\n", $2, NR);
+                               next;
+                       }       
+                       sid_found[$2] = 1;
+                       sid_value++;
+
+                       printf("#define SECINITSID_%s", toupper($2)) > outfile;
+                       for (i = 0; i < 37 - length($2); i++) 
+                               printf(" ") > outfile; 
+                       printf("%d\n", sid_value) > outfile; 
+                       printf("    \"%s\",\n", $2) > debugfile2;
+               }
+END    {
+               if (nextstate != "SID")
+                       printf("Parse error:  Unexpected end of file\n");
+
+               printf("\n#define SECINITSID_NUM") > outfile;
+               for (i = 0; i < 34; i++) 
+                       printf(" ") > outfile; 
+               printf("%d\n", sid_value) > outfile; 
+               printf("\n#endif\n") > outfile;
+               printf("};\n\n") > debugfile2;
+       }'
+
+# FLASK
diff -r 4ddd63b4be9b -r ec8eaab557d8 
tools/flask/policy/policy/flask/security_classes
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/flask/security_classes  Fri Sep 12 14:47:40 
2008 +0900
@@ -0,0 +1,20 @@
+# FLASK
+
+#
+# Define the security object classes 
+#
+
+# Classes marked as userspace are classes
+# for userspace object managers
+
+class xen
+class domain
+class hvm
+class mmu
+class resource
+class shadow
+class event
+class grant
+class security
+
+# FLASK
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/global_booleans
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/global_booleans Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,5 @@
+#
+# This file is for the declaration of global booleans.
+# To change the default value at build time, the booleans.conf
+# file should be used.
+#
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/global_tunables
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/global_tunables Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,6 @@
+#
+# This file is for the declaration of global tunables.
+# To change the default value at build time, the booleans.conf
+# file should be used.
+#
+
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/mcs
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/mcs     Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,324 @@
+ifdef(`enable_mcs',`
+#
+# Define sensitivities 
+#
+# Each sensitivity has a name and zero or more aliases.
+#
+# MCS is single-sensitivity.
+#
+sensitivity s0;
+
+#
+# Define the ordering of the sensitivity levels (least to greatest)
+#
+dominance { s0 }
+
+
+#
+# Define the categories
+#
+# Each category has a name and zero or more aliases.
+#
+category c0;
+category c1;
+category c2;
+category c3;
+category c4;
+category c5;
+category c6;
+category c7;
+category c8;
+category c9;
+category c10;
+category c11;
+category c12;
+category c13;
+category c14;
+category c15;
+category c16;
+category c17;
+category c18;
+category c19;
+category c20;
+category c21;
+category c22;
+category c23;
+category c24;
+category c25;
+category c26;
+category c27;
+category c28;
+category c29;
+category c30;
+category c31;
+category c32;
+category c33;
+category c34;
+category c35;
+category c36;
+category c37;
+category c38;
+category c39;
+category c40;
+category c41;
+category c42;
+category c43;
+category c44;
+category c45;
+category c46;
+category c47;
+category c48;
+category c49;
+category c50;
+category c51;
+category c52;
+category c53;
+category c54;
+category c55;
+category c56;
+category c57;
+category c58;
+category c59;
+category c60;
+category c61;
+category c62;
+category c63;
+category c64;
+category c65;
+category c66;
+category c67;
+category c68;
+category c69;
+category c70;
+category c71;
+category c72;
+category c73;
+category c74;
+category c75;
+category c76;
+category c77;
+category c78;
+category c79;
+category c80;
+category c81;
+category c82;
+category c83;
+category c84;
+category c85;
+category c86;
+category c87;
+category c88;
+category c89;
+category c90;
+category c91;
+category c92;
+category c93;
+category c94;
+category c95;
+category c96;
+category c97;
+category c98;
+category c99;
+category c100;
+category c101;
+category c102;
+category c103;
+category c104;
+category c105;
+category c106;
+category c107;
+category c108;
+category c109;
+category c110;
+category c111;
+category c112;
+category c113;
+category c114;
+category c115;
+category c116;
+category c117;
+category c118;
+category c119;
+category c120;
+category c121;
+category c122;
+category c123;
+category c124;
+category c125;
+category c126;
+category c127;
+category c128;
+category c129;
+category c130;
+category c131;
+category c132;
+category c133;
+category c134;
+category c135;
+category c136;
+category c137;
+category c138;
+category c139;
+category c140;
+category c141;
+category c142;
+category c143;
+category c144;
+category c145;
+category c146;
+category c147;
+category c148;
+category c149;
+category c150;
+category c151;
+category c152;
+category c153;
+category c154;
+category c155;
+category c156;
+category c157;
+category c158;
+category c159;
+category c160;
+category c161;
+category c162;
+category c163;
+category c164;
+category c165;
+category c166;
+category c167;
+category c168;
+category c169;
+category c170;
+category c171;
+category c172;
+category c173;
+category c174;
+category c175;
+category c176;
+category c177;
+category c178;
+category c179;
+category c180;
+category c181;
+category c182;
+category c183;
+category c184;
+category c185;
+category c186;
+category c187;
+category c188;
+category c189;
+category c190;
+category c191;
+category c192;
+category c193;
+category c194;
+category c195;
+category c196;
+category c197;
+category c198;
+category c199;
+category c200;
+category c201;
+category c202;
+category c203;
+category c204;
+category c205;
+category c206;
+category c207;
+category c208;
+category c209;
+category c210;
+category c211;
+category c212;
+category c213;
+category c214;
+category c215;
+category c216;
+category c217;
+category c218;
+category c219;
+category c220;
+category c221;
+category c222;
+category c223;
+category c224;
+category c225;
+category c226;
+category c227;
+category c228;
+category c229;
+category c230;
+category c231;
+category c232;
+category c233;
+category c234;
+category c235;
+category c236;
+category c237;
+category c238;
+category c239;
+category c240;
+category c241;
+category c242;
+category c243;
+category c244;
+category c245;
+category c246;
+category c247;
+category c248;
+category c249;
+category c250;
+category c251;
+category c252;
+category c253;
+category c254;
+category c255;
+
+
+#
+# Each MCS level specifies a sensitivity and zero or more categories which may
+# be associated with that sensitivity.
+#
+level s0:c0.c255;
+
+#
+# Define the MCS policy
+#
+# mlsconstrain class_set perm_set expression ;
+#
+# mlsvalidatetrans class_set expression ;
+#
+# expression : ( expression )
+#           | not expression
+#           | expression and expression
+#           | expression or expression
+#           | u1 op u2
+#           | r1 role_mls_op r2
+#           | t1 op t2
+#           | l1 role_mls_op l2
+#           | l1 role_mls_op h2
+#           | h1 role_mls_op l2
+#           | h1 role_mls_op h2
+#           | l1 role_mls_op h1
+#           | l2 role_mls_op h2
+#           | u1 op names
+#           | u2 op names
+#           | r1 op names
+#           | r2 op names
+#           | t1 op names
+#           | t2 op names
+#           | u3 op names (NOTE: this is only available for mlsvalidatetrans)
+#           | r3 op names (NOTE: this is only available for mlsvalidatetrans)
+#           | t3 op names (NOTE: this is only available for mlsvalidatetrans)
+#
+# op : == | !=
+# role_mls_op : == | != | eq | dom | domby | incomp
+#
+# names : name | { name_list }
+# name_list : name | name_list name
+#
+
+
+') dnl end enable_mcs
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/mls
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/mls     Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,354 @@
+
+ifdef(`enable_mls',`
+#
+# Define sensitivities 
+#
+# Each sensitivity has a name and zero or more aliases.
+#
+sensitivity s0;
+sensitivity s1;
+sensitivity s2;
+sensitivity s3;
+sensitivity s4;
+sensitivity s5;
+sensitivity s6;
+sensitivity s7;
+sensitivity s8;
+sensitivity s9;
+sensitivity s10;
+sensitivity s11;
+sensitivity s12;
+sensitivity s13;
+sensitivity s14;
+sensitivity s15;
+
+#
+# Define the ordering of the sensitivity levels (least to greatest)
+#
+dominance { s0 s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 }
+
+
+#
+# Define the categories
+#
+# Each category has a name and zero or more aliases.
+#
+category c0;
+category c1;
+category c2;
+category c3;
+category c4;
+category c5;
+category c6;
+category c7;
+category c8;
+category c9;
+category c10;
+category c11;
+category c12;
+category c13;
+category c14;
+category c15;
+category c16;
+category c17;
+category c18;
+category c19;
+category c20;
+category c21;
+category c22;
+category c23;
+category c24;
+category c25;
+category c26;
+category c27;
+category c28;
+category c29;
+category c30;
+category c31;
+category c32;
+category c33;
+category c34;
+category c35;
+category c36;
+category c37;
+category c38;
+category c39;
+category c40;
+category c41;
+category c42;
+category c43;
+category c44;
+category c45;
+category c46;
+category c47;
+category c48;
+category c49;
+category c50;
+category c51;
+category c52;
+category c53;
+category c54;
+category c55;
+category c56;
+category c57;
+category c58;
+category c59;
+category c60;
+category c61;
+category c62;
+category c63;
+category c64;
+category c65;
+category c66;
+category c67;
+category c68;
+category c69;
+category c70;
+category c71;
+category c72;
+category c73;
+category c74;
+category c75;
+category c76;
+category c77;
+category c78;
+category c79;
+category c80;
+category c81;
+category c82;
+category c83;
+category c84;
+category c85;
+category c86;
+category c87;
+category c88;
+category c89;
+category c90;
+category c91;
+category c92;
+category c93;
+category c94;
+category c95;
+category c96;
+category c97;
+category c98;
+category c99;
+category c100;
+category c101;
+category c102;
+category c103;
+category c104;
+category c105;
+category c106;
+category c107;
+category c108;
+category c109;
+category c110;
+category c111;
+category c112;
+category c113;
+category c114;
+category c115;
+category c116;
+category c117;
+category c118;
+category c119;
+category c120;
+category c121;
+category c122;
+category c123;
+category c124;
+category c125;
+category c126;
+category c127;
+category c128;
+category c129;
+category c130;
+category c131;
+category c132;
+category c133;
+category c134;
+category c135;
+category c136;
+category c137;
+category c138;
+category c139;
+category c140;
+category c141;
+category c142;
+category c143;
+category c144;
+category c145;
+category c146;
+category c147;
+category c148;
+category c149;
+category c150;
+category c151;
+category c152;
+category c153;
+category c154;
+category c155;
+category c156;
+category c157;
+category c158;
+category c159;
+category c160;
+category c161;
+category c162;
+category c163;
+category c164;
+category c165;
+category c166;
+category c167;
+category c168;
+category c169;
+category c170;
+category c171;
+category c172;
+category c173;
+category c174;
+category c175;
+category c176;
+category c177;
+category c178;
+category c179;
+category c180;
+category c181;
+category c182;
+category c183;
+category c184;
+category c185;
+category c186;
+category c187;
+category c188;
+category c189;
+category c190;
+category c191;
+category c192;
+category c193;
+category c194;
+category c195;
+category c196;
+category c197;
+category c198;
+category c199;
+category c200;
+category c201;
+category c202;
+category c203;
+category c204;
+category c205;
+category c206;
+category c207;
+category c208;
+category c209;
+category c210;
+category c211;
+category c212;
+category c213;
+category c214;
+category c215;
+category c216;
+category c217;
+category c218;
+category c219;
+category c220;
+category c221;
+category c222;
+category c223;
+category c224;
+category c225;
+category c226;
+category c227;
+category c228;
+category c229;
+category c230;
+category c231;
+category c232;
+category c233;
+category c234;
+category c235;
+category c236;
+category c237;
+category c238;
+category c239;
+category c240;
+category c241;
+category c242;
+category c243;
+category c244;
+category c245;
+category c246;
+category c247;
+category c248;
+category c249;
+category c250;
+category c251;
+category c252;
+category c253;
+category c254;
+category c255;
+
+
+#
+# Each MLS level specifies a sensitivity and zero or more categories which may
+# be associated with that sensitivity.
+#
+level s0:c0.c255;
+level s1:c0.c255;
+level s2:c0.c255;
+level s3:c0.c255;
+level s4:c0.c255;
+level s5:c0.c255;
+level s6:c0.c255;
+level s7:c0.c255;
+level s8:c0.c255;
+level s9:c0.c255;
+level s10:c0.c255;
+level s11:c0.c255;
+level s12:c0.c255;
+level s13:c0.c255;
+level s14:c0.c255;
+level s15:c0.c255;
+
+
+#
+# Define the MLS policy
+#
+# mlsconstrain class_set perm_set expression ;
+#
+# mlsvalidatetrans class_set expression ;
+#
+# expression : ( expression )
+#           | not expression
+#           | expression and expression
+#           | expression or expression
+#           | u1 op u2
+#           | r1 role_mls_op r2
+#           | t1 op t2
+#           | l1 role_mls_op l2
+#           | l1 role_mls_op h2
+#           | h1 role_mls_op l2
+#           | h1 role_mls_op h2
+#           | l1 role_mls_op h1
+#           | l2 role_mls_op h2
+#           | u1 op names
+#           | u2 op names
+#           | r1 op names
+#           | r2 op names
+#           | t1 op names
+#           | t2 op names
+#           | u3 op names (NOTE: this is only available for mlsvalidatetrans)
+#           | r3 op names (NOTE: this is only available for mlsvalidatetrans)
+#           | t3 op names (NOTE: this is only available for mlsvalidatetrans)
+#
+# op : == | !=
+# role_mls_op : == | != | eq | dom | domby | incomp
+#
+# names : name | { name_list }
+# name_list : name | name_list name
+#
+
+
+') dnl end enable_mls
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/modules.conf
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/modules.conf    Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,21 @@
+#
+# This file contains a listing of available modules.
+# To prevent a module from being used in policy
+# creation, set the module name to "off".
+#
+# For monolithic policies, modules set to "base" and "module"
+# will be built into the policy.
+#
+# For modular policies, modules set to "base" will be
+# included in the base module.  "module" will be compiled
+# as individual loadable modules.
+#
+
+# Layer: xen
+# Module: xen
+# Required in base
+#
+# Policy for xen.
+# 
+xen = base
+
diff -r 4ddd63b4be9b -r ec8eaab557d8 
tools/flask/policy/policy/modules/xen/xen.if
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/modules/xen/xen.if      Fri Sep 12 14:47:40 
2008 +0900
@@ -0,0 +1,1 @@
+#
diff -r 4ddd63b4be9b -r ec8eaab557d8 
tools/flask/policy/policy/modules/xen/xen.te
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/modules/xen/xen.te      Fri Sep 12 14:47:40 
2008 +0900
@@ -0,0 +1,135 @@
+attribute xen_type;
+attribute domain_type;
+attribute resource_type;
+attribute event_type;
+
+type xen_t, xen_type, domain_type;
+
+type dom0_t, domain_type;
+
+type domio_t, domain_type;
+
+type domxen_t, domain_type;
+
+type unlabeled_t, domain_type;
+
+type security_t, domain_type;
+
+type pirq_t, resource_type;
+type ioport_t, resource_type;
+type iomem_t, resource_type;
+type device_t, resource_type;
+
+################################################################################
+#
+# create_domain(priv_dom, domain, channel)
+#
+################################################################################
+define(`create_domain', `
+       type $2, domain_type;
+       allow $1 $2:domain {create max_vcpus setdomainmaxmem 
+                               setaddrsize getdomaininfo hypercall 
+                               setvcpucontext scheduler unpause 
+                               getvcpuinfo getaddrsize getvcpuaffinity};
+       allow $1 $2:shadow {enable};
+       allow $1 $2:mmu {map_read map_write memorymap adjust pinpage};
+       allow $2 $2:mmu {map_read map_write pinpage};
+       allow $2 domio_t:mmu {map_read};
+       allow $2 $2:grant {query setup};
+       allow $1 $2:grant {map_read unmap};
+       allow $1 $3:event {create};
+')
+
+################################################################################
+#
+# manage_domain(priv_dom, domain)
+#
+################################################################################
+define(`manage_domain', `
+       allow $1 $2:domain {pause destroy};
+')
+
+################################################################################
+#
+# create_channel(caller, peer, channel)
+#
+################################################################################
+define(`create_channel', `
+       type $3, event_type;
+       type_transition $1 $2:event $3;
+       allow $1 $3:event {create};
+       allow $3 $2:event {bind};
+')
+
+################################################################################
+#
+# Boot the hypervisor and dom0
+#
+################################################################################
+allow dom0_t xen_t:xen {kexec readapic writeapic mtrr_read mtrr_add mtrr_del 
+scheduler physinfo heap quirk readconsole writeconsole settime microcode};
+
+allow dom0_t domio_t:mmu {map_read map_write};
+allow dom0_t iomem_t:mmu {map_read map_write};
+allow dom0_t pirq_t:event {vector};
+allow dom0_t xen_t:mmu {memorymap};
+
+allow dom0_t dom0_t:mmu {pinpage map_read map_write adjust};
+allow dom0_t dom0_t:grant {query setup};
+allow dom0_t dom0_t:domain {scheduler getdomaininfo getvcpuinfo 
getvcpuaffinity};
+
+allow xen_t dom0_t:domain {create};
+allow xen_t dom0_t:resource {add remove};
+allow xen_t ioport_t:resource {add_ioport remove_ioport};
+allow dom0_t ioport_t:resource {use};
+allow xen_t iomem_t:resource {add_iomem remove_iomem};
+allow dom0_t iomem_t:resource {use};
+allow xen_t pirq_t:resource {add_irq remove_irq};
+allow dom0_t pirq_t:resource {use};
+
+allow dom0_t security_t:security {compute_av compute_create compute_member 
+check_context load_policy compute_relabel compute_user setenforce setbool
+setsecparam};
+
+create_channel(dom0_t, dom0_t, evchn0-0_t)
+allow dom0_t evchn0-0_t:event {send};
+
+################################################################################
+#
+# Create and manage a domU w/ dom0 IO
+#
+################################################################################
+create_domain(dom0_t, domU_t, evchnU-0_t)
+
+create_channel(domU_t, domU_t, evchnU-U_t)
+allow domU_t evchnU-U_t:event {send};
+
+create_channel(dom0_t, domU_t, evchn0-U_t)
+allow dom0_t evchn0-U_t:event {send};
+
+create_channel(domU_t, dom0_t, evchnU-0_t)
+allow domU_t evchnU-0_t:event {send};
+
+manage_domain(dom0_t, domU_t)
+
+################################################################################
+#
+#
+#
+################################################################################
+sid xen gen_context(system_u:system_r:xen_t,s0)
+sid dom0 gen_context(system_u:system_r:dom0_t,s0)
+sid domU gen_context(system_u:system_r:domU_t,s0)
+sid domxen gen_context(system_u:system_r:domxen_t,s0)
+sid domio gen_context(system_u:system_r:domio_t,s0)
+sid unlabeled gen_context(system_u:system_r:unlabeled_t,s0)
+sid security gen_context(system_u:system_r:security_t,s0)
+sid pirq gen_context(system_u:object_r:pirq_t,s0)
+sid iomem gen_context(system_u:object_r:iomem_t,s0)
+sid ioport gen_context(system_u:object_r:ioport_t,s0)
+sid device gen_context(system_u:object_r:device_t,s0)
+
+role system_r types { xen_type domain_type };
+role user_r types { xen_type domain_type };
+role sysadm_r types { xen_type domain_type };
+role staff_r types { xen_type domain_type };
diff -r 4ddd63b4be9b -r ec8eaab557d8 
tools/flask/policy/policy/support/loadable_module.spt
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/support/loadable_module.spt     Fri Sep 12 
14:47:40 2008 +0900
@@ -0,0 +1,166 @@
+########################################
+#
+# Macros for switching between source policy
+# and loadable policy module support
+#
+
+##############################
+#
+# For adding the module statement
+#
+define(`policy_module',`
+       ifdef(`self_contained_policy',`',`
+               module $1 $2;
+
+               require {
+                       role system_r;
+                       all_kernel_class_perms
+               }
+       ')
+')
+
+##############################
+#
+# For use in interfaces, to optionally insert a require block
+#
+define(`gen_require',`
+       ifdef(`self_contained_policy',`',`
+               define(`in_gen_require_block')
+               require {
+                       $1
+               }
+               undefine(`in_gen_require_block')
+       ')
+')
+
+##############################
+#
+# In the future interfaces should be in loadable modules
+#
+# template(name,rules)
+#
+define(`template',`
+       `define(`$1',`
+##### begin $1(dollarsstar)
+               $2
+##### end $1(dollarsstar)
+       '')
+')
+
+# helper function, since m4 wont expand macros
+# if a line is a comment (#):
+define(`policy_m4_comment',`dnl
+##### $2 depth: $1
+')dnl
+
+##############################
+#
+# In the future interfaces should be in loadable modules
+#
+# interface(name,rules)
+#
+define(`interface',`
+       `define(`$1',`
+
+       define(`policy_temp',incr(policy_call_depth))
+       pushdef(`policy_call_depth',policy_temp)
+       undefine(`policy_temp')
+
+       policy_m4_comment(policy_call_depth,begin `$1'(dollarsstar))
+
+       $2
+
+       define(`policy_temp',decr(policy_call_depth))
+       pushdef(`policy_call_depth',policy_temp)
+       undefine(`policy_temp')
+
+       policy_m4_comment(policy_call_depth,end `$1'(dollarsstar))
+
+       '')
+')
+
+define(`policy_call_depth',0)
+
+##############################
+#
+# Optional policy handling
+#
+define(`optional_policy',`
+       ifdef(`self_contained_policy',`
+               ifdef(`$1',`$2',`$3')
+       ',`
+               optional {
+                       $2
+               ifelse(`$3',`',`',`
+               } else {
+                       $3
+               ')
+               }
+       ')
+')
+
+##############################
+#
+# Determine if we should use the default
+# tunable value as specified by the policy
+# or if the override value should be used
+#
+define(`dflt_or_overr',`ifdef(`$1',$1,$2)')
+
+##############################
+#
+# Extract booleans out of an expression.
+# This needs to be reworked so expressions
+# with parentheses can work.
+
+define(`delcare_required_symbols',`
+ifelse(regexp($1, `\w'), -1, `', `dnl
+bool regexp($1, `\(\w+\)', `\1');
+delcare_required_symbols(regexp($1, `\w+\(.*\)', `\1'))dnl
+') dnl
+')
+
+##############################
+#
+# Tunable declaration
+#
+define(`gen_tunable',`
+       ifdef(`self_contained_policy',`
+               bool $1 dflt_or_overr(`$1'_conf,$2);
+       ',`
+               # loadable module tunable
+               # declaration will go here
+               # instead of bool when
+               # loadable modules support
+               # tunables
+               bool $1 dflt_or_overr(`$1'_conf,$2);
+       ')
+')
+
+##############################
+#
+# Tunable policy handling
+#
+define(`tunable_policy',`
+       ifdef(`self_contained_policy',`
+               if (`$1') {
+                       $2
+               } else {
+                       $3
+               }
+       ',`
+               # structure for tunables
+               # will go here instead of a
+               # conditional when loadable
+               # modules support tunables
+               gen_require(`
+                       delcare_required_symbols(`$1')
+               ')
+
+               if (`$1') {
+                       $2
+               } else {
+                       $3
+               }
+       ')
+')
diff -r 4ddd63b4be9b -r ec8eaab557d8 
tools/flask/policy/policy/support/misc_macros.spt
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/support/misc_macros.spt Fri Sep 12 14:47:40 
2008 +0900
@@ -0,0 +1,32 @@
+
+########################################
+#
+# Helper macros
+#
+
+#
+# shiftn(num,list...)
+#
+# shift the list num times
+#
+define(`shiftn',`ifelse($1,0,`shift($*)',`shiftn(decr($1),shift(shift($*)))')')
+
+########################################
+#
+# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
+#
+define(`gen_user',`user $1 roles { $2 }`'ifdef(`enable_mls', ` level $3 range 
$4')`'ifdef(`enable_mcs',` level s0 range s0`'ifelse(`$5',,,` - s0:$5')');')
+
+########################################
+#
+# gen_context(context,mls_sensitivity,[mcs_categories])
+#
+define(`gen_context',`$1`'ifdef(`enable_mls',`:$2')`'ifdef(`enable_mcs',`:s0`'ifelse(`$3',,,`:$3')')')
 dnl
+
+########################################
+#
+# gen_bool(name,default_value)
+#
+define(`gen_bool',`
+       bool $1 dflt_or_overr(`$1'_conf,$2);
+')
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/systemuser
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/systemuser      Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,19 @@
+##################################
+#
+# System User configuration.
+#
+
+#
+# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
+#
+
+#
+# system_u is the user identity for system processes and objects.
+# There should be no corresponding Unix user identity for system,
+# and a user process should never be assigned the system user
+# identity.
+#
+gen_user(system_u, system_r, s0, s0 - s9:c0.c127, c0.c127)
+
+# Normal users should not be added to this file,
+# but instead added to the users file.
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/flask/policy/policy/users
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/flask/policy/policy/users   Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,39 @@
+
+##################################
+#
+# Core User configuration.
+#
+
+#
+# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
+#
+
+#
+# user_u is a generic user identity for Linux users who have no
+# SELinux user identity defined.  The modified daemons will use
+# this user identity in the security context if there is no matching
+# SELinux user identity for a Linux user.  If you do not want to
+# permit any access to such users, then remove this entry.
+#
+ifdef(`targeted_policy',`
+gen_user(user_u, user_r sysadm_r system_r, s0, s0 - s9:c0.c127)
+',`
+gen_user(user_u, user_r, s0, s0 - s9:c0.c127)
+')
+
+#
+# The following users correspond to Unix identities.
+# These identities are typically assigned as the user attribute
+# when login starts the user shell.  Users with access to the sysadm_r
+# role should use the staff_r role instead of the user_r role when
+# not in the sysadm_r.
+#
+ifdef(`targeted_policy',`
+       gen_user(root, user_r sysadm_r system_r, s0, s0 - s9:c0.c127, c0.c127)
+',`
+       ifdef(`direct_sysadm_daemon',`
+               gen_user(root, sysadm_r staff_r system_r, s0, s0 - s9:c0.c127, 
c0.c127)
+       ',`
+               gen_user(root, sysadm_r staff_r, s0, s0 - s9:c0.c127, c0.c127)
+       ')
+')
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/cirrus_vga.c       Fri Sep 12 14:47:40 2008 +0900
@@ -2554,6 +2554,9 @@ static void set_vram_mapping(CirrusVGASt
 
     fprintf(logfile,"mapping vram to %lx - %lx\n", begin, end);
 
+    if (!s->vram_mfns)
+        return;
+
     xatp.domid = domid;
     xatp.space = XENMAPSPACE_mfn;
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/pass-through.c
--- a/tools/ioemu/hw/pass-through.c     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/pass-through.c     Fri Sep 12 14:47:40 2008 +0900
@@ -57,6 +57,10 @@ static uint32_t pt_irqpin_reg_init(struc
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
+static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
+    struct pt_reg_info_tbl *reg, uint32_t real_offset);
+static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
+    struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_linkctrl2_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev,
@@ -76,6 +80,8 @@ static uint8_t pt_msix_size_init(struct 
 static uint8_t pt_msix_size_init(struct pt_dev *ptdev,
     struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
 static uint8_t pt_vendor_size_init(struct pt_dev *ptdev,
+    struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
+static uint8_t pt_pcie_size_init(struct pt_dev *ptdev,
     struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
 static int pt_byte_reg_read(struct pt_dev *ptdev,
     struct pt_reg_tbl *cfg_entry,
@@ -438,7 +444,7 @@ static struct pt_reg_info_tbl pt_emu_reg
         .init_val   = 0x0000,
         .ro_mask    = 0x0000,
         .emu_mask   = 0xFFFF,
-        .init       = pt_common_reg_init,
+        .init       = pt_linkctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_linkctrl_reg_write,
     },
@@ -449,7 +455,7 @@ static struct pt_reg_info_tbl pt_emu_reg
         .init_val   = 0x0000,
         .ro_mask    = 0x0000,
         .emu_mask   = 0xFFFF,
-        .init       = pt_common_reg_init,
+        .init       = pt_devctrl2_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_devctrl2_reg_write,
     },
@@ -666,8 +672,8 @@ static const struct pt_reg_grp_info_tbl 
     {
         .grp_id     = PCI_CAP_ID_EXP,
         .grp_type   = GRP_TYPE_EMU,
-        .grp_size   = 0x3C,
-        .size_init  = pt_reg_grp_size_init,
+        .grp_size   = 0xFF,
+        .size_init  = pt_pcie_size_init,
         .emu_reg_tbl= pt_emu_reg_pcie_tbl,
     },
     /* MSI-X Capability Structure reg group */
@@ -1869,12 +1875,57 @@ static uint32_t pt_bar_reg_init(struct p
     return reg_field;
 }
 
+/* initialize Link Control register */
+static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
+        struct pt_reg_info_tbl *reg, uint32_t real_offset)
+{
+    uint8_t cap_ver = 0;
+    uint8_t dev_type = 0;
+
+    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
+        (uint8_t)PCI_EXP_FLAGS_VERS);
+    dev_type = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] 
&
+        (uint8_t)PCI_EXP_FLAGS_TYPE) >> 4;
+    
+    /* no need to initialize in case of Root Complex Integrated Endpoint
+     * with cap_ver 1.x 
+     */
+    if ((dev_type == PCI_EXP_TYPE_ROOT_INT_EP) && (cap_ver == 1))
+        return PT_INVALID_REG;
+
+    return reg->init_val;
+}
+
+/* initialize Device Control 2 register */
+static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
+        struct pt_reg_info_tbl *reg, uint32_t real_offset)
+{
+    uint8_t cap_ver = 0;
+
+    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
+        (uint8_t)PCI_EXP_FLAGS_VERS);
+    
+    /* no need to initialize in case of cap_ver 1.x */
+    if (cap_ver == 1)
+        return PT_INVALID_REG;
+
+    return reg->init_val;
+}
+
 /* initialize Link Control 2 register */
 static uint32_t pt_linkctrl2_reg_init(struct pt_dev *ptdev,
         struct pt_reg_info_tbl *reg, uint32_t real_offset)
 {
     int reg_field = 0;
-
+    uint8_t cap_ver = 0;
+
+    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
+        (uint8_t)PCI_EXP_FLAGS_VERS);
+    
+    /* no need to initialize in case of cap_ver 1.x */
+    if (cap_ver == 1)
+        return PT_INVALID_REG;
+    
     /* set Supported Link Speed */
     reg_field |= 
         (0x0F & 
@@ -2034,6 +2085,91 @@ static uint8_t pt_vendor_size_init(struc
         struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
 {
     return ptdev->dev.config[base_offset + 0x02];
+}
+
+/* get PCI Express Capability Structure register group size */
+static uint8_t pt_pcie_size_init(struct pt_dev *ptdev,
+        struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
+{
+    PCIDevice *d = &ptdev->dev;
+    uint16_t exp_flag = 0;
+    uint16_t type = 0;
+    uint16_t vers = 0;
+    uint8_t pcie_size = 0;
+
+    exp_flag = *((uint16_t*)(d->config + (base_offset + PCI_EXP_FLAGS)));
+    type = (exp_flag & PCI_EXP_FLAGS_TYPE) >> 4;
+    vers = (exp_flag & PCI_EXP_FLAGS_VERS);
+
+    /* calculate size depend on capability version and device/port type */
+    /* in case of PCI Express Base Specification Rev 1.x */
+    if (vers == 1)
+    {
+        /* The PCI Express Capabilities, Device Capabilities, and Device 
+         * Status/Control registers are required for all PCI Express devices. 
+         * The Link Capabilities and Link Status/Control are required for all 
+         * Endpoints that are not Root Complex Integrated Endpoints. Endpoints 
+         * are not required to implement registers other than those listed 
+         * above and terminate the capability structure.
+         */
+        switch (type) {
+        case PCI_EXP_TYPE_ENDPOINT:
+        case PCI_EXP_TYPE_LEG_END:
+            pcie_size = 0x14;
+            break;
+        case PCI_EXP_TYPE_ROOT_INT_EP:
+            /* has no link */
+            pcie_size = 0x0C;
+            break;
+        /* only EndPoint passthrough is supported */
+        case PCI_EXP_TYPE_ROOT_PORT:
+        case PCI_EXP_TYPE_UPSTREAM:
+        case PCI_EXP_TYPE_DOWNSTREAM:
+        case PCI_EXP_TYPE_PCI_BRIDGE:
+        case PCI_EXP_TYPE_PCIE_BRIDGE:
+        case PCI_EXP_TYPE_ROOT_EC:
+        default:
+            /* exit I/O emulator */
+            PT_LOG("Internal error: Unsupported device/port type[%d]. "
+                "I/O emulator exit.\n", type);
+            exit(1);
+        }
+    }
+    /* in case of PCI Express Base Specification Rev 2.0 */
+    else if (vers == 2)
+    {
+        switch (type) {
+        case PCI_EXP_TYPE_ENDPOINT:
+        case PCI_EXP_TYPE_LEG_END:
+        case PCI_EXP_TYPE_ROOT_INT_EP:
+            /* For Functions that do not implement the registers, 
+             * these spaces must be hardwired to 0b.
+             */
+            pcie_size = 0x3C;
+            break;
+        /* only EndPoint passthrough is supported */
+        case PCI_EXP_TYPE_ROOT_PORT:
+        case PCI_EXP_TYPE_UPSTREAM:
+        case PCI_EXP_TYPE_DOWNSTREAM:
+        case PCI_EXP_TYPE_PCI_BRIDGE:
+        case PCI_EXP_TYPE_PCIE_BRIDGE:
+        case PCI_EXP_TYPE_ROOT_EC:
+        default:
+            /* exit I/O emulator */
+            PT_LOG("Internal error: Unsupported device/port type[%d]. "
+                "I/O emulator exit.\n", type);
+            exit(1);
+        }
+    }
+    else
+    {
+        /* exit I/O emulator */
+        PT_LOG("Internal error: Unsupported capability version[%d]. "
+            "I/O emulator exit.\n", vers);
+        exit(1);
+    }
+
+    return pcie_size;
 }
 
 /* read byte size emulate register */
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/pass-through.h
--- a/tools/ioemu/hw/pass-through.h     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/pass-through.h     Fri Sep 12 14:47:40 2008 +0900
@@ -60,6 +60,21 @@
 #ifndef PCI_MSI_FLAGS_MASK_BIT
 /* interrupt masking & reporting supported */
 #define PCI_MSI_FLAGS_MASK_BIT  0x0100
+#endif
+
+#ifndef PCI_EXP_TYPE_PCIE_BRIDGE
+/* PCI/PCI-X to PCIE Bridge */
+#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8
+#endif
+
+#ifndef PCI_EXP_TYPE_ROOT_INT_EP
+/* Root Complex Integrated Endpoint */
+#define PCI_EXP_TYPE_ROOT_INT_EP 0x9
+#endif
+
+#ifndef PCI_EXP_TYPE_ROOT_EC
+/* Root Complex Event Collector */
+#define PCI_EXP_TYPE_ROOT_EC     0xa
 #endif
 
 #define PT_INVALID_REG          0xFFFFFFFF      /* invalid register value */
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/pci.c      Fri Sep 12 14:47:40 2008 +0900
@@ -45,7 +45,6 @@ static void pci_update_mappings(PCIDevic
 static void pci_update_mappings(PCIDevice *d);
 
 target_phys_addr_t pci_mem_base;
-static int pci_irq_index;
 static PCIBus *first_bus;
 
 PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
@@ -114,9 +113,6 @@ PCIDevice *pci_register_device(PCIBus *b
 {
     PCIDevice *pci_dev;
 
-    if (pci_irq_index >= PCI_DEVICES_MAX)
-        return NULL;
-    
     if (devfn < 0) {
         for(devfn = bus->devfn_min ; devfn < 256; devfn += 8) {
             if ( !bus->devices[devfn] &&
@@ -140,7 +136,6 @@ PCIDevice *pci_register_device(PCIBus *b
         config_write = pci_default_write_config;
     pci_dev->config_read = config_read;
     pci_dev->config_write = config_write;
-    pci_dev->irq_index = pci_irq_index++;
     bus->devices[devfn] = pci_dev;
     return pci_dev;
 }
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/pt-msi.c
--- a/tools/ioemu/hw/pt-msi.c   Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/pt-msi.c   Fri Sep 12 14:47:40 2008 +0900
@@ -313,7 +313,7 @@ int pt_msix_init(struct pt_dev *dev, int
 
     table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE);
     bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR;
-    table_off &= table_off & ~PCI_MSIX_BIR;
+    table_off = dev->msix->table_off = table_off & ~PCI_MSIX_BIR;
     dev->msix->table_base = dev->pci_dev->base_addr[bar_index];
     PT_LOG("get MSI-X table bar base %llx\n",
            (unsigned long long)dev->msix->table_base);
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/vga.c      Fri Sep 12 14:47:40 2008 +0900
@@ -2080,7 +2080,13 @@ void xen_vga_vram_map(uint64_t vram_addr
 
     if (copy)
         memcpy(vram, xen_vga_state->vram_ptr, VGA_RAM_SIZE);
-    qemu_free(xen_vga_state->vram_ptr);
+    if (xen_vga_state->vram_mfns) {
+        /* In case this function is called more than once */
+        free(xen_vga_state->vram_mfns);
+        munmap(xen_vga_state->vram_ptr, VGA_RAM_SIZE);
+    } else {
+        qemu_free(xen_vga_state->vram_ptr);
+    }
     xen_vga_state->vram_ptr = vram;
     xen_vga_state->vram_mfns = pfn_list;
 #ifdef CONFIG_STUBDOM
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/hw/xen_machine_fv.c
--- a/tools/ioemu/hw/xen_machine_fv.c   Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/hw/xen_machine_fv.c   Fri Sep 12 14:47:40 2008 +0900
@@ -139,8 +139,10 @@ uint8_t *qemu_map_cache(target_phys_addr
         !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
         qemu_remap_bucket(entry, address_index);
 
-    if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
+    if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) {
+        last_address_index = ~0UL;
         return NULL;
+    }
 
     last_address_index = address_index;
     last_address_vaddr = entry->vaddr_base;
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/ioemu/vl.h  Fri Sep 12 14:47:40 2008 +0900
@@ -812,8 +812,6 @@ struct PCIDevice {
     /* do not access the following fields */
     PCIConfigReadFunc *config_read;
     PCIConfigWriteFunc *config_write;
-    /* ??? This is a PC-specific hack, and should be removed.  */
-    int irq_index;
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c     Fri Sep 12 14:47:40 2008 +0900
@@ -53,12 +53,12 @@ static inline void set_bit(int nr, volat
 }
 
 static int
-suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+suspend_and_state(int (*suspend)(void), int xc_handle, int io_fd,
                   int dom, xc_dominfo_t *info)
 {
     int i = 0;
 
-    if (!(*suspend)(dom)) {
+    if (!(*suspend)()) {
         ERROR("Suspend request failed");
         return -1;
     }
@@ -406,7 +406,7 @@ out:
 
 int
 xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
-               uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+               uint32_t max_factor, uint32_t flags, int (*suspend)(void),
                int hvm, void *(*init_qemu_maps)(int, unsigned),
                void (*qemu_flip_buffer)(int, int))
 {
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xc_domain_save.c      Fri Sep 12 14:47:40 2008 +0900
@@ -338,72 +338,23 @@ static int analysis_phase(int xc_handle,
 }
 
 
-static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+static int suspend_and_state(int (*suspend)(void), int xc_handle, int io_fd,
                              int dom, xc_dominfo_t *info)
 {
-    int i = 0;
-
-    if ( !(*suspend)(dom) )
+    if ( !(*suspend)() )
     {
         ERROR("Suspend request failed");
         return -1;
     }
 
- retry:
-
-    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
-    {
-        ERROR("Could not get domain info");
+    if ( (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) ||
+         !info->shutdown || (info->shutdown_reason != SHUTDOWN_suspend) )
+    {
+        ERROR("Domain not in suspended state");
         return -1;
     }
 
-    if ( info->dying )
-    {
-        ERROR("domain is dying");
-        return -1;
-    }
-
-    if ( info->crashed )
-    {
-        ERROR("domain has crashed");
-        return -1;
-    }
-
-    if ( info->shutdown )
-    {
-        switch ( info->shutdown_reason )
-        {
-        case SHUTDOWN_poweroff:
-        case SHUTDOWN_reboot:
-            ERROR("domain has shut down");
-            return -1;
-        case SHUTDOWN_suspend:
-            return 0;
-        case SHUTDOWN_crash:
-            ERROR("domain has crashed");
-            return -1;
-        }
-    }
-
-    if ( info->paused )
-    {
-        /* Try unpausing domain, wait, and retest. */
-        xc_domain_unpause( xc_handle, dom );
-        ERROR("Domain was paused. Wait and re-test.");
-        usleep(10000); /* 10ms */
-        goto retry;
-    }
-
-    if ( ++i < 100 )
-    {
-        ERROR("Retry suspend domain");
-        usleep(10000); /* 10ms */
-        goto retry;
-    }
-
-    ERROR("Unable to suspend domain.");
-
-    return -1;
+    return 0;
 }
 
 /*
@@ -796,7 +747,7 @@ static xen_pfn_t *map_and_save_p2m_table
 
 
 int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
-                   uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+                   uint32_t max_factor, uint32_t flags, int (*suspend)(void),
                    int hvm, void *(*init_qemu_maps)(int, unsigned), 
                    void (*qemu_flip_buffer)(int, int))
 {
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xc_evtchn.c
--- a/tools/libxc/xc_evtchn.c   Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xc_evtchn.c   Fri Sep 12 14:47:40 2008 +0900
@@ -59,17 +59,8 @@ int xc_evtchn_reset(int xc_handle,
     return do_evtchn_op(xc_handle, EVTCHNOP_reset, &arg, sizeof(arg), 0);
 }
 
-int xc_evtchn_status(int xc_handle,
-                     uint32_t dom,
-                     uint32_t port)
+int xc_evtchn_status(int xc_handle, xc_evtchn_status_t *status)
 {
-    int rc;
-    struct evtchn_status arg = { .dom = (domid_t)dom,
-                                 .port = (evtchn_port_t)port };
-
-    rc = do_evtchn_op(xc_handle, EVTCHNOP_status, &arg, sizeof(arg), 1);
-    if ( rc == 0 )
-        rc = arg.status;
-
-    return rc;
+    return do_evtchn_op(xc_handle, EVTCHNOP_status, status,
+                        sizeof(*status), 1);
 }
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xc_private.c  Fri Sep 12 14:47:40 2008 +0900
@@ -307,6 +307,13 @@ int xc_memory_op(int xc_handle,
             goto out1;
         }
         break;
+    case XENMEM_remove_from_physmap:
+        if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) )
+        {
+            PERROR("Could not lock");
+            goto out1;
+        }
+        break;
     case XENMEM_current_reservation:
     case XENMEM_maximum_reservation:
     case XENMEM_maximum_gpfn:
@@ -339,6 +346,9 @@ int xc_memory_op(int xc_handle,
         break;
     case XENMEM_add_to_physmap:
         unlock_pages(arg, sizeof(struct xen_add_to_physmap));
+        break;
+    case XENMEM_remove_from_physmap:
+        unlock_pages(arg, sizeof(struct xen_remove_from_physmap));
         break;
     case XENMEM_current_reservation:
     case XENMEM_maximum_reservation:
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xenctrl.h     Fri Sep 12 14:47:40 2008 +0900
@@ -502,9 +502,9 @@ xc_evtchn_alloc_unbound(int xc_handle,
 
 int xc_evtchn_reset(int xc_handle,
                     uint32_t dom);
-int xc_evtchn_status(int xc_handle,
-                     uint32_t dom,
-                     uint32_t port);
+
+typedef struct evtchn_status xc_evtchn_status_t;
+int xc_evtchn_status(int xc_handle, xc_evtchn_status_t *status);
 
 /*
  * Return a handle to the event channel driver, or -1 on failure, in which case
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/libxc/xenguest.h    Fri Sep 12 14:47:40 2008 +0900
@@ -25,7 +25,7 @@
  */
 int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
                    uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
-                   int (*suspend)(int domid), int hvm,
+                   int (*suspend)(void), int hvm,
                    void *(*init_qemu_maps)(int, unsigned),  /* HVM only */
                    void (*qemu_flip_buffer)(int, int));     /* HVM only */
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/Makefile
--- a/tools/python/Makefile     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/Makefile     Fri Sep 12 14:47:40 2008 +0900
@@ -1,13 +1,5 @@ XEN_ROOT = ../..
 XEN_ROOT = ../..
 include $(XEN_ROOT)/tools/Rules.mk
-
-XEN_SECURITY_MODULE = dummy
-ifeq ($(FLASK_ENABLE),y)
-XEN_SECURITY_MODULE = flask
-endif
-ifeq ($(ACM_SECURITY),y)
-XEN_SECURITY_MODULE = acm
-endif
 
 .PHONY: all
 all: build
@@ -23,8 +15,8 @@ NLSDIR = /usr/share/locale
 NLSDIR = /usr/share/locale
 
 .PHONY: build buildpy
-buildpy: xsm.py
-       CC="$(CC)" CFLAGS="$(CFLAGS)" 
XEN_SECURITY_MODULE="$(XEN_SECURITY_MODULE)" python setup.py build
+buildpy: 
+       CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build
 
 build: buildpy refresh-pot refresh-po $(CATALOGS)
 
@@ -61,18 +53,6 @@ refresh-po: $(POTFILE)
 %.mo: %.po
        $(MSGFMT) -c -o $@ $<
 
-xsm.py:
-       @(set -e; \
-         echo "XEN_SECURITY_MODULE = \""$(XEN_SECURITY_MODULE)"\""; \
-         echo "from xsm_core import *"; \
-         echo ""; \
-         echo "import 
xen.util.xsm."$(XEN_SECURITY_MODULE)"."$(XEN_SECURITY_MODULE)" as xsm_module"; \
-         echo ""; \
-         echo "xsm_init(xsm_module)"; \
-         echo "from 
xen.util.xsm."$(XEN_SECURITY_MODULE)"."$(XEN_SECURITY_MODULE)" import *"; \
-         echo "del xsm_module"; \
-         echo "") >xen/util/xsm/$@
-
 .PHONY: install
 ifndef XEN_PYTHON_NATIVE_INSTALL
 install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print 
auxbin.libpath()")
@@ -104,4 +84,4 @@ test:
 
 .PHONY: clean
 clean:
-       rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/xsm/xsm.py 
xen/util/auxbin.pyc
+       rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/util/xsconstants.py
--- a/tools/python/xen/util/xsconstants.py      Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/util/xsconstants.py      Fri Sep 12 14:47:40 2008 +0900
@@ -20,8 +20,10 @@ XS_INST_BOOT = (1 << 0)
 XS_INST_BOOT = (1 << 0)
 XS_INST_LOAD = (1 << 1)
 
-XS_POLICY_NONE  = 0
 XS_POLICY_ACM = (1 << 0)
+XS_POLICY_FLASK = (1 << 1)
+XS_POLICY_DUMMY  = (1 << 2)
+XS_POLICY_USE = 0
 
 # Some internal variables used by the Xen-API
 ACM_LABEL_VM  = (1 << 0)
@@ -107,6 +109,6 @@ ACM_POLICY_ID = 'ACM'
 
 INVALID_POLICY_PREFIX = 'INV_'
 
-INVALID_SSIDREF = 0xFFFFFFFF
+INVALID_SSIDREF = 0xFFFFFFFFL
 
 XS_INACCESSIBLE_LABEL = '__INACCESSIBLE__'
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/util/xsm/flask/flask.py
--- a/tools/python/xen/util/xsm/flask/flask.py  Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/util/xsm/flask/flask.py  Fri Sep 12 14:47:40 2008 +0900
@@ -1,5 +1,6 @@ import sys
 import sys
 from xen.lowlevel import flask
+from xen.util import xsconstants
 from xen.xend import sxp
 
 #Functions exported through XML-RPC
@@ -12,7 +13,7 @@ def err(msg):
     raise XSMError(msg)
 
 def on():
-    return 0 #xsconstants.XS_POLICY_FLASK
+    return xsconstants.XS_POLICY_FLASK
 
 def ssidref2label(ssidref):
     try:
@@ -37,8 +38,9 @@ def set_security_label(policy, label):
     return label
 
 def ssidref2security_label(ssidref):
-    return ssidref2label(ssidref)
+    label = ssidref2label(ssidref)
+    return label
 
 def get_security_label(self, xspol=None):
-    label = self.info.get('security_label', '')
+    label = self.info['security_label']
     return label
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/util/xsm/xsm.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/xsm/xsm.py  Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,20 @@
+import sys
+import string
+from xen.xend import XendOptions
+from xen.util import xsconstants
+from xsm_core import xsm_init
+
+xoptions = XendOptions.instance()
+xsm_module_name = xoptions.get_xsm_module_name()
+
+xsconstants.XS_POLICY_USE = eval("xsconstants.XS_POLICY_" +
+                                 string.upper(xsm_module_name))
+
+xsm_module_path = "xen.util.xsm." + xsm_module_name + "." + xsm_module_name
+xsm_module = __import__(xsm_module_path, globals(), locals(), ['*'])
+
+xsm_init(xsm_module)
+
+for op in dir(xsm_module):
+    if not hasattr(sys.modules[__name__], op):
+        setattr(sys.modules[__name__], op, getattr(xsm_module, op, None))
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py       Fri Sep 12 14:47:40 2008 +0900
@@ -729,7 +729,7 @@ class XendConfig(dict):
             self.parse_cpuid(cfg, 'cpuid_check')
 
         import xen.util.xsm.xsm as security
-        if security.on() == xsconstants.XS_POLICY_ACM:
+        if security.on() == xsconstants.XS_POLICY_USE:
             from xen.util.acmpolicy import ACM_LABEL_UNLABELED
             if not 'security' in cfg and sxp.child_value(sxp_cfg, 'security'):
                 cfg['security'] = sxp.child_value(sxp_cfg, 'security')
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Sep 12 14:47:40 2008 +0900
@@ -2069,7 +2069,7 @@ class XendDomainInfo:
         balloon.free(2*1024) # 2MB should be plenty
 
         ssidref = 0
-        if security.on() == xsconstants.XS_POLICY_ACM:
+        if security.on() == xsconstants.XS_POLICY_USE:
             ssidref = security.calc_dom_ssidref_from_info(self.info)
             if security.has_authorization(ssidref) == False:
                 raise VmError("VM is not authorized to run.")
@@ -2855,10 +2855,6 @@ class XendDomainInfo:
             info["maxmem_kb"] = XendNode.instance() \
                                 .physinfo_dict()['total_memory'] * 1024
 
-        #ssidref field not used any longer
-        if 'ssidref' in info:
-            info.pop('ssidref')
-
         # make sure state is reset for info
         # TODO: we should eventually get rid of old_dom_states
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/XendOptions.py
--- a/tools/python/xen/xend/XendOptions.py      Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/XendOptions.py      Fri Sep 12 14:47:40 2008 +0900
@@ -131,6 +131,9 @@ class XendOptions:
 
     """Default script to configure a backend network interface"""
     vif_script = osdep.vif_script
+
+    """Default Xen Security Module"""
+    xsm_module_default = 'dummy'
 
     """Default rotation count of qemu-dm log file."""
     qemu_dm_logrotate_count = 10
@@ -427,6 +430,11 @@ class XendOptionsFile(XendOptions):
         return self.get_config_value('xen-api-server',
                                      self.xen_api_server_default)
 
+    def get_xsm_module_name(self):
+        """Get the Xen Security Module name.
+        """
+        return self.get_config_string('xsm_module_name', 
self.xsm_module_default)
+
 if os.uname()[0] == 'SunOS':
     class XendOptionsSMF(XendOptions):
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/server/blkif.py     Fri Sep 12 14:47:40 2008 +0900
@@ -78,7 +78,7 @@ class BlkifController(DevController):
         if uuid:
             back['uuid'] = uuid
 
-        if security.on() == xsconstants.XS_POLICY_ACM:
+        if security.on() == xsconstants.XS_POLICY_USE:
             self.do_access_control(config, uname)
 
         (device_path, devid) = blkif.blkdev_name_to_number(dev)
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/server/netif.py     Fri Sep 12 14:47:40 2008 +0900
@@ -156,7 +156,7 @@ class NetifController(DevController):
             front = { 'handle' : "%i" % devid,
                       'mac'    : mac }
 
-        if security.on() == xsconstants.XS_POLICY_ACM:
+        if security.on() == xsconstants.XS_POLICY_USE:
             self.do_access_control(config)
 
         return (devid, back, front)
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xend/server/pciif.py     Fri Sep 12 14:47:40 2008 +0900
@@ -286,7 +286,7 @@ class PciController(DevController):
                     )%(dev.name))
 
         if dev.has_non_page_aligned_bar and arch.type != "ia64":
-            raise VmError("pci: %: non-page-aligned MMIO BAR found." % 
dev.name)
+            raise VmError("pci: %s: non-page-aligned MMIO BAR found." % 
dev.name)
 
         self.CheckSiblingDevices(fe_domid, dev)
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xm/create.py     Fri Sep 12 14:47:40 2008 +0900
@@ -566,11 +566,11 @@ gopts.var('hap', val='HAP',
           use="""Hap status (0=hap is disabled;
           1=hap is enabled.""")
 
-gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,exc=ECX,edx=EDX",
+gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
           fn=append_value, default=[],
           use="""Cpuid description.""")
 
-gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,exc=ECX,edx=EDX",
+gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
           fn=append_value, default=[],
           use="""Cpuid check description.""")
 
@@ -971,7 +971,7 @@ def preprocess_cpuid(vals, attr_name):
                         "of the register %s for input %s\n"
                         % (res['reg'], input) )
                 cpuid[input][res['reg']] = res['val'] # new register
-    setattr(vals, attr_name, cpuid)
+            setattr(vals, attr_name, cpuid)
 
 def preprocess_pci(vals):
     if not vals.pci: return
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/python/xen/xm/main.py       Fri Sep 12 14:47:40 2008 +0900
@@ -1812,7 +1812,7 @@ def domain_name_to_domid(domain_name):
     else:
         dom = server.xend.domain(domain_name)
         domid = int(sxp.child_value(dom, 'domid', '-1'))
-    return domid
+    return int(domid)
 
 def xm_vncviewer(args):
     autopass = False;
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xcutils/lsevtchn.c
--- a/tools/xcutils/lsevtchn.c  Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xcutils/lsevtchn.c  Fri Sep 12 14:47:40 2008 +0900
@@ -8,49 +8,55 @@
 #include <xenctrl.h>
 #include <xenguest.h>
 
-int
-main(int argc, char **argv)
+int main(int argc, char **argv)
 {
-    int xc_fd;
-    int domid = 0, port = 0, status;
-    const char *msg;
+    int xc_fd, domid, port, rc;
+    xc_evtchn_status_t status;
 
-    if ( argc > 1 )
-        domid = strtol(argv[1], NULL, 10);
+    domid = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
 
     xc_fd = xc_interface_open();
     if ( xc_fd < 0 )
         errx(1, "failed to open control interface");
 
-    while ( (status = xc_evtchn_status(xc_fd, domid, port)) >= 0 )
+    for ( port = 0; ; port++ )
     {
-        switch ( status )
+        status.dom = domid;
+        status.port = port;
+        rc = xc_evtchn_status(xc_fd, &status);
+        if ( rc < 0 )
+            break;
+
+        if ( status.status == EVTCHNSTAT_closed )
+            continue;
+
+        printf("%4d: VCPU %u: ", port, status.vcpu);
+
+        switch ( status.status )
         {
-        case EVTCHNSTAT_closed:
-            msg = "Channel is not in use.";
-            break;
         case EVTCHNSTAT_unbound:
-            msg = "Channel is waiting interdom connection.";
+            printf("Interdomain (Waiting connection) - Remote Domain %u",
+                   status.u.unbound.dom);
             break;
         case EVTCHNSTAT_interdomain:
-            msg = "Channel is connected to remote domain.";
+            printf("Interdomain (Connected) - Remote Domain %u, Port %u",
+                   status.u.interdomain.dom, status.u.interdomain.port);
             break;
         case EVTCHNSTAT_pirq:
-            msg = "Channel is bound to a phys IRQ line.";
+            printf("Physical IRQ %u", status.u.pirq);
             break;
         case EVTCHNSTAT_virq:
-            msg = "Channel is bound to a virtual IRQ line.";
+            printf("Virtual IRQ %u", status.u.virq);
             break;
         case EVTCHNSTAT_ipi:
-            msg = "Channel is bound to a virtual IPI line.";
+            printf("IPI");
             break;
         default:
-            msg = "Unknown.";
+            printf("Unknown");
             break;
+        }
 
-        }
-        printf("%03d: %d: %s\n", port, status, msg);
-        port++;
+        printf("\n");
     }
 
     xc_interface_close(xc_fd);
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xcutils/xc_save.c   Fri Sep 12 14:47:40 2008 +0900
@@ -32,7 +32,7 @@ static struct suspendinfo {
  * Issue a suspend request through stdout, and receive the acknowledgement
  * from stdin.  This is handled by XendCheckpoint in the Python layer.
  */
-static int compat_suspend(int domid)
+static int compat_suspend(void)
 {
     char ans[30];
 
@@ -43,16 +43,35 @@ static int compat_suspend(int domid)
             !strncmp(ans, "done\n", 5));
 }
 
-static int suspend_evtchn_release(int xc, int domid)
+static int suspend_evtchn_release(void)
 {
     if (si.suspend_evtchn >= 0) {
-       xc_evtchn_unbind(si.xce, si.suspend_evtchn);
-       si.suspend_evtchn = -1;
+        xc_evtchn_unbind(si.xce, si.suspend_evtchn);
+        si.suspend_evtchn = -1;
     }
     if (si.xce >= 0) {
-       xc_evtchn_close(si.xce);
-       si.xce = -1;
-    }
+        xc_evtchn_close(si.xce);
+        si.xce = -1;
+    }
+
+    return 0;
+}
+
+static int await_suspend(void)
+{
+    int rc;
+
+    do {
+        rc = xc_evtchn_pending(si.xce);
+        if (rc < 0) {
+            warnx("error polling suspend notification channel: %d", rc);
+            return -1;
+        }
+    } while (rc != si.suspend_evtchn);
+
+    /* harmless for one-off suspend */
+    if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
+        warnx("failed to unmask suspend notification channel: %d", rc);
 
     return 0;
 }
@@ -71,16 +90,16 @@ static int suspend_evtchn_init(int xc, i
 
     xs = xs_daemon_open();
     if (!xs) {
-       errx(1, "failed to get xenstore handle");
-       return -1;
+        warnx("failed to get xenstore handle");
+        return -1;
     }
     sprintf(path, "/local/domain/%d/device/suspend/event-channel", domid);
     portstr = xs_read(xs, XBT_NULL, path, &plen);
     xs_daemon_close(xs);
 
     if (!portstr || !plen) {
-       warnx("could not read suspend event channel");
-       return -1;
+        warnx("could not read suspend event channel");
+        return -1;
     }
 
     port = atoi(portstr);
@@ -88,27 +107,29 @@ static int suspend_evtchn_init(int xc, i
 
     si.xce = xc_evtchn_open();
     if (si.xce < 0) {
-       errx(1, "failed to open event channel handle");
-       goto cleanup;
+        warnx("failed to open event channel handle");
+        goto cleanup;
     }
 
     si.suspend_evtchn = xc_evtchn_bind_interdomain(si.xce, domid, port);
     if (si.suspend_evtchn < 0) {
-       errx(1, "failed to bind suspend event channel: %d",
-            si.suspend_evtchn);
-       goto cleanup;
+        warnx("failed to bind suspend event channel: %d", si.suspend_evtchn);
+        goto cleanup;
     }
 
     rc = xc_domain_subscribe_for_suspend(xc, domid, port);
     if (rc < 0) {
-       errx(1, "failed to subscribe to domain: %d", rc);
-       goto cleanup;
-    }
+        warnx("failed to subscribe to domain: %d", rc);
+        goto cleanup;
+    }
+
+    /* event channel is pending immediately after binding */
+    await_suspend();
 
     return 0;
 
   cleanup:
-    suspend_evtchn_release(xc, domid);
+    suspend_evtchn_release();
 
     return -1;
 }
@@ -116,29 +137,20 @@ static int suspend_evtchn_init(int xc, i
 /**
  * Issue a suspend request to a dedicated event channel in the guest, and
  * receive the acknowledgement from the subscribe event channel. */
-static int evtchn_suspend(int domid)
-{
-    int xcefd;
+static int evtchn_suspend(void)
+{
     int rc;
 
     rc = xc_evtchn_notify(si.xce, si.suspend_evtchn);
     if (rc < 0) {
-       errx(1, "failed to notify suspend request channel: %d", rc);
-       return 0;
-    }
-
-    xcefd = xc_evtchn_fd(si.xce);
-    do {
-      rc = xc_evtchn_pending(si.xce);
-      if (rc < 0) {
-       errx(1, "error polling suspend notification channel: %d", rc);
-       return 0;
-      }
-    } while (rc != si.suspend_evtchn);
-
-    /* harmless for one-off suspend */
-    if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
-       errx(1, "failed to unmask suspend notification channel: %d", rc);
+        warnx("failed to notify suspend request channel: %d", rc);
+        return 0;
+    }
+
+    if (await_suspend() < 0) {
+        warnx("suspend failed");
+        return 0;
+    }
 
     /* notify xend that it can do device migration */
     printf("suspended\n");
@@ -147,12 +159,12 @@ static int evtchn_suspend(int domid)
     return 1;
 }
 
-static int suspend(int domid)
+static int suspend(void)
 {
     if (si.suspend_evtchn >= 0)
-       return evtchn_suspend(domid);
-
-    return compat_suspend(domid);
+        return evtchn_suspend();
+
+    return compat_suspend();
 }
 
 /* For HVM guests, there are two sources of dirty pages: the Xen shadow
@@ -195,11 +207,9 @@ static void qemu_flip_buffer(int domid, 
 
     /* Tell qemu that we want it to start writing log-dirty bits to the
      * other buffer */
-    if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1)) {
+    if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1))
         errx(1, "can't write next-active to store path (%s)\n", 
-              qemu_next_active_path);
-        exit(1);
-    }
+             qemu_next_active_path);
 
     /* Wait a while for qemu to signal that it has switched to the new 
      * active buffer */
@@ -208,10 +218,8 @@ static void qemu_flip_buffer(int domid, 
     tv.tv_usec = 0;
     FD_ZERO(&fdset);
     FD_SET(xs_fileno(xs), &fdset);
-    if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) {
+    if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1)
         errx(1, "timed out waiting for qemu to switch buffers\n");
-        exit(1);
-    }
     watch = xs_read_watch(xs, &len);
     free(watch);
     
@@ -221,7 +229,7 @@ static void qemu_flip_buffer(int domid, 
         goto read_again;
 }
 
-static void * init_qemu_maps(int domid, unsigned int bitmap_size)
+static void *init_qemu_maps(int domid, unsigned int bitmap_size)
 {
     key_t key;
     char key_ascii[17] = {0,};
@@ -293,7 +301,7 @@ main(int argc, char **argv)
     int ret;
 
     if (argc != 6)
-       errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
+        errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
 
     xc_fd = xc_interface_open();
     if (xc_fd < 0)
@@ -305,13 +313,14 @@ main(int argc, char **argv)
     max_f = atoi(argv[4]);
     flags = atoi(argv[5]);
 
-    suspend_evtchn_init(xc_fd, domid);
+    if (suspend_evtchn_init(xc_fd, domid) < 0)
+        warnx("suspend event channel initialization failed, using slow path");
 
     ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
                          &suspend, !!(flags & XCFLAGS_HVM),
                          &init_qemu_maps, &qemu_flip_buffer);
 
-    suspend_evtchn_release(xc_fd, domid);
+    suspend_evtchn_release();
 
     xc_interface_close(xc_fd);
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xenstore/xs.c       Fri Sep 12 14:47:40 2008 +0900
@@ -795,8 +795,11 @@ char *xs_get_domain_path(struct xs_handl
 
 bool xs_is_domain_introduced(struct xs_handle *h, unsigned int domid)
 {
-       return strcmp("F",
-                     single_with_domid(h, XS_IS_DOMAIN_INTRODUCED, domid));
+       char *domain = single_with_domid(h, XS_IS_DOMAIN_INTRODUCED, domid);
+       int rc = strcmp("F", domain);
+
+       free(domain);
+       return rc;
 }
 
 /* Only useful for DEBUG versions */
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xentrace/formats
--- a/tools/xentrace/formats    Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xentrace/formats    Fri Sep 12 14:47:40 2008 +0900
@@ -4,56 +4,69 @@ 0x0001f002  CPU%(cpu)d  %(tsc)d (+%(relt
 0x0001f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  wrap_buffer       0x%(1)08x
 0x0001f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  cpu_change        0x%(1)08x
 
-0x0002f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_add_domain  [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_rem_domain  [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_sleep      [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_wake       [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_yield          [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_block          [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
-0x0002f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_shutdown          [ 
domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
-0x0002f008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_ctl
-0x0002f009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_adjdom      [ domid = 
0x%(1)08x ]
-0x0002f00a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  __enter_scheduler [ 
prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 
0x%(4)08x ]
-0x0002f00B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  s_timer_fn
-0x0002f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  t_timer_fn
-0x0002f00d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  dom_timer_fn
-0x0002f00e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infprev    [ old_domid 
= 0x%(1)08x, runtime = %(2)d ]
-0x0002f00f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infnext    [ new_domid 
= 0x%(1)08x, time = %(2)d, r_time = %(3)d ]
+0x00021011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_runnable [ dom:vcpu 
= 0x%(1)08x ]
+0x00021021  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_blocked  [ dom:vcpu 
= 0x%(1)08x ]
+0x00021031  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_offline  [ dom:vcpu 
= 0x%(1)08x ]
+0x00021101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_running [ dom:vcpu 
= 0x%(1)08x ]
+0x00021121  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_blocked [ dom:vcpu 
= 0x%(1)08x ]
+0x00021131  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_offline [ dom:vcpu 
= 0x%(1)08x ]
+0x00021201  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_running  [ dom:vcpu 
= 0x%(1)08x ]
+0x00021211  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_runnable [ dom:vcpu 
= 0x%(1)08x ]
+0x00021231  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_offline  [ dom:vcpu 
= 0x%(1)08x ]
+0x00021301  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_running  [ dom:vcpu 
= 0x%(1)08x ]
+0x00021311  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_runnable [ dom:vcpu 
= 0x%(1)08x ]
+0x00021321  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_blocked  [ dom:vcpu 
= 0x%(1)08x ]
 
-0x00081001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMENTRY     [ dom:vcpu = 
0x%(1)08x ]
-0x00081002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ dom:vcpu = 
0x%(1)08x, exitcode = 0x%(2)08x, rIP  = 0x%(3)08x ]
-0x00081102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ dom:vcpu = 
0x%(1)08x, exitcode = 0x%(2)08x, rIP  = 0x%(3)016x ]
-0x00082001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ dom:vcpu = 
0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ]
-0x00082101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ dom:vcpu = 
0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)016x ]
-0x00082002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ dom:vcpu = 
0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ]
-0x00082102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ dom:vcpu = 
0x%(1)08x,  errorcode = 0x%(2)02x, virt = 0x%(3)016x ]
-0x00082003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_EXC     [ dom:vcpu = 
0x%(1)08x, vector = 0x%(2)02x, errorcode = 0x%(3)04x ]
-0x00082004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_VIRQ    [ dom:vcpu = 
0x%(1)08x, vector = 0x%(2)02x, fake = %(3)d ]
-0x00082005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  REINJ_VIRQ  [ dom:vcpu = 
0x%(1)08x, vector = 0x%(2)02x ]
-0x00082006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_READ     [ dom:vcpu = 
0x%(1)08x, port = 0x%(2)04x, size = %(3)d ]
-0x00082007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_WRITE    [ dom:vcpu = 
0x%(1)08x, port = 0x%(2)04x, size = %(3)d ]
-0x00082008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ dom:vcpu = 
0x%(1)08x, CR# = %(2)d, value = 0x%(3)08x ]
-0x00082108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ dom:vcpu = 
0x%(1)08x, CR# = %(2)d, value = 0x%(3)016x ]
-0x00082009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ dom:vcpu = 
0x%(1)08x, CR# = %(2)d, value = 0x%(3)08x ]
-0x00082109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ dom:vcpu = 
0x%(1)08x, CR# = %(2)d, value = 0x%(3)016x ]
-0x0008200A  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_READ     [ dom:vcpu = 
0x%(1)08x ]
-0x0008200B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_WRITE    [ dom:vcpu = 
0x%(1)08x ]
-0x0008200C  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_READ    [ dom:vcpu = 
0x%(1)08x, MSR# = 0x%(2)08x, value = 0x%(3)016x ]
-0x0008200D  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_WRITE   [ dom:vcpu = 
0x%(1)08x, MSR# = 0x%(2)08x, value = 0x%(3)016x ]
-0x0008200E  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CPUID       [ dom:vcpu = 
0x%(1)08x, func = 0x%(2)08x, eax = 0x%(3)08x, ebx = 0x%(4)08x, ecx=0x%(5)08x, 
edx = 0x%(6)08x ]
-0x0008200F  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR        [ dom:vcpu = 
0x%(1)08x, vector = 0x%(2)02x ]
-0x00082010  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NMI         [ dom:vcpu = 
0x%(1)08x ]
-0x00082011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  SMI         [ dom:vcpu = 
0x%(1)08x ]
-0x00082012  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMMCALL     [ dom:vcpu = 
0x%(1)08x, func = 0x%(2)08x ]
-0x00082013  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  HLT         [ dom:vcpu = 
0x%(1)08x, intpending = %(2)d ]
-0x00082014  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ dom:vcpu = 
0x%(1)08x, is invlpga? = %(2)d, virt = 0x%(3)08x ]
-0x00082114  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ dom:vcpu = 
0x%(1)08x, is invlpga? = %(2)d, virt = 0x%(3)016x ]
-0x00082015  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MCE         [ dom:vcpu = 
0x%(1)08x ]
-0x00082016  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_ASSIST   [ dom:vcpu = 
0x%(1)08x, data = 0x%(2)04x ]
-0x00082017  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MMIO_ASSIST [ dom:vcpu = 
0x%(1)08x, data = 0x%(2)04x ]
-0x00082018  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CLTS        [ dom:vcpu = 
0x%(1)08x ]
-0x00082019  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ dom:vcpu = 
0x%(1)08x, value = 0x%(2)08x ]
-0x00082119  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ dom:vcpu = 
0x%(1)08x, value = 0x%(2)016x ]
+0x00028001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_add_domain  [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_rem_domain  [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_sleep      [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_wake       [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_yield          [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_block          [ domid = 
0x%(1)08x, edomid = 0x%(2)08x ]
+0x00028007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_shutdown          [ 
domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
+0x00028008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_ctl
+0x00028009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_adjdom      [ domid = 
0x%(1)08x ]
+0x0002800a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  __enter_scheduler [ 
prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 
0x%(4)08x ]
+0x0002800b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  s_timer_fn
+0x0002800c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  t_timer_fn
+0x0002800d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  dom_timer_fn
+0x0002800e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infprev    [ old_domid 
= 0x%(1)08x, runtime = %(2)d ]
+0x0002800f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infnext    [ new_domid 
= 0x%(1)08x, time = %(2)d, r_time = %(3)d ]
+
+0x00081001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMENTRY
+0x00081002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ exitcode = 
0x%(1)08x, rIP  = 0x%(2)08x ]
+0x00081102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ exitcode = 
0x%(1)08x, rIP  = 0x%(2)016x ]
+0x00082001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ errorcode = 
0x%(2)02x, virt = 0x%(1)08x ]
+0x00082101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ errorcode = 
0x%(2)02x, virt = 0x%(1)016x ]
+0x00082002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ errorcode = 
0x%(1)02x, virt = 0x%(2)08x ]
+0x00082102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ errorcode = 
0x%(1)02x, virt = 0x%(2)016x ]
+0x00082003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_EXC     [ vector = 
0x%(1)02x, errorcode = 0x%(2)04x ]
+0x00082004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_VIRQ    [ vector = 
0x%(1)02x, fake = %(2)d ]
+0x00082005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  REINJ_VIRQ  [ vector = 
0x%(1)02x ]
+0x00082006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_READ     [ port = 
0x%(1)04x, size = %(2)d ]
+0x00082007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_WRITE    [ port = 
0x%(1)04x, size = %(2)d ]
+0x00082008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ CR# = %(1)d, 
value = 0x%(2)08x ]
+0x00082108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ CR# = %(1)d, 
value = 0x%(2)016x ]
+0x00082009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ CR# = %(1)d, 
value = 0x%(2)08x ]
+0x00082109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ CR# = %(1)d, 
value = 0x%(2)016x ]
+0x0008200A  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_READ    
+0x0008200B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_WRITE
+0x0008200C  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_READ    [ MSR# = 
0x%(1)08x, value = 0x%(2)016x ]
+0x0008200D  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_WRITE   [ MSR# = 
0x%(1)08x, value = 0x%(2)016x ]
+0x0008200E  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CPUID       [ func = 
0x%(1)08x, eax = 0x%(2)08x, ebx = 0x%(3)08x, ecx=0x%(4)08x, edx = 0x%(5)08x ]
+0x0008200F  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR        [ vector = 
0x%(1)02x ]
+0x00082010  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NMI
+0x00082011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  SMI
+0x00082012  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMMCALL     [ func = 0x%(1)08x 
]
+0x00082013  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  HLT         [ intpending = 
%(1)d ]
+0x00082014  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ is invlpga? = 
%(1)d, virt = 0x%(2)08x ]
+0x00082114  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ is invlpga? = 
%(1)d, virt = 0x%(2)016x ]
+0x00082015  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MCE
+0x00082016  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_ASSIST   [ data = 0x%(1)04x 
]
+0x00082017  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MMIO_ASSIST [ data = 0x%(1)04x 
]
+0x00082018  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CLTS
+0x00082019  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ value = 
0x%(1)08x ]
+0x00082119  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ value = 
0x%(1)016x ]
 
 0x0010f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_map      [ domid = 
%(1)d ]
 0x0010f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_unmap    [ domid = 
%(1)d ]
@@ -65,3 +78,41 @@ 0x0020f103  CPU%(cpu)d  %(tsc)d (+%(relt
 0x0020f103  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  trap       [ rip = 0x%(1)016x, 
trapnr:error = 0x%(2)08x ]
 0x0020f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_fault [ eip = 0x%(1)08x, 
addr = 0x%(2)08x, error = 0x%(3)08x ]
 0x0020f104  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_fault [ rip = 0x%(1)16x, 
addr = 0x%(3)16x, error = 0x%(5)08x ]
+
+0x0020f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_privop      [ eip = 
0x%(1)08x ]
+0x0020f106  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_privop      [ rip = 
0x%(1)16x ]
+0x0020f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_4G          [ eip = 
0x%(1)08x ]
+0x0020f107  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_4G          [ rip = 
0x%(1)16x ]
+0x0020f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ptwr_emulation_pae  [ addr = 
0x%(2)08x, eip = 0x%(1)08x, npte = 0x%(1)16x ]
+0x0020f10c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ptwr_emulation_pae  [ addr = 
0x%(2)16x, rip = 0x%(1)16x, npte = 0x%(1)16x ]
+
+0x0040f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_not_shadow              
   [ gl1e = 0x%(1)16x, va = 0x%(2)08x, flags = 0x%(3)08x ]
+0x0040f101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_not_shadow              
   [ gl1e = 0x%(1)16x, va = 0x%(2)16x, flags = 0x%(3)08x ]
+0x0040f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_propagate          
   [ va = 0x%(1)08x ]
+0x0040f102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_propagate          
   [ va = 0x%(1)16x ]
+0x0040f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_mmio               
   [ va = 0x%(1)08x ]
+0x0040f103  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_mmio               
   [ va = 0x%(1)16x ]
+0x0040f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_false_fast_path         
   [ va = 0x%(1)08x ]
+0x0040f104  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_false_fast_path         
   [ va = 0x%(1)16x ]
+0x0040f005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_mmio                    
   [ va = 0x%(1)08x ]
+0x0040f105  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_mmio                    
   [ va = 0x%(1)16x ]
+0x0040f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fixup                   
   [ gl1e = 0x%(1)08x, va = 0x%(2)08x, flags = 0x%(3)08x ]
+0x0040f106  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fixup                   
   [ gl1e = 0x%(1)16x, va = 0x%(2)16x, flags = 0x%(3)08x ]
+0x0040f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_domf_dying              
   [ va = 0x%(1)08x ]
+0x0040f107  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_domf_dying              
   [ va = 0x%(1)16x ]
+0x0040f008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate                 
   [ gl1e = 0x%(1)08x, write_val = 0x%(2)08x, va = 0x%(3)08x, flags = 
0x%(4)08x, emulation_count = 0x%(5)08x]
+0x0040f108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate                 
   [ gl1e = 0x%(1)16x, write_val = 0x%(2)16x, va = 0x%(3)16x, flags = 
0x%(4)08x, emulation_count = 0x%(5)08x]
+0x0040f009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_user   
   [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
+0x0040f109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_user   
   [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
+0x0040f00a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_evtinj 
   [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
+0x0040f10a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_evtinj 
   [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
+0x0040f00b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  
shadow_emulate_unshadow_unhandled [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
+0x0040f10b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  
shadow_emulate_unshadow_unhandled [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
+0x0040f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_wrmap_bf        
   [ gfn = 0x%(1)08x ]
+0x0040f10c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_wrmap_bf        
   [ gfn = 0x%(1)16x ]
+0x0040f00d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_prealloc_unpin  
   [ gfn = 0x%(1)08x ]
+0x0040f10d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_prealloc_unpin  
   [ gfn = 0x%(1)16x ]
+0x0040f00e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_full     
   [ gfn = 0x%(1)08x ]
+0x0040f10e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_full     
   [ gfn = 0x%(1)16x ]
+0x0040f00f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_only     
   [ gfn = 0x%(1)08x ]
+0x0040f10f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_only     
   [ gfn = 0x%(1)16x ]
diff -r 4ddd63b4be9b -r ec8eaab557d8 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Fri Sep 12 14:32:45 2008 +0900
+++ b/tools/xentrace/xentrace.c Fri Sep 12 14:47:40 2008 +0900
@@ -56,6 +56,7 @@ typedef struct settings_st {
     unsigned long tbuf_size;
     unsigned long disk_rsvd;
     unsigned long timeout;
+    unsigned long memory_buffer;
     uint8_t discard:1,
         disable_tracing:1;
 } settings_t;
@@ -67,10 +68,243 @@ static int xc_handle = -1;
 static int xc_handle = -1;
 static int event_fd = -1;
 static int virq_port = -1;
+static int outfd = 1;
 
 static void close_handler(int signal)
 {
     interrupted = 1;
+}
+
+static struct {
+    char * buf;
+    unsigned long prod, cons, size;
+    unsigned long pending_size, pending_prod;
+} membuf = { 0 };
+
+#define MEMBUF_INDEX_RESET_THRESHOLD (1<<29)
+
+/* FIXME -- make a power of 2 so we can mask instead. */
+#define MEMBUF_POINTER(_i) (membuf.buf + ((_i) % membuf.size))
+#define MEMBUF_CONS_INCREMENT(_n)               \
+    do {                                        \
+        membuf.cons += (_n);                    \
+    } while(0)
+#define MEMBUF_PROD_SET(_x)                                             \
+    do {                                                                \
+        if ( (_x) < membuf.prod ) {                                     \
+            fprintf(stderr, "%s: INTERNAL_ERROR: prod %lu, trying to set to 
%lu!\n", \
+                    __func__, membuf.prod, (unsigned long)(_x));        \
+            exit(1);                                                    \
+        }                                                               \
+        membuf.prod = (_x);                                             \
+        if ( (_x) > MEMBUF_INDEX_RESET_THRESHOLD )                      \
+        {                                                               \
+            membuf.prod %= membuf.size;                                 \
+            membuf.cons %= membuf.size;                                 \
+            if( membuf.prod < membuf.cons )                             \
+                membuf.prod += membuf.size;                             \
+        }                                                               \
+    } while(0) 
+
+struct cpu_change_record {
+    uint32_t header;
+    struct {
+        int cpu;
+        unsigned window_size;
+    } data;
+};
+
+#define CPU_CHANGE_HEADER                                           \
+    (TRC_TRACE_CPU_CHANGE                                           \
+     | (((sizeof(struct cpu_change_record)/sizeof(uint32_t)) - 1)   \
+        << TRACE_EXTRA_SHIFT) )
+
+void membuf_alloc(unsigned long size)
+{
+    membuf.buf = malloc(size);
+
+    if(!membuf.buf)
+    {
+        fprintf(stderr, "%s: Couldn't malloc %lu bytes!\n",
+                __func__, size);
+        exit(1);
+    }
+
+    membuf.prod = membuf.cons = 0;
+    membuf.size = size;
+}
+
+/*
+ * Reserve a new window in the buffer.  Move the 'consumer' forward size
+ * bytes, re-adjusting the cpu window sizes as necessary, and insert a
+ * cpu_change record.
+ */
+void membuf_reserve_window(unsigned cpu, unsigned long window_size)
+{
+    struct cpu_change_record *rec;
+    long need_to_consume, free, freed;
+
+    if ( membuf.pending_size > 0 )
+    {
+        fprintf(stderr, "%s: INTERNAL_ERROR: pending_size %lu\n",
+                __func__, membuf.pending_size);
+        exit(1);
+    }
+
+    need_to_consume = window_size + sizeof(*rec);
+
+    if ( window_size > membuf.size )
+    {
+        fprintf(stderr, "%s: reserve size %lu larger than buffer size %lu!\n",
+                __func__, window_size, membuf.size);
+        exit(1);
+    }
+
+    /* Subtract free space already in buffer. */
+    free = membuf.size - (membuf.prod - membuf.cons);
+    if( need_to_consume < free)
+        goto start_window;
+
+    need_to_consume -= free;
+
+    /*
+     * "Free" up full windows until we have enough for this window.
+     * It's a bit wasteful to throw away partial buffers, but the only
+     * other option is to scan through the buffer headers.  Since the
+     * common case is that it's going to be thrown away next anyway, I
+     * think minimizing the overall impact is more important.
+     */
+    do {
+        rec = (struct cpu_change_record *)MEMBUF_POINTER(membuf.cons);
+        if( rec->header != CPU_CHANGE_HEADER )
+        {
+            fprintf(stderr, "%s: INTERNAL ERROR: no cpu_change record at 
consumer!\n",
+                    __func__);
+            exit(EXIT_FAILURE);
+        }
+
+        freed = sizeof(*rec) + rec->data.window_size;
+
+        if ( need_to_consume > 0 )
+        {
+            MEMBUF_CONS_INCREMENT(freed);
+            need_to_consume -= freed;
+        }
+    } while( need_to_consume > 0 );
+
+start_window:
+    /*
+     * Start writing "pending" data.  Update prod once all this data is
+     * written.
+     */
+    membuf.pending_prod = membuf.prod;
+    membuf.pending_size = window_size;
+
+    rec = (struct cpu_change_record *)MEMBUF_POINTER(membuf.pending_prod);
+
+    rec->header = CPU_CHANGE_HEADER;
+    rec->data.cpu = cpu;
+    rec->data.window_size = window_size;
+
+    membuf.pending_prod += sizeof(*rec);
+}
+
+void membuf_write(void *start, unsigned long size) {
+    char * p;
+    unsigned long wsize;
+
+    if( (membuf.size - (membuf.prod - membuf.cons)) < size )
+    {
+        fprintf(stderr, "%s: INTERNAL ERROR: need %lu bytes, only have %lu!\n",
+                __func__, size, membuf.prod - membuf.cons);
+        exit(1);
+    }
+
+    if( size > membuf.pending_size )
+    {
+        fprintf(stderr, "%s: INTERNAL ERROR: size %lu, pending %lu!\n",
+                __func__, size, membuf.pending_size);
+        exit(1);
+    }
+
+    wsize = size;
+    p = MEMBUF_POINTER(membuf.pending_prod);
+
+    /* If the buffer overlaps the "wrap", do an extra write */
+    if ( p + size > membuf.buf + membuf.size )
+    {
+        int usize = ( membuf.buf + membuf.size ) - p;
+
+        memcpy(p, start, usize);
+
+        start += usize;
+        wsize -= usize;
+        p = membuf.buf;
+    }
+
+    memcpy(p, start, wsize);
+
+    membuf.pending_prod += size;
+    membuf.pending_size -= size;
+
+    if ( membuf.pending_size == 0 )
+    {
+        MEMBUF_PROD_SET(membuf.pending_prod);
+    }
+}
+
+void membuf_dump(void) {
+    /* Dump circular memory buffer */
+    int cons, prod, wsize, written;
+    char * wstart;
+
+    fprintf(stderr, "Dumping memory buffer.\n");
+
+    cons = membuf.cons % membuf.size; 
+    prod = membuf.prod % membuf.size;
+   
+    if(prod > cons)
+    {
+        /* Write in one go */
+        wstart = membuf.buf + cons;
+        wsize = prod - cons;
+
+        written = write(outfd, wstart, wsize);
+        if ( written != wsize )
+            goto fail;
+    }
+    else
+    {
+        /* Write in two pieces: cons->end, beginning->prod. */
+        wstart = membuf.buf + cons;
+        wsize = membuf.size - cons;
+
+        written = write(outfd, wstart, wsize);
+        if ( written != wsize )
+        {
+            fprintf(stderr, "Write failed! (size %d, returned %d)\n",
+                    wsize, written);
+            goto fail;
+        }
+
+        wstart = membuf.buf;
+        wsize = prod;
+
+        written = write(outfd, wstart, wsize);
+        if ( written != wsize )
+        {
+            fprintf(stderr, "Write failed! (size %d, returned %d)\n",
+                    wsize, written);
+            goto fail;
+        }
+    }
+
+    membuf.cons = membuf.prod = 0;
+    
+    return;
+fail:
+    exit(1);
+    return;
 }
 
 /**
@@ -85,20 +319,20 @@ static void close_handler(int signal)
  * of the buffer write.
  */
 static void write_buffer(unsigned int cpu, unsigned char *start, int size,
-               int total_size, int outfd)
+                         int total_size)
 {
     struct statvfs stat;
     size_t written = 0;
     
-    if ( opts.disk_rsvd != 0 )
+    if ( opts.memory_buffer == 0 && opts.disk_rsvd != 0 )
     {
         unsigned long long freespace;
 
         /* Check that filesystem has enough space. */
         if ( fstatvfs (outfd, &stat) )
         {
-                fprintf(stderr, "Statfs failed!\n");
-                goto fail;
+            fprintf(stderr, "Statfs failed!\n");
+            goto fail;
         }
 
         freespace = stat.f_frsize * (unsigned long long)stat.f_bfree;
@@ -112,8 +346,8 @@ static void write_buffer(unsigned int cp
 
         if ( freespace <= opts.disk_rsvd )
         {
-                fprintf(stderr, "Disk space limit reached (free space: %lluMB, 
limit: %luMB).\n", freespace, opts.disk_rsvd);
-                exit (EXIT_FAILURE);
+            fprintf(stderr, "Disk space limit reached (free space: %lluMB, 
limit: %luMB).\n", freespace, opts.disk_rsvd);
+            exit (EXIT_FAILURE);
         }
     }
 
@@ -122,40 +356,46 @@ static void write_buffer(unsigned int cp
      * first write. */
     if ( total_size != 0 )
     {
-        struct {
-            uint32_t header;
-            struct {
-                unsigned cpu;
-                unsigned byte_count;
-            } extra;
-        } rec;
-
-        rec.header = TRC_TRACE_CPU_CHANGE
-            | ((sizeof(rec.extra)/sizeof(uint32_t)) << TRACE_EXTRA_SHIFT);
-        rec.extra.cpu = cpu;
-        rec.extra.byte_count = total_size;
-
-        written = write(outfd, &rec, sizeof(rec));
-
-        if ( written != sizeof(rec) )
-        {
-            fprintf(stderr, "Cannot write cpu change (write returned %zd)\n",
-                    written);
+        if ( opts.memory_buffer )
+        {
+            membuf_reserve_window(cpu, total_size);
+        }
+        else
+        {
+            struct cpu_change_record rec;
+
+            rec.header = CPU_CHANGE_HEADER;
+            rec.data.cpu = cpu;
+            rec.data.window_size = total_size;
+
+            written = write(outfd, &rec, sizeof(rec));
+            if ( written != sizeof(rec) )
+            {
+                fprintf(stderr, "Cannot write cpu change (write returned 
%zd)\n",
+                        written);
+                goto fail;
+            }
+        }
+    }
+
+    if ( opts.memory_buffer )
+    {
+        membuf_write(start, size);
+    }
+    else
+    {
+        written = write(outfd, start, size);
+        if ( written != size )
+        {
+            fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
+                    size, written);
             goto fail;
         }
     }
 
-    written = write(outfd, start, size);
-    if ( written != size )
-    {
-        fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
-                size, written);
-        goto fail;
-    }
-
     return;
 
- fail:
+fail:
     PERROR("Failed to write trace data");
     exit(EXIT_FAILURE);
 }
@@ -394,7 +634,7 @@ static void wait_for_event_or_timeout(un
  * monitor_tbufs - monitor the contents of tbufs and output to a file
  * @logfile:       the FILE * representing the file to log to
  */
-static int monitor_tbufs(int outfd)
+static int monitor_tbufs(void)
 {
     int i;
 
@@ -429,9 +669,9 @@ static int monitor_tbufs(int outfd)
             meta[i]->cons = meta[i]->prod;
 
     /* now, scan buffers for events */
-    while ( !interrupted )
-    {
-        for ( i = 0; (i < num) && !interrupted; i++ )
+    while ( 1 )
+    {
+        for ( i = 0; i < num; i++ )
         {
             unsigned long start_offset, end_offset, window_size, cons, prod;
                 
@@ -463,8 +703,7 @@ static int monitor_tbufs(int outfd)
                 /* If window does not wrap, write in one big chunk */
                 write_buffer(i, data[i]+start_offset,
                              window_size,
-                             window_size,
-                             outfd);
+                             window_size);
             }
             else
             {
@@ -474,23 +713,28 @@ static int monitor_tbufs(int outfd)
                  */
                 write_buffer(i, data[i] + start_offset,
                              data_size - start_offset,
-                             window_size,
-                             outfd);
+                             window_size);
                 write_buffer(i, data[i],
                              end_offset,
-                             0,
-                             outfd);
+                             0);
             }
 
             xen_mb(); /* read buffer, then update cons. */
             meta[i]->cons = prod;
-        }
+
+        }
+
+        if ( interrupted )
+            break;
 
         wait_for_event_or_timeout(opts.poll_sleep);
     }
 
-    if(opts.disable_tracing)
+    if ( opts.disable_tracing )
         disable_tbufs();
+
+    if ( opts.memory_buffer )
+        membuf_dump();
 
     /* cleanup */
     free(meta);
@@ -538,6 +782,8 @@ static void usage(void)
 "  -T  --time-interval=s   Run xentrace for s seconds and quit.\n" \
 "  -?, --help              Show this message\n" \
 "  -V, --version           Print program version\n" \
+"  -M, --memory-buffer=b   Copy trace records to a circular memory buffer.\n" \
+"                          Dump to file on exit.\n" \
 "\n" \
 "This tool is used to capture trace buffer data from Xen. The\n" \
 "data is output in a binary format, in the following order:\n" \
@@ -551,6 +797,53 @@ static void usage(void)
     printf("\nReport bugs to %s\n", program_bug_address);
 
     exit(EXIT_FAILURE);
+}
+
+/* convert the argument string pointed to by arg to a long int representation,
+ * including suffixes such as 'M' and 'k'. */
+#define MB (1024*1024)
+#define KB (1024)
+long sargtol(const char *restrict arg, int base)
+{
+    char *endp;
+    long val;
+
+    errno = 0;
+    val = strtol(arg, &endp, base);
+    
+    if ( errno != 0 )
+    {
+        fprintf(stderr, "Invalid option argument: %s\n", arg);
+        fprintf(stderr, "Error: %s\n\n", strerror(errno));
+        usage();
+    }
+    else if (endp == arg)
+    {
+        goto invalid;
+    }
+
+    switch(*endp)
+    {
+    case '\0':
+        break;
+    case 'M':
+        val *= MB;
+        break;
+    case 'K':
+    case 'k':
+        val *= KB;
+        break;
+    default:
+        fprintf(stderr, "Unknown suffix %c\n", *endp);
+        exit(1);
+    }
+
+
+    return val;
+invalid:
+    return 0;
+    fprintf(stderr, "Invalid option argument: %s\n\n", arg);
+    usage();
 }
 
 /* convert the argument string pointed to by arg to a long int representation 
*/
@@ -606,6 +899,7 @@ static void parse_args(int argc, char **
         { "trace-buf-size", required_argument, 0, 'S' },
         { "reserve-disk-space", required_argument, 0, 'r' },
         { "time-interval",  required_argument, 0, 'T' },
+        { "memory-buffer",  required_argument, 0, 'M' },
         { "discard-buffers", no_argument,      0, 'D' },
         { "dont-disable-tracing", no_argument, 0, 'x' },
         { "help",           no_argument,       0, '?' },
@@ -613,7 +907,7 @@ static void parse_args(int argc, char **
         { 0, 0, 0, 0 }
     };
 
-    while ( (option = getopt_long(argc, argv, "c:e:s:S:t:?V",
+    while ( (option = getopt_long(argc, argv, "t:s:c:e:S:r:T:M:Dx?V",
                     long_options, NULL)) != -1) 
     {
         switch ( option )
@@ -653,6 +947,10 @@ static void parse_args(int argc, char **
 
         case 'T':
             opts.timeout = argtol(optarg, 0);
+            break;
+
+        case 'M':
+            opts.memory_buffer = sargtol(optarg, 0);
             break;
 
         default:
@@ -674,7 +972,7 @@ static void parse_args(int argc, char **
 
 int main(int argc, char **argv)
 {
-    int outfd = 1, ret;
+    int ret;
     struct sigaction act;
 
     opts.outfile = 0;
@@ -719,6 +1017,9 @@ int main(int argc, char **argv)
         fprintf(stderr, "Cannot output to a TTY, specify a log file.\n");
         exit(EXIT_FAILURE);
     }
+
+    if ( opts.memory_buffer > 0 )
+        membuf_alloc(opts.memory_buffer);
 
     /* ensure that if we get a signal, we'll do cleanup, then exit */
     act.sa_handler = close_handler;
@@ -729,7 +1030,7 @@ int main(int argc, char **argv)
     sigaction(SIGINT,  &act, NULL);
     sigaction(SIGALRM, &act, NULL);
 
-    ret = monitor_tbufs(outfd);
+    ret = monitor_tbufs();
 
     return ret;
 }
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile        Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/acpi/Makefile        Fri Sep 12 14:47:40 2008 +0900
@@ -1,5 +1,5 @@ subdir-y += cpufreq
 subdir-y += cpufreq
 
 obj-y += boot.o
-obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o
+obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
 obj-y += pmstat.o
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c      Fri Sep 12 14:47:40 2008 +0900
@@ -39,6 +39,7 @@
 #include <xen/smp.h>
 #include <xen/guest_access.h>
 #include <xen/keyhandler.h>
+#include <xen/cpuidle.h>
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <asm/hpet.h>
@@ -49,12 +50,9 @@
 #define DEBUG_PM_CX
 
 #define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
+#define PM_TIMER_TICKS_TO_US(t)     ((t * 1000) / (PM_TIMER_FREQUENCY / 1000))
 #define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
 #define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */
-
-#define ACPI_PROCESSOR_MAX_POWER        8
-#define ACPI_PROCESSOR_MAX_C2_LATENCY   100
-#define ACPI_PROCESSOR_MAX_C3_LATENCY   1000
 
 static void (*lapic_timer_off)(void);
 static void (*lapic_timer_on)(void);
@@ -65,66 +63,6 @@ static void (*pm_idle_save) (void) __rea
 static void (*pm_idle_save) (void) __read_mostly;
 unsigned int max_cstate __read_mostly = 2;
 integer_param("max_cstate", max_cstate);
-/*
- * bm_history -- bit-mask with a bit per jiffy of bus-master activity
- * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
- * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
- * 100 HZ: 0x0000000F: 4 jiffies = 40ms
- * reduce history for more aggressive entry into C3
- */
-unsigned int bm_history __read_mostly =
-    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
-integer_param("bm_history", bm_history);
-
-struct acpi_processor_cx;
-
-struct acpi_processor_cx_policy
-{
-    u32 count;
-    struct acpi_processor_cx *state;
-    struct
-    {
-        u32 time;
-        u32 ticks;
-        u32 count;
-        u32 bm;
-    } threshold;
-};
-
-struct acpi_processor_cx
-{
-    u8 valid;
-    u8 type;
-    u32 address;
-    u8 space_id;
-    u32 latency;
-    u32 latency_ticks;
-    u32 power;
-    u32 usage;
-    u64 time;
-    struct acpi_processor_cx_policy promotion;
-    struct acpi_processor_cx_policy demotion;
-};
-
-struct acpi_processor_flags
-{
-    u8 bm_control:1;
-    u8 bm_check:1;
-    u8 has_cst:1;
-    u8 power_setup_done:1;
-    u8 bm_rld_set:1;
-};
-
-struct acpi_processor_power
-{
-    struct acpi_processor_flags flags;
-    struct acpi_processor_cx *state;
-    s_time_t bm_check_timestamp;
-    u32 default_state;
-    u32 bm_activity;
-    u32 count;
-    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
-};
 
 static struct acpi_processor_power processor_powers[NR_CPUS];
 
@@ -133,26 +71,21 @@ static void print_acpi_power(uint32_t cp
     uint32_t i;
 
     printk("==cpu%d==\n", cpu);
-    printk("active state:\t\tC%d\n", (power->state)?power->state->type:-1);
+    printk("active state:\t\tC%d\n",
+           (power->last_state) ? power->last_state->type : -1);
     printk("max_cstate:\t\tC%d\n", max_cstate);
-    printk("bus master activity:\t%08x\n", power->bm_activity);
     printk("states:\n");
     
     for ( i = 1; i < power->count; i++ )
     {
-        printk((power->states[i].type == power->state->type) ? "   *" : "    
");
+        if ( power->last_state && 
+             power->states[i].type == power->last_state->type )
+            printk("   *");
+        else
+            printk("    ");
         printk("C%d:\t\t", i);
         printk("type[C%d] ", power->states[i].type);
-        if ( power->states[i].promotion.state )
-            printk("promotion[C%d] ", power->states[i].promotion.state->type);
-        else
-            printk("promotion[--] ");
-        if ( power->states[i].demotion.state )
-            printk("demotion[C%d] ", power->states[i].demotion.state->type);
-        else
-            printk("demotion[--] ");
-        printk("latency[%03d]\n ", power->states[i].latency);
-        printk("\t\t\t");
+        printk("latency[%03d] ", power->states[i].latency);
         printk("usage[%08d] ", power->states[i].usage);
         printk("duration[%"PRId64"]\n", power->states[i].time);
     }
@@ -180,48 +113,6 @@ static inline u32 ticks_elapsed(u32 t1, 
         return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
     else
         return ((0xFFFFFFFF - t1) + t2);
-}
-
-static void acpi_processor_power_activate(struct acpi_processor_power *power,
-                                          struct acpi_processor_cx *new)
-{
-    struct acpi_processor_cx *old;
-
-    if ( !power || !new )
-        return;
-
-    old = power->state;
-
-    if ( old )
-        old->promotion.count = 0;
-    new->demotion.count = 0;
-
-    /* Cleanup from old state. */
-    if ( old )
-    {
-        switch ( old->type )
-        {
-        case ACPI_STATE_C3:
-            /* Disable bus master reload */
-            if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
-                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
-            break;
-        }
-    }
-
-    /* Prepare to use new state. */
-    switch ( new->type )
-    {
-    case ACPI_STATE_C3:
-        /* Enable bus master reload */
-        if ( old->type != ACPI_STATE_C3 && power->flags.bm_check )
-            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
-        break;
-    }
-
-    power->state = new;
-
-    return;
 }
 
 static void acpi_safe_halt(void)
@@ -263,13 +154,50 @@ static void acpi_idle_do_entry(struct ac
     }
 }
 
-static atomic_t c3_cpu_count;
+static inline void acpi_idle_update_bm_rld(struct acpi_processor_power *power,
+                                           struct acpi_processor_cx *target)
+{
+    if ( !power->flags.bm_check )
+        return;
+
+    if ( power->flags.bm_rld_set && target->type != ACPI_STATE_C3 )
+    {
+        acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+        power->flags.bm_rld_set = 0;
+    }
+
+    if ( !power->flags.bm_rld_set && target->type == ACPI_STATE_C3 )
+    {
+        acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+        power->flags.bm_rld_set = 1;
+    }
+}
+
+static int acpi_idle_bm_check(void)
+{
+    u32 bm_status = 0;
+
+    acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
+    if ( bm_status )
+        acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
+    /*
+     * TBD: PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
+     * the true state of bus mastering activity; forcing us to
+     * manually check the BMIDEA bit of each IDE channel.
+     */
+    return bm_status;
+}
+
+static struct {
+    spinlock_t lock;
+    unsigned int count;
+} c3_cpu_status = { .lock = SPIN_LOCK_UNLOCKED };
 
 static void acpi_processor_idle(void)
 {
     struct acpi_processor_power *power = NULL;
     struct acpi_processor_cx *cx = NULL;
-    struct acpi_processor_cx *next_state = NULL;
+    int next_state;
     int sleep_ticks = 0;
     u32 t1, t2 = 0;
 
@@ -287,7 +215,16 @@ static void acpi_processor_idle(void)
         return;
     }
 
-    cx = power->state;
+    next_state = cpuidle_current_governor->select(power);
+    if ( next_state > 0 )
+    {
+        cx = &power->states[next_state];
+        if ( power->flags.bm_check && acpi_idle_bm_check()
+             && cx->type == ACPI_STATE_C3 )
+            cx = power->safe_state;
+        if ( cx->type > max_cstate )
+            cx = &power->states[max_cstate];
+    }
     if ( !cx )
     {
         if ( pm_idle_save )
@@ -303,69 +240,14 @@ static void acpi_processor_idle(void)
         return;
     }
 
-    /*
-     * Check BM Activity
-     * -----------------
-     * Check for bus mastering activity (if required), record, and check
-     * for demotion.
-     */
-    if ( power->flags.bm_check )
-    {
-        u32 bm_status = 0;
-        unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;
-
-        if ( diff > 31 )
-            diff = 31;
-
-        power->bm_activity <<= diff;
-
-        acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
-        if ( bm_status )
-        {
-            power->bm_activity |= 0x1;
-            acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
-        }
-        /*
-         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
-         * the true state of bus mastering activity; forcing us to
-         * manually check the BMIDEA bit of each IDE channel.
-         */
-        /*else if ( errata.piix4.bmisx )
-        {
-            if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01)
-                || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
-                pr->power.bm_activity |= 0x1;
-        }*/
-
-        power->bm_check_timestamp = NOW();
-
-        /*
-         * If bus mastering is or was active this jiffy, demote
-         * to avoid a faulty transition.  Note that the processor
-         * won't enter a low-power state during this call (to this
-         * function) but should upon the next.
-         *
-         * TBD: A better policy might be to fallback to the demotion
-         *      state (use it for this quantum only) istead of
-         *      demoting -- and rely on duration as our sole demotion
-         *      qualification.  This may, however, introduce DMA
-         *      issues (e.g. floppy DMA transfer overrun/underrun).
-         */
-        if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
-        {
-            local_irq_enable();
-            next_state = cx->demotion.state;
-            goto end;
-        }
-    }
+    power->last_state = cx;
 
     /*
      * Sleep:
      * ------
      * Invoke the current Cx state to put the processor to sleep.
      */
-    if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
-        smp_mb__after_clear_bit();
+    acpi_idle_update_bm_rld(power, cx);
 
     switch ( cx->type )
     {
@@ -399,8 +281,7 @@ static void acpi_processor_idle(void)
         /* Re-enable interrupts */
         local_irq_enable();
         /* Compute time (ticks) that we were actually asleep */
-        sleep_ticks =
-            ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
+        sleep_ticks = ticks_elapsed(t1, t2);
         break;
 
     case ACPI_STATE_C3:
@@ -416,8 +297,8 @@ static void acpi_processor_idle(void)
          */
         if ( power->flags.bm_check && power->flags.bm_control )
         {
-            atomic_inc(&c3_cpu_count);
-            if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
+            spin_lock(&c3_cpu_status.lock);
+            if ( ++c3_cpu_status.count == num_online_cpus() )
             {
                 /*
                  * All CPUs are trying to go to C3
@@ -425,6 +306,7 @@ static void acpi_processor_idle(void)
                  */
                 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
             }
+            spin_unlock(&c3_cpu_status.lock);
         }
         else if ( !power->flags.bm_check )
         {
@@ -455,8 +337,10 @@ static void acpi_processor_idle(void)
         if ( power->flags.bm_check && power->flags.bm_control )
         {
             /* Enable bus master arbitration */
-            atomic_dec(&c3_cpu_count);
-            acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+            spin_lock(&c3_cpu_status.lock);
+            if ( c3_cpu_status.count-- == num_online_cpus() )
+                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+            spin_unlock(&c3_cpu_status.lock);
         }
 
         /* Re-enable interrupts */
@@ -465,8 +349,6 @@ static void acpi_processor_idle(void)
         lapic_timer_on();
         /* Compute time (ticks) that we were actually asleep */
         sleep_ticks = ticks_elapsed(t1, t2);
-        /* Do not account our idle-switching overhead: */
-        sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
 
         break;
 
@@ -476,163 +358,14 @@ static void acpi_processor_idle(void)
     }
 
     cx->usage++;
-    if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) )
+    if ( sleep_ticks > 0 )
+    {
+        power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
         cx->time += sleep_ticks;
-
-    next_state = power->state;
-
-    /*
-     * Promotion?
-     * ----------
-     * Track the number of longs (time asleep is greater than threshold)
-     * and promote when the count threshold is reached.  Note that bus
-     * mastering activity may prevent promotions.
-     * Do not promote above max_cstate.
-     */
-    if ( cx->promotion.state &&
-         ((cx->promotion.state - power->states) <= max_cstate) )
-    {
-        if ( sleep_ticks > cx->promotion.threshold.ticks )
-        {
-            cx->promotion.count++;
-            cx->demotion.count = 0;
-            if ( cx->promotion.count >= cx->promotion.threshold.count )
-            {
-                if ( power->flags.bm_check )
-                {
-                    if ( !(power->bm_activity & cx->promotion.threshold.bm) )
-                    {
-                        next_state = cx->promotion.state;
-                        goto end;
-                    }
-                }
-                else
-                {
-                    next_state = cx->promotion.state;
-                    goto end;
-                }
-            }
-        }
-    }
-
-    /*
-     * Demotion?
-     * ---------
-     * Track the number of shorts (time asleep is less than time threshold)
-     * and demote when the usage threshold is reached.
-     */
-    if ( cx->demotion.state )
-    {
-        if ( sleep_ticks < cx->demotion.threshold.ticks )
-        {
-            cx->demotion.count++;
-            cx->promotion.count = 0;
-            if ( cx->demotion.count >= cx->demotion.threshold.count )
-            {
-                next_state = cx->demotion.state;
-                goto end;
-            }
-        }
-    }
-
-end:
-    /*
-     * Demote if current state exceeds max_cstate
-     */
-    if ( (power->state - power->states) > max_cstate )
-    {
-        if ( cx->demotion.state )
-            next_state = cx->demotion.state;
-    }
-
-    /*
-     * New Cx State?
-     * -------------
-     * If we're going to start using a new Cx state we must clean up
-     * from the previous and prepare to use the new.
-     */
-    if ( next_state != power->state )
-        acpi_processor_power_activate(power, next_state);
-}
-
-static int acpi_processor_set_power_policy(struct acpi_processor_power *power)
-{
-    unsigned int i;
-    unsigned int state_is_set = 0;
-    struct acpi_processor_cx *lower = NULL;
-    struct acpi_processor_cx *higher = NULL;
-    struct acpi_processor_cx *cx;
-
-    if ( !power )
-        return -EINVAL;
-
-    /*
-     * This function sets the default Cx state policy (OS idle handler).
-     * Our scheme is to promote quickly to C2 but more conservatively
-     * to C3.  We're favoring C2  for its characteristics of low latency
-     * (quick response), good power savings, and ability to allow bus
-     * mastering activity.  Note that the Cx state policy is completely
-     * customizable and can be altered dynamically.
-     */
-
-    /* startup state */
-    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
-    {
-        cx = &power->states[i];
-        if ( !cx->valid )
-            continue;
-
-        if ( !state_is_set )
-            power->state = cx;
-        state_is_set++;
-        break;
-    }
-
-    if ( !state_is_set )
-        return -ENODEV;
-
-    /* demotion */
-    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
-    {
-        cx = &power->states[i];
-        if ( !cx->valid )
-            continue;
-
-        if ( lower )
-        {
-            cx->demotion.state = lower;
-            cx->demotion.threshold.ticks = cx->latency_ticks;
-            cx->demotion.threshold.count = 1;
-            if ( cx->type == ACPI_STATE_C3 )
-                cx->demotion.threshold.bm = bm_history;
-        }
-
-        lower = cx;
-    }
-
-    /* promotion */
-    for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- )
-    {
-        cx = &power->states[i];
-        if ( !cx->valid )
-            continue;
-
-        if ( higher )
-        {
-            cx->promotion.state = higher;
-            cx->promotion.threshold.ticks = cx->latency_ticks;
-            if ( cx->type >= ACPI_STATE_C2 )
-                cx->promotion.threshold.count = 4;
-            else
-                cx->promotion.threshold.count = 10;
-            if ( higher->type == ACPI_STATE_C3 )
-                cx->promotion.threshold.bm = bm_history;
-        }
-
-        higher = cx;
-    }
-
-    return 0;
+    }
+
+    if ( cpuidle_current_governor->reflect )
+        cpuidle_current_governor->reflect(power);
 }
 
 static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
@@ -821,6 +554,8 @@ static int check_cx(struct acpi_processo
     return 0;
 }
 
+static unsigned int latency_factor = 2;
+
 static void set_cx(
     struct acpi_processor_power *acpi_power,
     xen_processor_cx_t *xen_cx)
@@ -842,6 +577,9 @@ static void set_cx(
     cx->power    = xen_cx->power;
     
     cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+    cx->target_residency = cx->latency * latency_factor;
+    if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
+        acpi_power->safe_state = cx;
 }
 
 int get_cpu_id(u8 acpi_id)
@@ -936,6 +674,7 @@ long set_cx_pminfo(uint32_t cpu, struct 
 
     init_cx_pminfo(acpi_power);
 
+    acpi_power->cpu = cpu_id;
     acpi_power->flags.bm_check = power->flags.bm_check;
     acpi_power->flags.bm_control = power->flags.bm_control;
     acpi_power->flags.has_cst = power->flags.has_cst;
@@ -950,10 +689,11 @@ long set_cx_pminfo(uint32_t cpu, struct 
         set_cx(acpi_power, &xen_cx);
     }
 
+    if ( cpuidle_current_governor->enable &&
+         cpuidle_current_governor->enable(acpi_power) )
+        return -EFAULT;
+
     /* FIXME: C-state dependency is not supported by far */
-    
-    /* initialize default policy */
-    acpi_processor_set_power_policy(acpi_power);
 
     print_acpi_power(cpu_id, acpi_power);
 
@@ -978,7 +718,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
     uint64_t usage;
     int i;
 
-    stat->last = (power->state) ? power->state->type : 0;
+    stat->last = (power->last_state) ? power->last_state->type : 0;
     stat->nr = processor_powers[cpuid].count;
     stat->idle_time = v->runstate.time[RUNSTATE_running];
     if ( v->is_running )
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Sep 12 14:47:40 2008 +0900
@@ -48,7 +48,7 @@ struct cpufreq_policy xen_px_policy[NR_C
 struct cpufreq_policy xen_px_policy[NR_CPUS];
 
 static cpumask_t *cpufreq_dom_pt;
-static cpumask_t cpufreq_dom_mask;
+static unsigned long *cpufreq_dom_mask;
 static unsigned int cpufreq_dom_max;
 
 enum {
@@ -562,7 +562,8 @@ void cpufreq_dom_exit(void)
 void cpufreq_dom_exit(void)
 {
     cpufreq_dom_max = 0;
-    cpus_clear(cpufreq_dom_mask);
+    if (cpufreq_dom_mask)
+        xfree(cpufreq_dom_mask);
     if (cpufreq_dom_pt)
         xfree(cpufreq_dom_pt);
 }
@@ -572,22 +573,28 @@ int cpufreq_dom_init(void)
     unsigned int i;
 
     cpufreq_dom_max = 0;
-    cpus_clear(cpufreq_dom_mask);
 
     for_each_online_cpu(i) {
-        cpu_set(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
         if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain)
             cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain;
     }
     cpufreq_dom_max++;
+
+    cpufreq_dom_mask = xmalloc_array(unsigned long,
+                                     BITS_TO_LONGS(cpufreq_dom_max));
+    if (!cpufreq_dom_mask)
+        return -ENOMEM;
+    bitmap_zero(cpufreq_dom_mask, cpufreq_dom_max);
 
     cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max);
     if (!cpufreq_dom_pt)
         return -ENOMEM;
     memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t));
 
-    for_each_online_cpu(i)
+    for_each_online_cpu(i) {
+        __set_bit(processor_pminfo[i].perf.domain_info.domain, 
cpufreq_dom_mask);
         cpu_set(i, 
cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]);
+    }
 
     for_each_online_cpu(i)
         processor_pminfo[i].perf.shared_cpu_map =
@@ -616,10 +623,11 @@ static int cpufreq_cpu_init(void)
 
 int cpufreq_dom_dbs(unsigned int event)
 {
-    int cpu, dom, ret = 0;
-
-    for (dom=0; dom<cpufreq_dom_max; dom++) {
-        if (!cpu_isset(dom, cpufreq_dom_mask))
+    unsigned int cpu, dom;
+    int ret = 0;
+
+    for (dom = 0; dom < cpufreq_dom_max; dom++) {
+        if (!test_bit(dom, cpufreq_dom_mask))
             continue;
         cpu = first_cpu(cpufreq_dom_pt[dom]);
         ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event);
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/cpufreq/powernow.c
--- a/xen/arch/x86/acpi/cpufreq/powernow.c      Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c      Fri Sep 12 14:47:40 2008 +0900
@@ -197,8 +197,8 @@ static int powernow_cpufreq_cpu_init(str
 
     data->max_freq = perf->states[0].core_frequency * 1000;
     /* table init */
-    for (i=0; i<perf->state_count && i<max_hw_pstate; i++) {
-        if (i>0 && perf->states[i].core_frequency >=
+    for (i = 0; i < perf->state_count && i <= max_hw_pstate; i++) {
+        if (i > 0 && perf->states[i].core_frequency >=
             data->freq_table[valid_states-1].frequency / 1000)
             continue;
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/acpi/cpuidle_menu.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/cpuidle_menu.c  Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,132 @@
+/*
+ * cpuidle_menu - menu governor for cpu idle, main idea comes from Linux.
+ *            drivers/cpuidle/governors/menu.c 
+ *
+ *  Copyright (C) 2006-2007 Adam Belay <abelay@xxxxxxxxxx>
+ *  Copyright (C) 2007, 2008 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/lib.h>
+#include <xen/types.h>
+#include <xen/acpi.h>
+#include <xen/timer.h>
+#include <xen/cpuidle.h>
+
+#define BREAK_FUZZ      4       /* 4 us */
+#define USEC_PER_SEC 1000000
+
+struct menu_device
+{
+    int             last_state_idx;
+    unsigned int    expected_us;
+    unsigned int    predicted_us;
+    unsigned int    last_measured_us;
+    unsigned int    elapsed_us;
+};
+
+static DEFINE_PER_CPU(struct menu_device, menu_devices);
+
+static s_time_t get_sleep_length_ns(void)
+{
+    return per_cpu(timer_deadline, smp_processor_id()) - NOW();
+}
+
+static int menu_select(struct acpi_processor_power *power)
+{
+    struct menu_device *data = &__get_cpu_var(menu_devices);
+    int i;
+
+    /* determine the expected residency time */
+    data->expected_us = (u32) (get_sleep_length_ns() / 1000);
+
+    /* find the deepest idle state that satisfies our constraints */
+    for ( i = 1; i < power->count; i++ )
+    {
+        struct acpi_processor_cx *s = &power->states[i];
+
+        if ( s->target_residency > data->expected_us + s->latency )
+            break;
+        if ( s->target_residency > data->predicted_us )
+            break;
+        /* TBD: we need to check the QoS requirement in future */
+    }
+
+    data->last_state_idx = i - 1;
+    return i - 1;
+}
+
+static void menu_reflect(struct acpi_processor_power *power)
+{
+    struct menu_device *data = &__get_cpu_var(menu_devices);
+    struct acpi_processor_cx *target = &power->states[data->last_state_idx];
+    unsigned int last_residency; 
+    unsigned int measured_us;
+
+    /*
+     * Ugh, this idle state doesn't support residency measurements, so we
+     * are basically lost in the dark.  As a compromise, assume we slept
+     * for one full standard timer tick.  However, be aware that this
+     * could potentially result in a suboptimal state transition.
+     */
+    if ( target->type == ACPI_STATE_C1 )
+        last_residency = USEC_PER_SEC / HZ;
+    else
+        last_residency = power->last_residency;
+
+    measured_us = last_residency + data->elapsed_us;
+
+    /* if wrapping, set to max uint (-1) */
+    measured_us = data->elapsed_us <= measured_us ? measured_us : -1;
+
+    /* Predict time remaining until next break event */
+    data->predicted_us = max(measured_us, data->last_measured_us);
+
+    /* Distinguish between expected & non-expected events */
+    if ( last_residency + BREAK_FUZZ
+         < data->expected_us + target->latency )
+    {
+        data->last_measured_us = measured_us;
+        data->elapsed_us = 0;
+    }
+    else
+        data->elapsed_us = measured_us;
+}
+
+static int menu_enable_device(struct acpi_processor_power *power)
+{
+    struct menu_device *data = &per_cpu(menu_devices, power->cpu);
+
+    memset(data, 0, sizeof(struct menu_device));
+
+    return 0;
+}
+
+static struct cpuidle_governor menu_governor =
+{
+    .name =         "menu",
+    .rating =       20,
+    .enable =       menu_enable_device,
+    .select =       menu_select,
+    .reflect =      menu_reflect,
+};
+
+struct cpuidle_governor *cpuidle_current_governor = &menu_governor;
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/domain.c     Fri Sep 12 14:47:40 2008 +0900
@@ -31,6 +31,7 @@
 #include <xen/compat.h>
 #include <xen/acpi.h>
 #include <xen/pci.h>
+#include <xen/paging.h>
 #include <asm/regs.h>
 #include <asm/mc146818rtc.h>
 #include <asm/system.h>
@@ -40,7 +41,6 @@
 #include <asm/i387.h>
 #include <asm/mpspec.h>
 #include <asm/ldt.h>
-#include <asm/paging.h>
 #include <asm/hypercall.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
@@ -302,7 +302,8 @@ int vcpu_initialise(struct vcpu *v)
     else
     {
         /* PV guests by default have a 100Hz ticker. */
-        v->periodic_period = MILLISECS(10);
+        if ( !is_idle_domain(d) )
+            v->periodic_period = MILLISECS(10);
 
         /* PV guests get an emulated PIT too for video BIOSes to use. */
         if ( !is_idle_domain(d) && (v->vcpu_id == 0) )
@@ -1645,23 +1646,26 @@ static int relinquish_memory(
 
         /*
          * Forcibly invalidate top-most, still valid page tables at this point
-         * to break circular 'linear page table' references. This is okay
-         * because MMU structures are not shared across domains and this domain
-         * is now dead. Thus top-most valid tables are not in use so a non-zero
-         * count means circular reference.
+         * to break circular 'linear page table' references as well as clean up
+         * partially validated pages. This is okay because MMU structures are
+         * not shared across domains and this domain is now dead. Thus top-most
+         * valid tables are not in use so a non-zero count means circular
+         * reference or partially validated.
          */
         y = page->u.inuse.type_info;
         for ( ; ; )
         {
             x = y;
-            if ( likely((x & (PGT_type_mask|PGT_validated)) !=
-                        (type|PGT_validated)) )
+            if ( likely((x & PGT_type_mask) != type) ||
+                 likely(!(x & (PGT_validated|PGT_partial))) )
                 break;
 
-            y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
+            y = cmpxchg(&page->u.inuse.type_info, x,
+                        x & ~(PGT_validated|PGT_partial));
             if ( likely(y == x) )
             {
-                free_page_type(page, type);
+                if ( free_page_type(page, x, 0) != 0 )
+                    BUG();
                 break;
             }
         }
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/domain_build.c       Fri Sep 12 14:47:40 2008 +0900
@@ -26,6 +26,7 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/paging.h>
+#include <asm/p2m.h>
 #include <asm/e820.h>
 
 #include <public/version.h>
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/domctl.c     Fri Sep 12 14:47:40 2008 +0900
@@ -20,7 +20,7 @@
 #include <xen/trace.h>
 #include <xen/console.h>
 #include <xen/iocap.h>
-#include <asm/paging.h>
+#include <xen/paging.h>
 #include <asm/irq.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
@@ -67,14 +67,6 @@ long arch_do_domctl(
         ret = -ESRCH;
         if ( unlikely((d = rcu_lock_domain_by_id(domctl->domain)) == NULL) )
             break;
-
-        ret = xsm_ioport_permission(d, fp, 
-                                    domctl->u.ioport_permission.allow_access);
-        if ( ret )
-        {
-            rcu_unlock_domain(d);
-            break;
-        }
 
         if ( np == 0 )
             ret = 0;
@@ -550,6 +542,10 @@ long arch_do_domctl(
         if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
             break;
 
+        ret = xsm_sendtrigger(d);
+        if ( ret )
+            goto sendtrigger_out;
+
         ret = -EINVAL;
         if ( domctl->u.sendtrigger.vcpu >= MAX_VIRT_CPUS )
             goto sendtrigger_out;
@@ -628,6 +624,10 @@ long arch_do_domctl(
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
 
+        ret = xsm_test_assign_device(domctl->u.assign_device.machine_bdf);
+        if ( ret )
+            break;
+
         if ( device_assigned(bus, devfn) )
         {
             gdprintk(XENLOG_ERR, "XEN_DOMCTL_test_assign_device: "
@@ -655,6 +655,11 @@ long arch_do_domctl(
                 "XEN_DOMCTL_assign_device: get_domain_by_id() failed\n");
             break;
         }
+
+        ret = xsm_assign_device(d, domctl->u.assign_device.machine_bdf);
+        if ( ret )
+            goto assign_device_out;
+
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
 
@@ -680,6 +685,7 @@ long arch_do_domctl(
                      "assign device (%x:%x:%x) failed\n",
                      bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
+    assign_device_out:
         put_domain(d);
     }
     break;
@@ -700,6 +706,11 @@ long arch_do_domctl(
                 "XEN_DOMCTL_deassign_device: get_domain_by_id() failed\n"); 
             break;
         }
+
+        ret = xsm_assign_device(d, domctl->u.assign_device.machine_bdf);
+        if ( ret )
+            goto deassign_device_out;
+
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
 
@@ -720,6 +731,8 @@ long arch_do_domctl(
         deassign_device(d, bus, devfn);
         gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+    deassign_device_out:
         put_domain(d);
     }
     break;
@@ -733,10 +746,17 @@ long arch_do_domctl(
         if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
             break;
         bind = &(domctl->u.bind_pt_irq);
+
+        ret = xsm_bind_pt_irq(d, bind);
+        if ( ret )
+            goto bind_out;
+
         if ( iommu_enabled )
             ret = pt_irq_create_bind_vtd(d, bind);
         if ( ret < 0 )
             gdprintk(XENLOG_ERR, "pt_irq_create_bind failed!\n");
+
+    bind_out:
         rcu_unlock_domain(d);
     }
     break;    
@@ -877,11 +897,16 @@ long arch_do_domctl(
         if ( d == NULL )
             break;
 
+        ret = xsm_pin_mem_cacheattr(d);
+        if ( ret )
+            goto pin_out;
+
         ret = hvm_set_mem_pinned_cacheattr(
             d, domctl->u.pin_mem_cacheattr.start,
             domctl->u.pin_mem_cacheattr.end,
             domctl->u.pin_mem_cacheattr.type);
 
+    pin_out:
         rcu_unlock_domain(d);
     }
     break;
@@ -899,6 +924,10 @@ long arch_do_domctl(
         d = rcu_lock_domain_by_id(domctl->domain);
         if ( d == NULL )
             break;
+
+        ret = xsm_ext_vcpucontext(d, domctl->cmd);
+        if ( ret )
+            goto ext_vcpucontext_out;
 
         ret = -ESRCH;
         if ( (evc->vcpu >= MAX_VIRT_CPUS) ||
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hpet.c
--- a/xen/arch/x86/hpet.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hpet.c       Fri Sep 12 14:47:40 2008 +0900
@@ -100,6 +100,13 @@ static int reprogram_hpet_evt_channel(
 
     ch->next_event = expire;
 
+    if ( expire == STIME_MAX )
+    {
+        /* We assume it will take a long time for the timer to wrap. */
+        hpet_write32(0, HPET_T0_CMP);
+        return 0;
+    }
+
     delta = min_t(int64_t, delta, MAX_DELTA_NS);
     delta = max_t(int64_t, delta, MIN_DELTA_NS);
     delta = ns2ticks(delta, ch->shift, ch->mult);
@@ -206,9 +213,11 @@ void hpet_broadcast_enter(void)
 {
     struct hpet_event_channel *ch = &hpet_event;
 
+    spin_lock(&ch->lock);
+
+    disable_APIC_timer();
+
     cpu_set(smp_processor_id(), ch->cpumask);
-
-    spin_lock(&ch->lock);
 
     /* reprogram if current cpu expire time is nearer */
     if ( this_cpu(timer_deadline) < ch->next_event )
@@ -222,8 +231,23 @@ void hpet_broadcast_exit(void)
     struct hpet_event_channel *ch = &hpet_event;
     int cpu = smp_processor_id();
 
+    spin_lock_irq(&ch->lock);
+
     if ( cpu_test_and_clear(cpu, ch->cpumask) )
-        reprogram_timer(per_cpu(timer_deadline, cpu));
+    {
+        /* Cancel any outstanding LAPIC event and re-enable interrupts. */
+        reprogram_timer(0);
+        enable_APIC_timer();
+        
+        /* Reprogram the deadline; trigger timer work now if it has passed. */
+        if ( !reprogram_timer(per_cpu(timer_deadline, cpu)) )
+            raise_softirq(TIMER_SOFTIRQ);
+
+        if ( cpus_empty(ch->cpumask) && ch->next_event != STIME_MAX )
+            reprogram_hpet_evt_channel(ch, STIME_MAX, 0, 0);
+    }
+
+    spin_unlock_irq(&ch->lock);
 }
 
 int hpet_broadcast_is_available(void)
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/hvm.c    Fri Sep 12 14:47:40 2008 +0900
@@ -31,10 +31,11 @@
 #include <xen/hypercall.h>
 #include <xen/guest_access.h>
 #include <xen/event.h>
+#include <xen/paging.h>
+#include <asm/shadow.h>
 #include <asm/current.h>
 #include <asm/e820.h>
 #include <asm/io.h>
-#include <asm/paging.h>
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -772,7 +773,7 @@ void hvm_hlt(unsigned long rflags)
 
     do_sched_op_compat(SCHEDOP_block, 0);
 
-    HVMTRACE_1D(HLT, curr, /* pending = */ vcpu_runnable(curr));
+    HVMTRACE_1D(HLT, /* pending = */ vcpu_runnable(curr));
 }
 
 void hvm_triple_fault(void)
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/svm/intr.c       Fri Sep 12 14:47:40 2008 +0900
@@ -80,7 +80,7 @@ static void enable_intr_window(struct vc
 
     ASSERT(intack.source != hvm_intsrc_none);
 
-    HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
+    HVMTRACE_2D(INJ_VIRQ, 0x0, /*fake=*/ 1);
 
     /*
      * Create a dummy virtual interrupt to intercept as soon as the
@@ -199,7 +199,7 @@ asmlinkage void svm_intr_assist(void)
     }
     else
     {
-        HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
+        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         svm_inject_extint(v, intack.vector);
         pt_intr_post(v, intack);
     }
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/svm/svm.c        Fri Sep 12 14:47:40 2008 +0900
@@ -759,11 +759,11 @@ static void svm_inject_exception(
     if ( trapnr == TRAP_page_fault )
     {
         vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
-        HVMTRACE_LONG_2D(PF_INJECT, curr, errcode, TRC_PAR_LONG(cr2));
+        HVMTRACE_LONG_2D(PF_INJECT, errcode, TRC_PAR_LONG(cr2));
     }
     else
     {
-        HVMTRACE_2D(INJ_EXC, curr, trapnr, errcode);
+        HVMTRACE_2D(INJ_EXC, trapnr, errcode);
     }
 
     if ( (trapnr == TRAP_debug) &&
@@ -919,7 +919,7 @@ static void svm_cpuid_intercept(
             __clear_bit(X86_FEATURE_APIC & 31, edx);
     }
 
-    HVMTRACE_5D (CPUID, v, input, *eax, *ebx, *ecx, *edx);
+    HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
 }
 
 static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
@@ -946,7 +946,7 @@ static void svm_vmexit_do_cpuid(struct c
 
 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
 {
-    HVMTRACE_0D(DR_WRITE, v);
+    HVMTRACE_0D(DR_WRITE);
     __restore_debug_registers(v);
 }
 
@@ -1018,7 +1018,7 @@ static int svm_msr_read_intercept(struct
     regs->edx = msr_content >> 32;
 
  done:
-    HVMTRACE_3D (MSR_READ, v, ecx, regs->eax, regs->edx);
+    HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
     HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
                 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
     return X86EMUL_OKAY;
@@ -1037,7 +1037,7 @@ static int svm_msr_write_intercept(struc
 
     msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
 
-    HVMTRACE_3D (MSR_WRITE, v, ecx, regs->eax, regs->edx);
+    HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
 
     switch ( ecx )
     {
@@ -1168,7 +1168,7 @@ static void svm_invlpg_intercept(unsigne
 static void svm_invlpg_intercept(unsigned long vaddr)
 {
     struct vcpu *curr = current;
-    HVMTRACE_LONG_2D(INVLPG, curr, 0, TRC_PAR_LONG(vaddr));
+    HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
     paging_invlpg(curr, vaddr);
     svm_asid_g_invlpg(curr, vaddr);
 }
@@ -1191,7 +1191,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     exit_reason = vmcb->exitcode;
 
-    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, v, 3, exit_reason,
+    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
                 (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
                 0, 0, 0);
 
@@ -1216,17 +1216,17 @@ asmlinkage void svm_vmexit_handler(struc
     {
     case VMEXIT_INTR:
         /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
-        HVMTRACE_0D(INTR, v);
+        HVMTRACE_0D(INTR);
         break;
 
     case VMEXIT_NMI:
         /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
-        HVMTRACE_0D(NMI, v);
+        HVMTRACE_0D(NMI);
         break;
 
     case VMEXIT_SMI:
         /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
-        HVMTRACE_0D(SMI, v);
+        HVMTRACE_0D(SMI);
         break;
 
     case VMEXIT_EXCEPTION_DB:
@@ -1261,10 +1261,12 @@ asmlinkage void svm_vmexit_handler(struc
 
         if ( paging_fault(va, regs) )
         {
-            if (hvm_long_mode_enabled(v))
-                HVMTRACE_LONG_2D(PF_XEN, v, regs->error_code, TRC_PAR_LONG(va));
+            if ( trace_will_trace_event(TRC_SHADOW) )
+                break;
+            if ( hvm_long_mode_enabled(v) )
+                HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va));
             else
-                HVMTRACE_2D(PF_XEN, v, regs->error_code, va);
+                HVMTRACE_2D(PF_XEN, regs->error_code, va);
             break;
         }
 
@@ -1274,7 +1276,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
     case VMEXIT_EXCEPTION_MC:
-        HVMTRACE_0D(MCE, v);
+        HVMTRACE_0D(MCE);
         break;
 
     case VMEXIT_VINTR:
@@ -1331,7 +1333,7 @@ asmlinkage void svm_vmexit_handler(struc
     case VMEXIT_VMMCALL:
         if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
             break;
-        HVMTRACE_1D(VMMCALL, v, regs->eax);
+        HVMTRACE_1D(VMMCALL, regs->eax);
         rc = hvm_do_hypercall(regs);
         if ( rc != HVM_HCALL_preempted )
         {
@@ -1406,7 +1408,7 @@ asmlinkage void svm_vmexit_handler(struc
 
 asmlinkage void svm_trace_vmentry(void)
 {
-    HVMTRACE_ND (VMENTRY, 1/*cycles*/, current, 0, 0, 0, 0, 0, 0, 0);
+    HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 }
   
 /*
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/intr.c       Fri Sep 12 14:47:40 2008 +0900
@@ -198,7 +198,7 @@ asmlinkage void vmx_intr_assist(void)
     }
     else
     {
-        HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
+        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(v, intack.vector);
         pt_intr_post(v, intack);
     }
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Sep 12 14:47:40 2008 +0900
@@ -1114,10 +1114,10 @@ static void __vmx_inject_exception(
     __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
 
     if ( trap == TRAP_page_fault )
-        HVMTRACE_LONG_2D(PF_INJECT, v, error_code,
+        HVMTRACE_LONG_2D(PF_INJECT, error_code,
             TRC_PAR_LONG(v->arch.hvm_vcpu.guest_cr[2]));
     else
-        HVMTRACE_2D(INJ_EXC, v, trap, error_code);
+        HVMTRACE_2D(INJ_EXC, trap, error_code);
 }
 
 void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code)
@@ -1345,7 +1345,7 @@ static void vmx_cpuid_intercept(
             break;
     }
 
-    HVMTRACE_5D (CPUID, current, input, *eax, *ebx, *ecx, *edx);
+    HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
 }
 
 static void vmx_do_cpuid(struct cpu_user_regs *regs)
@@ -1370,7 +1370,7 @@ static void vmx_dr_access(unsigned long 
 {
     struct vcpu *v = current;
 
-    HVMTRACE_0D(DR_WRITE, v);
+    HVMTRACE_0D(DR_WRITE);
 
     if ( !v->arch.hvm_vcpu.flag_dr_dirty )
         __restore_debug_registers(v);
@@ -1383,7 +1383,7 @@ static void vmx_invlpg_intercept(unsigne
 static void vmx_invlpg_intercept(unsigned long vaddr)
 {
     struct vcpu *curr = current;
-    HVMTRACE_LONG_2D(INVLPG, curr, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
+    HVMTRACE_LONG_2D(INVLPG, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
     if ( paging_invlpg(curr, vaddr) )
         vpid_sync_vcpu_gva(curr, vaddr);
 }
@@ -1434,7 +1434,7 @@ static int mov_to_cr(int gp, int cr, str
         goto exit_and_crash;
     }
 
-    HVMTRACE_LONG_2D(CR_WRITE, v, cr, TRC_PAR_LONG(value));
+    HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(value));
 
     HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
 
@@ -1505,7 +1505,7 @@ static void mov_from_cr(int cr, int gp, 
         break;
     }
 
-    HVMTRACE_LONG_2D(CR_READ, v, cr, TRC_PAR_LONG(value));
+    HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(value));
 
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
 }
@@ -1531,13 +1531,13 @@ static int vmx_cr_access(unsigned long e
     case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
         v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
         vmx_update_guest_cr(v, 0);
-        HVMTRACE_0D(CLTS, current);
+        HVMTRACE_0D(CLTS);
         break;
     case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
         value = v->arch.hvm_vcpu.guest_cr[0];
         /* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
         value = (value & ~0xe) | ((exit_qualification >> 16) & 0xf);
-        HVMTRACE_LONG_1D(LMSW, current, value);
+        HVMTRACE_LONG_1D(LMSW, value);
         return !hvm_set_cr0(value);
     default:
         BUG();
@@ -1692,7 +1692,7 @@ static int vmx_msr_read_intercept(struct
     regs->edx = (uint32_t)(msr_content >> 32);
 
 done:
-    HVMTRACE_3D (MSR_READ, v, ecx, regs->eax, regs->edx);
+    HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
     HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
                 ecx, (unsigned long)regs->eax,
                 (unsigned long)regs->edx);
@@ -1803,7 +1803,7 @@ static int vmx_msr_write_intercept(struc
 
     msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
 
-    HVMTRACE_3D (MSR_WRITE, v, ecx, regs->eax, regs->edx);
+    HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
 
     switch ( ecx )
     {
@@ -1894,7 +1894,7 @@ static void vmx_do_extint(struct cpu_use
     BUG_ON(!(vector & INTR_INFO_VALID_MASK));
 
     vector &= INTR_INFO_VECTOR_MASK;
-    HVMTRACE_1D(INTR, current, vector);
+    HVMTRACE_1D(INTR, vector);
 
     switch ( vector )
     {
@@ -2010,7 +2010,7 @@ static void vmx_failed_vmentry(unsigned 
         break;
     case EXIT_REASON_MACHINE_CHECK:
         printk("caused by machine check.\n");
-        HVMTRACE_0D(MCE, curr);
+        HVMTRACE_0D(MCE);
         do_machine_check(regs);
         break;
     default:
@@ -2037,7 +2037,7 @@ asmlinkage void vmx_vmexit_handler(struc
 
     exit_reason = __vmread(VM_EXIT_REASON);
 
-    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, v, 3, exit_reason,
+    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
                 (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
                 0, 0, 0);
 
@@ -2101,7 +2101,8 @@ asmlinkage void vmx_vmexit_handler(struc
              !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) &&
              (vector != TRAP_double_fault) )
             __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
-                    __vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI);
+                      __vmread(GUEST_INTERRUPTIBILITY_INFO)
+                      | VMX_INTR_SHADOW_NMI);
 
         perfc_incra(cause_vector, vector);
 
@@ -2128,12 +2129,14 @@ asmlinkage void vmx_vmexit_handler(struc
 
             if ( paging_fault(exit_qualification, regs) )
             {
+                if ( trace_will_trace_event(TRC_SHADOW) )
+                    break;
                 if ( hvm_long_mode_enabled(v) )
-                    HVMTRACE_LONG_2D (PF_XEN, v, regs->error_code,
-                        TRC_PAR_LONG(exit_qualification) );
+                    HVMTRACE_LONG_2D(PF_XEN, regs->error_code,
+                                     TRC_PAR_LONG(exit_qualification) );
                 else
-                    HVMTRACE_2D (PF_XEN, v,
-                        regs->error_code, exit_qualification );
+                    HVMTRACE_2D(PF_XEN,
+                                regs->error_code, exit_qualification );
                 break;
             }
 
@@ -2144,11 +2147,11 @@ asmlinkage void vmx_vmexit_handler(struc
             if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
                  (X86_EVENTTYPE_NMI << 8) )
                 goto exit_and_crash;
-            HVMTRACE_0D(NMI, v);
+            HVMTRACE_0D(NMI);
             do_nmi(regs); /* Real NMI, vector 2: normal processing. */
             break;
         case TRAP_machine_check:
-            HVMTRACE_0D(MCE, v);
+            HVMTRACE_0D(MCE);
             do_machine_check(regs);
             break;
         default:
@@ -2213,7 +2216,7 @@ asmlinkage void vmx_vmexit_handler(struc
     case EXIT_REASON_VMCALL:
     {
         int rc;
-        HVMTRACE_1D(VMMCALL, v, regs->eax);
+        HVMTRACE_1D(VMMCALL, regs->eax);
         inst_len = __get_instruction_length(); /* Safe: VMCALL */
         rc = hvm_do_hypercall(regs);
         if ( rc != HVM_HCALL_preempted )
@@ -2300,7 +2303,7 @@ asmlinkage void vmx_vmexit_handler(struc
 
 asmlinkage void vmx_trace_vmentry(void)
 {
-    HVMTRACE_ND (VMENTRY, 1/*cycles*/, current, 0, 0, 0, 0, 0, 0, 0);
+    HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 }
 
 /*
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/io_apic.c    Fri Sep 12 14:47:40 2008 +0900
@@ -45,23 +45,14 @@ int (*ioapic_renumber_irq)(int ioapic, i
 int (*ioapic_renumber_irq)(int ioapic, int irq);
 atomic_t irq_mis_count;
 
-int msi_enable = 0;
-boolean_param("msi", msi_enable);
-
 int domain_irq_to_vector(struct domain *d, int irq)
 {
-    if ( !msi_enable )
-        return irq_to_vector(irq);
-    else
-        return d->arch.pirq_vector[irq];
+    return d->arch.pirq_vector[irq];
 }
 
 int domain_vector_to_irq(struct domain *d, int vector)
 {
-    if ( !msi_enable )
-        return vector_to_irq(vector);
-    else
-        return d->arch.vector_pirq[vector];
+    return d->arch.vector_pirq[vector];
 }
 
 /* Where if anywhere is the i8259 connect in external int mode */
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/irq.c        Fri Sep 12 14:47:40 2008 +0900
@@ -737,9 +737,12 @@ __initcall(setup_dump_irqs);
 
 void fixup_irqs(cpumask_t map)
 {
-    unsigned int irq;
+    unsigned int irq, sp;
     static int warned;
-
+    irq_guest_action_t *action;
+    struct pending_eoi *peoi;
+
+    /* Direct all future interrupts away from this CPU. */
     for ( irq = 0; irq < NR_IRQS; irq++ )
     {
         cpumask_t mask;
@@ -758,8 +761,24 @@ void fixup_irqs(cpumask_t map)
             printk("Cannot set affinity for irq %i\n", irq);
     }
 
+    /* Service any interrupts that beat us in the re-direction race. */
     local_irq_enable();
     mdelay(1);
     local_irq_disable();
+
+    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
+    for ( irq = 0; irq < NR_IRQS; irq++ )
+    {
+        if ( !(irq_desc[irq].status & IRQ_GUEST) )
+            continue;
+        action = (irq_guest_action_t *)irq_desc[irq].action;
+        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
+    }
+
+    /* Flush the interrupt EOI stack. */
+    peoi = this_cpu(pending_eoi);
+    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
+        peoi[sp].ready = 1;
+    flush_ready_eoi(NULL);
 }
 #endif
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm.c Fri Sep 12 14:47:40 2008 +0900
@@ -507,11 +507,11 @@ static int alloc_segdesc_page(struct pag
             goto fail;
 
     unmap_domain_page(descs);
-    return 1;
+    return 0;
 
  fail:
     unmap_domain_page(descs);
-    return 0;
+    return -EINVAL;
 }
 
 
@@ -565,20 +565,23 @@ static int get_page_from_pagenr(unsigned
 
 static int get_page_and_type_from_pagenr(unsigned long page_nr, 
                                          unsigned long type,
-                                         struct domain *d)
+                                         struct domain *d,
+                                         int preemptible)
 {
     struct page_info *page = mfn_to_page(page_nr);
+    int rc;
 
     if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
-        return 0;
-
-    if ( unlikely(!get_page_type(page, type)) )
-    {
+        return -EINVAL;
+
+    rc = (preemptible ?
+          get_page_type_preemptible(page, type) :
+          (get_page_type(page, type) ? 0 : -EINVAL));
+
+    if ( rc )
         put_page(page);
-        return 0;
-    }
-
-    return 1;
+
+    return rc;
 }
 
 /*
@@ -754,22 +757,22 @@ get_page_from_l2e(
     if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
     {
         MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
-        return 0;
-    }
-
-    rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
-    if ( unlikely(!rc) )
-        rc = get_l2_linear_pagetable(l2e, pfn, d);
+        return -EINVAL;
+    }
+
+    rc = get_page_and_type_from_pagenr(
+        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+        rc = 0;
 
     return rc;
 }
 
 
-#if CONFIG_PAGING_LEVELS >= 3
 define_get_linear_pagetable(l3);
 static int
 get_page_from_l3e(
-    l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
+    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
 {
     int rc;
 
@@ -779,22 +782,22 @@ get_page_from_l3e(
     if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
     {
         MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
-        return 0;
-    }
-
-    rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
-    if ( unlikely(!rc) )
-        rc = get_l3_linear_pagetable(l3e, pfn, d);
+        return -EINVAL;
+    }
+
+    rc = get_page_and_type_from_pagenr(
+        l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
+    if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
+        rc = 0;
 
     return rc;
 }
-#endif /* 3 level */
 
 #if CONFIG_PAGING_LEVELS >= 4
 define_get_linear_pagetable(l4);
 static int
 get_page_from_l4e(
-    l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
+    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
 {
     int rc;
 
@@ -804,12 +807,13 @@ get_page_from_l4e(
     if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
     {
         MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
-        return 0;
-    }
-
-    rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
-    if ( unlikely(!rc) )
-        rc = get_l4_linear_pagetable(l4e, pfn, d);
+        return -EINVAL;
+    }
+
+    rc = get_page_and_type_from_pagenr(
+        l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
+    if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
+        rc = 0;
 
     return rc;
 }
@@ -946,29 +950,35 @@ void put_page_from_l1e(l1_pgentry_t l1e,
  * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
  * Note also that this automatically deals correctly with linear p.t.'s.
  */
-static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
+static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
 {
     if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
          (l2e_get_pfn(l2e) != pfn) )
+    {
         put_page_and_type(l2e_get_page(l2e));
-}
-
-
-#if CONFIG_PAGING_LEVELS >= 3
-static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
+        return 0;
+    }
+    return 1;
+}
+
+
+static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+                             int preemptible)
 {
     if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 
          (l3e_get_pfn(l3e) != pfn) )
-        put_page_and_type(l3e_get_page(l3e));
-}
-#endif
+        return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
+    return 1;
+}
 
 #if CONFIG_PAGING_LEVELS >= 4
-static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
+static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+                             int preemptible)
 {
     if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && 
          (l4e_get_pfn(l4e) != pfn) )
-        put_page_and_type(l4e_get_page(l4e));
+        return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
+    return 1;
 }
 #endif
 
@@ -977,7 +987,7 @@ static int alloc_l1_table(struct page_in
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_mfn(page);
     l1_pgentry_t  *pl1e;
-    int            i;
+    unsigned int   i;
 
     pl1e = map_domain_page(pfn);
 
@@ -991,7 +1001,7 @@ static int alloc_l1_table(struct page_in
     }
 
     unmap_domain_page(pl1e);
-    return 1;
+    return 0;
 
  fail:
     MEM_LOG("Failure in alloc_l1_table: entry %d", i);
@@ -1000,7 +1010,7 @@ static int alloc_l1_table(struct page_in
             put_page_from_l1e(pl1e[i], d);
 
     unmap_domain_page(pl1e);
-    return 0;
+    return -EINVAL;
 }
 
 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
@@ -1128,47 +1138,53 @@ static void pae_flush_pgd(
 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
 #endif
 
-static int alloc_l2_table(struct page_info *page, unsigned long type)
+static int alloc_l2_table(struct page_info *page, unsigned long type,
+                          int preemptible)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_mfn(page);
     l2_pgentry_t  *pl2e;
-    int            i;
+    unsigned int   i;
+    int            rc = 0;
 
     pl2e = map_domain_page(pfn);
 
-    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
-    {
-        if ( !is_guest_l2_slot(d, type, i) )
+    for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
+    {
+        if ( preemptible && i && hypercall_preempt_check() )
+        {
+            page->nr_validated_ptes = i;
+            rc = -EAGAIN;
+            break;
+        }
+
+        if ( !is_guest_l2_slot(d, type, i) ||
+             (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
             continue;
 
-        if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
-            goto fail;
-        
+        if ( rc < 0 )
+        {
+            MEM_LOG("Failure in alloc_l2_table: entry %d", i);
+            while ( i-- > 0 )
+                if ( is_guest_l2_slot(d, type, i) )
+                    put_page_from_l2e(pl2e[i], pfn);
+            break;
+        }
+
         adjust_guest_l2e(pl2e[i], d);
     }
 
     unmap_domain_page(pl2e);
-    return 1;
-
- fail:
-    MEM_LOG("Failure in alloc_l2_table: entry %d", i);
-    while ( i-- > 0 )
-        if ( is_guest_l2_slot(d, type, i) )
-            put_page_from_l2e(pl2e[i], pfn);
-
-    unmap_domain_page(pl2e);
-    return 0;
-}
-
-
-#if CONFIG_PAGING_LEVELS >= 3
-static int alloc_l3_table(struct page_info *page)
+    return rc > 0 ? 0 : rc;
+}
+
+static int alloc_l3_table(struct page_info *page, int preemptible)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_mfn(page);
     l3_pgentry_t  *pl3e;
-    int            i;
+    unsigned int   i;
+    int            rc = 0;
 
 #if CONFIG_PAGING_LEVELS == 3
     /*
@@ -1181,7 +1197,7 @@ static int alloc_l3_table(struct page_in
          d->vcpu[0] && d->vcpu[0]->is_initialised )
     {
         MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
-        return 0;
+        return -EINVAL;
     }
 #endif
 
@@ -1197,64 +1213,96 @@ static int alloc_l3_table(struct page_in
     if ( is_pv_32on64_domain(d) )
         memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
 
-    for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
     {
         if ( is_pv_32bit_domain(d) && (i == 3) )
         {
             if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
-                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
-                 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
-                                                PGT_l2_page_table |
-                                                PGT_pae_xen_l2,
-                                                d) )
-                goto fail;
-        }
-        else if ( !is_guest_l3_slot(i) )
+                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) )
+                rc = -EINVAL;
+            else
+                rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
+                                                   PGT_l2_page_table |
+                                                   PGT_pae_xen_l2,
+                                                   d, preemptible);
+        }
+        else if ( !is_guest_l3_slot(i) ||
+                  (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
             continue;
-        else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
-            goto fail;
+
+        if ( rc == -EAGAIN )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = 1;
+        }
+        else if ( rc == -EINTR && i )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = 0;
+            rc = -EAGAIN;
+        }
+        if ( rc < 0 )
+            break;
 
         adjust_guest_l3e(pl3e[i], d);
     }
 
-    if ( !create_pae_xen_mappings(d, pl3e) )
-        goto fail;
+    if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) )
+        rc = -EINVAL;
+    if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
+    {
+        MEM_LOG("Failure in alloc_l3_table: entry %d", i);
+        while ( i-- > 0 )
+        {
+            if ( !is_guest_l3_slot(i) )
+                continue;
+            unadjust_guest_l3e(pl3e[i], d);
+            put_page_from_l3e(pl3e[i], pfn, 0);
+        }
+    }
 
     unmap_domain_page(pl3e);
-    return 1;
-
- fail:
-    MEM_LOG("Failure in alloc_l3_table: entry %d", i);
-    while ( i-- > 0 )
-    {
-        if ( !is_guest_l3_slot(i) )
-            continue;
-        unadjust_guest_l3e(pl3e[i], d);
-        put_page_from_l3e(pl3e[i], pfn);
-    }
-
-    unmap_domain_page(pl3e);
-    return 0;
-}
-#else
-#define alloc_l3_table(page) (0)
-#endif
+    return rc > 0 ? 0 : rc;
+}
 
 #if CONFIG_PAGING_LEVELS >= 4
-static int alloc_l4_table(struct page_info *page)
+static int alloc_l4_table(struct page_info *page, int preemptible)
 {
     struct domain *d = page_get_owner(page);
     unsigned long  pfn = page_to_mfn(page);
     l4_pgentry_t  *pl4e = page_to_virt(page);
-    int            i;
-
-    for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
-    {
-        if ( !is_guest_l4_slot(d, i) )
+    unsigned int   i;
+    int            rc = 0;
+
+    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
+    {
+        if ( !is_guest_l4_slot(d, i) ||
+             (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
             continue;
 
-        if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
-            goto fail;
+        if ( rc == -EAGAIN )
+        {
+            page->nr_validated_ptes = i;
+            page->partial_pte = 1;
+        }
+        else if ( rc == -EINTR )
+        {
+            if ( i )
+            {
+                page->nr_validated_ptes = i;
+                page->partial_pte = 0;
+                rc = -EAGAIN;
+            }
+        }
+        else if ( rc < 0 )
+        {
+            MEM_LOG("Failure in alloc_l4_table: entry %d", i);
+            while ( i-- > 0 )
+                if ( is_guest_l4_slot(d, i) )
+                    put_page_from_l4e(pl4e[i], pfn, 0);
+        }
+        if ( rc < 0 )
+            return rc;
 
         adjust_guest_l4e(pl4e[i], d);
     }
@@ -1269,18 +1317,10 @@ static int alloc_l4_table(struct page_in
         l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
                       __PAGE_HYPERVISOR);
 
-    return 1;
-
- fail:
-    MEM_LOG("Failure in alloc_l4_table: entry %d", i);
-    while ( i-- > 0 )
-        if ( is_guest_l4_slot(d, i) )
-            put_page_from_l4e(pl4e[i], pfn);
-
-    return 0;
+    return rc > 0 ? 0 : rc;
 }
 #else
-#define alloc_l4_table(page) (0)
+#define alloc_l4_table(page, preemptible) (-EINVAL)
 #endif
 
 
@@ -1289,7 +1329,7 @@ static void free_l1_table(struct page_in
     struct domain *d = page_get_owner(page);
     unsigned long pfn = page_to_mfn(page);
     l1_pgentry_t *pl1e;
-    int i;
+    unsigned int  i;
 
     pl1e = map_domain_page(pfn);
 
@@ -1301,74 +1341,114 @@ static void free_l1_table(struct page_in
 }
 
 
-static void free_l2_table(struct page_info *page)
+static int free_l2_table(struct page_info *page, int preemptible)
 {
 #ifdef CONFIG_COMPAT
     struct domain *d = page_get_owner(page);
 #endif
     unsigned long pfn = page_to_mfn(page);
     l2_pgentry_t *pl2e;
-    int i;
+    unsigned int  i = page->nr_validated_ptes - 1;
+    int err = 0;
 
     pl2e = map_domain_page(pfn);
 
-    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
-        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
-            put_page_from_l2e(pl2e[i], pfn);
+    ASSERT(page->nr_validated_ptes);
+    do {
+        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
+             put_page_from_l2e(pl2e[i], pfn) == 0 &&
+             preemptible && i && hypercall_preempt_check() )
+        {
+           page->nr_validated_ptes = i;
+           err = -EAGAIN;
+        }
+    } while ( !err && i-- );
 
     unmap_domain_page(pl2e);
 
-    page->u.inuse.type_info &= ~PGT_pae_xen_l2;
-}
-
-
-#if CONFIG_PAGING_LEVELS >= 3
-
-static void free_l3_table(struct page_info *page)
+    if ( !err )
+        page->u.inuse.type_info &= ~PGT_pae_xen_l2;
+
+    return err;
+}
+
+static int free_l3_table(struct page_info *page, int preemptible)
 {
     struct domain *d = page_get_owner(page);
     unsigned long pfn = page_to_mfn(page);
     l3_pgentry_t *pl3e;
-    int           i;
+    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
+    int rc = 0;
 
 #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
     if ( d->arch.relmem == RELMEM_l3 )
-        return;
+        return 0;
 #endif
 
     pl3e = map_domain_page(pfn);
 
-    for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+    do {
         if ( is_guest_l3_slot(i) )
         {
-            put_page_from_l3e(pl3e[i], pfn);
+            rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
+            if ( rc > 0 )
+                continue;
+            if ( rc )
+                break;
             unadjust_guest_l3e(pl3e[i], d);
         }
+    } while ( i-- );
 
     unmap_domain_page(pl3e);
-}
-
-#endif
+
+    if ( rc == -EAGAIN )
+    {
+        page->nr_validated_ptes = i;
+        page->partial_pte = 1;
+    }
+    else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
+    {
+        page->nr_validated_ptes = i + 1;
+        page->partial_pte = 0;
+        rc = -EAGAIN;
+    }
+    return rc > 0 ? 0 : rc;
+}
 
 #if CONFIG_PAGING_LEVELS >= 4
-
-static void free_l4_table(struct page_info *page)
+static int free_l4_table(struct page_info *page, int preemptible)
 {
     struct domain *d = page_get_owner(page);
     unsigned long pfn = page_to_mfn(page);
     l4_pgentry_t *pl4e = page_to_virt(page);
-    int           i;
+    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
+    int rc = 0;
 
 #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
     if ( d->arch.relmem == RELMEM_l4 )
-        return;
+        return 0;
 #endif
 
-    for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
+    do {
         if ( is_guest_l4_slot(d, i) )
-            put_page_from_l4e(pl4e[i], pfn);
-}
-
+            rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
+    } while ( rc >= 0 && i-- );
+
+    if ( rc == -EAGAIN )
+    {
+        page->nr_validated_ptes = i;
+        page->partial_pte = 1;
+    }
+    else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
+    {
+        page->nr_validated_ptes = i + 1;
+        page->partial_pte = 0;
+        rc = -EAGAIN;
+    }
+    return rc > 0 ? 0 : rc;
+}
+#else
+#define free_l4_table(page, preemptible) (-EINVAL)
 #endif
 
 static void page_lock(struct page_info *page)
@@ -1560,7 +1640,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
             return rc;
         }
 
-        if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
+        if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
             return page_unlock(l2pg), 0;
 
         adjust_guest_l2e(nl2e, d);
@@ -1582,25 +1662,24 @@ static int mod_l2_entry(l2_pgentry_t *pl
     put_page_from_l2e(ol2e, pfn);
     return rc;
 }
-
-#if CONFIG_PAGING_LEVELS >= 3
 
 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
 static int mod_l3_entry(l3_pgentry_t *pl3e, 
                         l3_pgentry_t nl3e, 
                         unsigned long pfn,
-                        int preserve_ad)
+                        int preserve_ad,
+                        int preemptible)
 {
     l3_pgentry_t ol3e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     struct page_info *l3pg = mfn_to_page(pfn);
-    int rc = 1;
+    int rc = 0;
 
     if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
     {
         MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
-        return 0;
+        return -EINVAL;
     }
 
     /*
@@ -1608,12 +1687,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
      * would be a pain to ensure they remain continuously valid throughout.
      */
     if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
-        return 0;
+        return -EINVAL;
 
     page_lock(l3pg);
 
     if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
-        return page_unlock(l3pg), 0;
+        return page_unlock(l3pg), -EFAULT;
 
     if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
     {
@@ -1622,7 +1701,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
             page_unlock(l3pg);
             MEM_LOG("Bad L3 flags %x",
                     l3e_get_flags(nl3e) & l3_disallow_mask(d));
-            return 0;
+            return -EINVAL;
         }
 
         /* Fast path for identical mapping and presence. */
@@ -1631,28 +1710,30 @@ static int mod_l3_entry(l3_pgentry_t *pl
             adjust_guest_l3e(nl3e, d);
             rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
             page_unlock(l3pg);
-            return rc;
-        }
-
-        if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
-            return page_unlock(l3pg), 0;
+            return rc ? 0 : -EFAULT;
+        }
+
+        rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
+        if ( unlikely(rc < 0) )
+            return page_unlock(l3pg), rc;
+        rc = 0;
 
         adjust_guest_l3e(nl3e, d);
         if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                     preserve_ad)) )
         {
             ol3e = nl3e;
-            rc = 0;
+            rc = -EFAULT;
         }
     }
     else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                      preserve_ad)) )
     {
         page_unlock(l3pg);
-        return 0;
-    }
-
-    if ( likely(rc) )
+        return -EFAULT;
+    }
+
+    if ( likely(rc == 0) )
     {
         if ( !create_pae_xen_mappings(d, pl3e) )
             BUG();
@@ -1661,11 +1742,9 @@ static int mod_l3_entry(l3_pgentry_t *pl
     }
 
     page_unlock(l3pg);
-    put_page_from_l3e(ol3e, pfn);
+    put_page_from_l3e(ol3e, pfn, 0);
     return rc;
 }
-
-#endif
 
 #if CONFIG_PAGING_LEVELS >= 4
 
@@ -1673,24 +1752,25 @@ static int mod_l4_entry(l4_pgentry_t *pl
 static int mod_l4_entry(l4_pgentry_t *pl4e, 
                         l4_pgentry_t nl4e, 
                         unsigned long pfn,
-                        int preserve_ad)
+                        int preserve_ad,
+                        int preemptible)
 {
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     l4_pgentry_t ol4e;
     struct page_info *l4pg = mfn_to_page(pfn);
-    int rc = 1;
+    int rc = 0;
 
     if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
     {
         MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
-        return 0;
+        return -EINVAL;
     }
 
     page_lock(l4pg);
 
     if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
-        return page_unlock(l4pg), 0;
+        return page_unlock(l4pg), -EFAULT;
 
     if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
     {
@@ -1699,7 +1779,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
             page_unlock(l4pg);
             MEM_LOG("Bad L4 flags %x",
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
-            return 0;
+            return -EINVAL;
         }
 
         /* Fast path for identical mapping and presence. */
@@ -1708,29 +1788,31 @@ static int mod_l4_entry(l4_pgentry_t *pl
             adjust_guest_l4e(nl4e, d);
             rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
             page_unlock(l4pg);
-            return rc;
-        }
-
-        if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
-            return page_unlock(l4pg), 0;
+            return rc ? 0 : -EFAULT;
+        }
+
+        rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
+        if ( unlikely(rc < 0) )
+            return page_unlock(l4pg), rc;
+        rc = 0;
 
         adjust_guest_l4e(nl4e, d);
         if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                     preserve_ad)) )
         {
             ol4e = nl4e;
-            rc = 0;
+            rc = -EFAULT;
         }
     }
     else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                      preserve_ad)) )
     {
         page_unlock(l4pg);
-        return 0;
+        return -EFAULT;
     }
 
     page_unlock(l4pg);
-    put_page_from_l4e(ol4e, pfn);
+    put_page_from_l4e(ol4e, pfn, 0);
     return rc;
 }
 
@@ -1788,9 +1870,11 @@ int get_page(struct page_info *page, str
 }
 
 
-static int alloc_page_type(struct page_info *page, unsigned long type)
+static int alloc_page_type(struct page_info *page, unsigned long type,
+                           int preemptible)
 {
     struct domain *owner = page_get_owner(page);
+    int rc;
 
     /* A page table is dirtied when its type count becomes non-zero. */
     if ( likely(owner != NULL) )
@@ -1799,30 +1883,65 @@ static int alloc_page_type(struct page_i
     switch ( type & PGT_type_mask )
     {
     case PGT_l1_page_table:
-        return alloc_l1_table(page);
+        alloc_l1_table(page);
+        rc = 0;
+        break;
     case PGT_l2_page_table:
-        return alloc_l2_table(page, type);
+        rc = alloc_l2_table(page, type, preemptible);
+        break;
     case PGT_l3_page_table:
-        return alloc_l3_table(page);
+        rc = alloc_l3_table(page, preemptible);
+        break;
     case PGT_l4_page_table:
-        return alloc_l4_table(page);
+        rc = alloc_l4_table(page, preemptible);
+        break;
     case PGT_seg_desc_page:
-        return alloc_segdesc_page(page);
+        rc = alloc_segdesc_page(page);
+        break;
     default:
         printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", 
                type, page->u.inuse.type_info,
                page->count_info);
+        rc = -EINVAL;
         BUG();
     }
 
-    return 0;
-}
-
-
-void free_page_type(struct page_info *page, unsigned long type)
+    /* No need for atomic update of type_info here: noone else updates it. */
+    wmb();
+    if ( rc == -EAGAIN )
+    {
+        page->u.inuse.type_info |= PGT_partial;
+    }
+    else if ( rc == -EINTR )
+    {
+        ASSERT((page->u.inuse.type_info &
+                (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
+        page->u.inuse.type_info &= ~PGT_count_mask;
+    }
+    else if ( rc )
+    {
+        ASSERT(rc < 0);
+        MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
+                PRtype_info ": caf=%08x taf=%" PRtype_info,
+                page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
+                type, page->count_info, page->u.inuse.type_info);
+        page->u.inuse.type_info = 0;
+    }
+    else
+    {
+        page->u.inuse.type_info |= PGT_validated;
+    }
+
+    return rc;
+}
+
+
+int free_page_type(struct page_info *page, unsigned long type,
+                   int preemptible)
 {
     struct domain *owner = page_get_owner(page);
     unsigned long gmfn;
+    int rc;
 
     if ( likely(owner != NULL) )
     {
@@ -1842,7 +1961,7 @@ void free_page_type(struct page_info *pa
             paging_mark_dirty(owner, page_to_mfn(page));
 
             if ( shadow_mode_refcounts(owner) )
-                return;
+                return 0;
 
             gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
             ASSERT(VALID_M2P(gmfn));
@@ -1850,42 +1969,80 @@ void free_page_type(struct page_info *pa
         }
     }
 
+    if ( !(type & PGT_partial) )
+    {
+        page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
+        page->partial_pte = 0;
+    }
     switch ( type & PGT_type_mask )
     {
     case PGT_l1_page_table:
         free_l1_table(page);
+        rc = 0;
         break;
-
     case PGT_l2_page_table:
-        free_l2_table(page);
+        rc = free_l2_table(page, preemptible);
         break;
-
-#if CONFIG_PAGING_LEVELS >= 3
     case PGT_l3_page_table:
-        free_l3_table(page);
+#if CONFIG_PAGING_LEVELS == 3
+        if ( !(type & PGT_partial) )
+            page->nr_validated_ptes = L3_PAGETABLE_ENTRIES;
+#endif
+        rc = free_l3_table(page, preemptible);
         break;
-#endif
-
-#if CONFIG_PAGING_LEVELS >= 4
     case PGT_l4_page_table:
-        free_l4_table(page);
+        rc = free_l4_table(page, preemptible);
         break;
-#endif
-
     default:
-        printk("%s: type %lx pfn %lx\n",__FUNCTION__,
-               type, page_to_mfn(page));
+        MEM_LOG("type %lx pfn %lx\n", type, page_to_mfn(page));
+        rc = -EINVAL;
         BUG();
     }
-}
-
-
-void put_page_type(struct page_info *page)
+
+    /* No need for atomic update of type_info here: noone else updates it. */
+    if ( rc == 0 )
+    {
+        /*
+         * Record TLB information for flush later. We do not stamp page tables
+         * when running in shadow mode:
+         *  1. Pointless, since it's the shadow pt's which must be tracked.
+         *  2. Shadow mode reuses this field for shadowed page tables to
+         *     store flags info -- we don't want to conflict with that.
+         */
+        if ( !(shadow_mode_enabled(page_get_owner(page)) &&
+               (page->count_info & PGC_page_table)) )
+            page->tlbflush_timestamp = tlbflush_current_time();
+        wmb();
+        page->u.inuse.type_info--;
+    }
+    else if ( rc == -EINTR )
+    {
+        ASSERT(!(page->u.inuse.type_info &
+                 (PGT_count_mask|PGT_validated|PGT_partial)));
+        if ( !(shadow_mode_enabled(page_get_owner(page)) &&
+               (page->count_info & PGC_page_table)) )
+            page->tlbflush_timestamp = tlbflush_current_time();
+        wmb();
+        page->u.inuse.type_info |= PGT_validated;
+    }
+    else
+    {
+        BUG_ON(rc != -EAGAIN);
+        wmb();
+        page->u.inuse.type_info |= PGT_partial;
+    }
+
+    return rc;
+}
+
+
+static int __put_page_type(struct page_info *page,
+                           int preemptible)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
 
- again:
-    do {
+    for ( ; ; )
+    {
         x  = y;
         nx = x - 1;
 
@@ -1894,21 +2051,19 @@ void put_page_type(struct page_info *pag
         if ( unlikely((nx & PGT_count_mask) == 0) )
         {
             if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
-                 likely(nx & PGT_validated) )
+                 likely(nx & (PGT_validated|PGT_partial)) )
             {
                 /*
                  * Page-table pages must be unvalidated when count is zero. The
                  * 'free' is safe because the refcnt is non-zero and validated
                  * bit is clear => other ops will spin or fail.
                  */
-                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 
-                                           x & ~PGT_validated)) != x) )
-                    goto again;
+                nx = x & ~(PGT_validated|PGT_partial);
+                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
+                                           x, nx)) != x) )
+                    continue;
                 /* We cleared the 'valid bit' so we do the clean up. */
-                free_page_type(page, x);
-                /* Carry on, but with the 'valid bit' now clear. */
-                x  &= ~PGT_validated;
-                nx &= ~PGT_validated;
+                return free_page_type(page, x, preemptible);
             }
 
             /*
@@ -1922,25 +2077,33 @@ void put_page_type(struct page_info *pag
                    (page->count_info & PGC_page_table)) )
                 page->tlbflush_timestamp = tlbflush_current_time();
         }
-    }
-    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
-}
-
-
-int get_page_type(struct page_info *page, unsigned long type)
+
+        if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
+            break;
+
+        if ( preemptible && hypercall_preempt_check() )
+            return -EINTR;
+    }
+
+    return 0;
+}
+
+
+static int __get_page_type(struct page_info *page, unsigned long type,
+                           int preemptible)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
 
     ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
 
- again:
-    do {
+    for ( ; ; )
+    {
         x  = y;
         nx = x + 1;
         if ( unlikely((nx & PGT_count_mask) == 0) )
         {
             MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
-            return 0;
+            return -EINVAL;
         }
         else if ( unlikely((x & PGT_count_mask) == 0) )
         {
@@ -1993,28 +2156,43 @@ int get_page_type(struct page_info *page
             /* Don't log failure if it could be a recursive-mapping attempt. */
             if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
                  (type == PGT_l1_page_table) )
-                return 0;
+                return -EINVAL;
             if ( ((x & PGT_type_mask) == PGT_l3_page_table) &&
                  (type == PGT_l2_page_table) )
-                return 0;
+                return -EINVAL;
             if ( ((x & PGT_type_mask) == PGT_l4_page_table) &&
                  (type == PGT_l3_page_table) )
-                return 0;
+                return -EINVAL;
             MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") "
                     "for mfn %lx (pfn %lx)",
                     x, type, page_to_mfn(page),
                     get_gpfn_from_mfn(page_to_mfn(page)));
-            return 0;
+            return -EINVAL;
         }
         else if ( unlikely(!(x & PGT_validated)) )
         {
-            /* Someone else is updating validation of this page. Wait... */
-            while ( (y = page->u.inuse.type_info) == x )
-                cpu_relax();
-            goto again;
-        }
-    }
-    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+            if ( !(x & PGT_partial) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                {
+                    if ( preemptible && hypercall_preempt_check() )
+                        return -EINTR;
+                    cpu_relax();
+                }
+                continue;
+            }
+            /* Type ref count was left at 1 when PGT_partial got set. */
+            ASSERT((x & PGT_count_mask) == 1);
+            nx = x & ~PGT_partial;
+        }
+
+        if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
+            break;
+
+        if ( preemptible && hypercall_preempt_check() )
+            return -EINTR;
+    }
 
     if ( unlikely((x & PGT_type_mask) != type) )
     {
@@ -2032,25 +2210,42 @@ int get_page_type(struct page_info *page
 
     if ( unlikely(!(nx & PGT_validated)) )
     {
-        /* Try to validate page type; drop the new reference on failure. */
-        if ( unlikely(!alloc_page_type(page, type)) )
-        {
-            MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
-                    PRtype_info ": caf=%08x taf=%" PRtype_info,
-                    page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
-                    type, page->count_info, page->u.inuse.type_info);
-            /* Noone else can get a reference. We hold the only ref. */
-            page->u.inuse.type_info = 0;
-            return 0;
-        }
-
-        /* Noone else is updating simultaneously. */
-        __set_bit(_PGT_validated, &page->u.inuse.type_info);
-    }
-
-    return 1;
-}
-
+        if ( !(x & PGT_partial) )
+        {
+            page->nr_validated_ptes = 0;
+            page->partial_pte = 0;
+        }
+        return alloc_page_type(page, type, preemptible);
+    }
+
+    return 0;
+}
+
+void put_page_type(struct page_info *page)
+{
+    int rc = __put_page_type(page, 0);
+    ASSERT(rc == 0);
+    (void)rc;
+}
+
+int get_page_type(struct page_info *page, unsigned long type)
+{
+    int rc = __get_page_type(page, type, 0);
+    if ( likely(rc == 0) )
+        return 1;
+    ASSERT(rc == -EINVAL);
+    return 0;
+}
+
+int put_page_type_preemptible(struct page_info *page)
+{
+    return __put_page_type(page, 1);
+}
+
+int get_page_type_preemptible(struct page_info *page, unsigned long type)
+{
+    return __get_page_type(page, type, 1);
+}
 
 void cleanup_page_cacheattr(struct page_info *page)
 {
@@ -2087,7 +2282,7 @@ int new_guest_cr3(unsigned long mfn)
                     l4e_from_pfn(
                         mfn,
                         (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
-                    pagetable_get_pfn(v->arch.guest_table), 0);
+                    pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
         if ( unlikely(!okay) )
         {
             MEM_LOG("Error while installing new compat baseptr %lx", mfn);
@@ -2102,7 +2297,7 @@ int new_guest_cr3(unsigned long mfn)
 #endif
     okay = paging_mode_refcounts(d)
         ? get_page_from_pagenr(mfn, d)
-        : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
+        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
     if ( unlikely(!okay) )
     {
         MEM_LOG("Error while installing new baseptr %lx", mfn);
@@ -2276,9 +2471,7 @@ int do_mmuext_op(
     {
         if ( hypercall_preempt_check() )
         {
-            rc = hypercall_create_continuation(
-                __HYPERVISOR_mmuext_op, "hihi",
-                uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+            rc = -EAGAIN;
             break;
         }
 
@@ -2325,10 +2518,14 @@ int do_mmuext_op(
             if ( paging_mode_refcounts(FOREIGNDOM) )
                 break;
 
-            okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
+            rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
+            okay = !rc;
             if ( unlikely(!okay) )
             {
-                MEM_LOG("Error while pinning mfn %lx", mfn);
+                if ( rc == -EINTR )
+                    rc = -EAGAIN;
+                else if ( rc != -EAGAIN )
+                    MEM_LOG("Error while pinning mfn %lx", mfn);
                 break;
             }
 
@@ -2373,8 +2570,11 @@ int do_mmuext_op(
             {
                 put_page_and_type(page);
                 put_page(page);
-                /* A page is dirtied when its pin status is cleared. */
-                paging_mark_dirty(d, mfn);
+                if ( !rc )
+                {
+                    /* A page is dirtied when its pin status is cleared. */
+                    paging_mark_dirty(d, mfn);
+                }
             }
             else
             {
@@ -2398,8 +2598,8 @@ int do_mmuext_op(
                 if ( paging_mode_refcounts(d) )
                     okay = get_page_from_pagenr(mfn, d);
                 else
-                    okay = get_page_and_type_from_pagenr(
-                        mfn, PGT_root_page_table, d);
+                    okay = !get_page_and_type_from_pagenr(
+                        mfn, PGT_root_page_table, d, 0);
                 if ( unlikely(!okay) )
                 {
                     MEM_LOG("Error while installing new mfn %lx", mfn);
@@ -2517,6 +2717,11 @@ int do_mmuext_op(
         guest_handle_add_offset(uops, 1);
     }
 
+    if ( rc == -EAGAIN )
+        rc = hypercall_create_continuation(
+            __HYPERVISOR_mmuext_op, "hihi",
+            uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+
     process_deferred_ops();
 
     perfc_add(num_mmuext_ops, i);
@@ -2576,9 +2781,7 @@ int do_mmu_update(
     {
         if ( hypercall_preempt_check() )
         {
-            rc = hypercall_create_continuation(
-                __HYPERVISOR_mmu_update, "hihi",
-                ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+            rc = -EAGAIN;
             break;
         }
 
@@ -2601,7 +2804,7 @@ int do_mmu_update(
              */
         case MMU_NORMAL_PT_UPDATE:
         case MMU_PT_UPDATE_PRESERVE_AD:
-            rc = xsm_mmu_normal_update(d, req.val);
+            rc = xsm_mmu_normal_update(d, FOREIGNDOM, req.val);
             if ( rc )
                 break;
 
@@ -2653,27 +2856,29 @@ int do_mmu_update(
                                         cmd == MMU_PT_UPDATE_PRESERVE_AD);
                 }
                 break;
-#if CONFIG_PAGING_LEVELS >= 3
                 case PGT_l3_page_table:
                 {
                     l3_pgentry_t l3e = l3e_from_intpte(req.val);
-                    okay = mod_l3_entry(va, l3e, mfn,
-                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
+                    rc = mod_l3_entry(va, l3e, mfn,
+                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
+                    okay = !rc;
                 }
                 break;
-#endif
 #if CONFIG_PAGING_LEVELS >= 4
                 case PGT_l4_page_table:
                 {
                     l4_pgentry_t l4e = l4e_from_intpte(req.val);
-                    okay = mod_l4_entry(va, l4e, mfn,
-                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
+                    rc = mod_l4_entry(va, l4e, mfn,
+                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
+                    okay = !rc;
                 }
                 break;
 #endif
                 }
 
                 put_page_type(page);
+                if ( rc == -EINTR )
+                    rc = -EAGAIN;
             }
             break;
 
@@ -2741,6 +2946,11 @@ int do_mmu_update(
 
         guest_handle_add_offset(ureqs, 1);
     }
+
+    if ( rc == -EAGAIN )
+        rc = hypercall_create_continuation(
+            __HYPERVISOR_mmu_update, "hihi",
+            ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 
     process_deferred_ops();
 
@@ -3111,7 +3321,7 @@ int do_update_va_mapping(unsigned long v
     if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
         return -EINVAL;
 
-    rc = xsm_update_va_mapping(d, val);
+    rc = xsm_update_va_mapping(d, FOREIGNDOM, val);
     if ( rc )
         return rc;
 
@@ -3695,9 +3905,8 @@ static int ptwr_emulated_update(
     nl1e = l1e_from_intpte(val);
     if ( unlikely(!get_page_from_l1e(nl1e, d)) )
     {
-        if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
-             (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
-             (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+        if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
+             !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
         {
             /*
              * If this is an upper-half write to a PAE PTE then we assume that
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm/hap/hap.c Fri Sep 12 14:47:40 2008 +0900
@@ -37,6 +37,7 @@
 #include <asm/shared.h>
 #include <asm/hap.h>
 #include <asm/paging.h>
+#include <asm/p2m.h>
 #include <asm/domain.h>
 #include <xen/numa.h>
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm/shadow/common.c   Fri Sep 12 14:47:40 2008 +0900
@@ -39,6 +39,7 @@
 #include <xen/numa.h>
 #include "private.h"
 
+DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags);
 
 /* Set up the shadow-specific parts of a domain struct at start of day.
  * Called for every domain from arch_domain_create() */
@@ -630,6 +631,8 @@ void oos_fixup_add(struct vcpu *v, mfn_t
 
             if ( mfn_x(oos_fixup[idx].smfn[next]) != INVALID_MFN )
             {
+                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_OOS_FIXUP_EVICT);
+
                 /* Reuse this slot and remove current writable mapping. */
                 sh_remove_write_access_from_sl1p(v, gmfn, 
                                                  oos_fixup[idx].smfn[next],
@@ -645,6 +648,8 @@ void oos_fixup_add(struct vcpu *v, mfn_t
             oos_fixup[idx].smfn[next] = smfn;
             oos_fixup[idx].off[next] = off;
             oos_fixup[idx].next = (next + 1) % SHADOW_OOS_FIXUPS;
+
+            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_OOS_FIXUP_ADD);
             return;
         }
     }
@@ -687,6 +692,16 @@ static int oos_remove_write_access(struc
 }
 
 
+static inline void trace_resync(int event, mfn_t gmfn)
+{
+    if ( tb_init_done )
+    {
+        /* Convert gmfn to gfn */
+        unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
+        __trace_var(event, 0/*!tsc*/, sizeof(gfn), (unsigned char*)&gfn);
+    }
+}
+
 /* Pull all the entries on an out-of-sync page back into sync. */
 static void _sh_resync(struct vcpu *v, mfn_t gmfn,
                        struct oos_fixup *fixup, mfn_t snp)
@@ -700,8 +715,8 @@ static void _sh_resync(struct vcpu *v, m
              & ~SHF_L1_ANY));
     ASSERT(!sh_page_has_multiple_shadows(mfn_to_page(gmfn)));
 
-    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
-                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
+                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
 
     /* Need to pull write access so the page *stays* in sync. */
     if ( oos_remove_write_access(v, gmfn, fixup) )
@@ -719,6 +734,7 @@ static void _sh_resync(struct vcpu *v, m
     /* Now we know all the entries are synced, and will stay that way */
     pg->shadow_flags &= ~SHF_out_of_sync;
     perfc_incr(shadow_resync);
+    trace_resync(TRC_SHADOW_RESYNC_FULL, gmfn);
 }
 
 
@@ -930,6 +946,7 @@ void sh_resync_all(struct vcpu *v, int s
                 /* Update the shadows and leave the page OOS. */
                 if ( sh_skip_sync(v, oos[idx]) )
                     continue;
+                trace_resync(TRC_SHADOW_RESYNC_ONLY, oos[idx]);
                 _sh_resync_l1(other, oos[idx], oos_snapshot[idx]);
             }
             else
@@ -945,15 +962,16 @@ void sh_resync_all(struct vcpu *v, int s
     }
 }
 
-/* Allow a shadowed page to go out of sync */
+/* Allow a shadowed page to go out of sync. Unsyncs are traced in
+ * multi.c:sh_page_fault() */
 int sh_unsync(struct vcpu *v, mfn_t gmfn)
 {
     struct page_info *pg;
     
     ASSERT(shadow_locked_by_me(v->domain));
 
-    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx va %lx\n",
-                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
+                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
 
     pg = mfn_to_page(gmfn);
  
@@ -970,6 +988,7 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
     pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write;
     oos_hash_add(v, gmfn);
     perfc_incr(shadow_unsync);
+    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_UNSYNC);
     return 1;
 }
 
@@ -1005,6 +1024,7 @@ void shadow_promote(struct vcpu *v, mfn_
 
     ASSERT(!test_bit(type, &page->shadow_flags));
     set_bit(type, &page->shadow_flags);
+    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE);
 }
 
 void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
@@ -1027,6 +1047,8 @@ void shadow_demote(struct vcpu *v, mfn_t
 #endif 
         clear_bit(_PGC_page_table, &page->count_info);
     }
+
+    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_DEMOTE);
 }
 
 /**************************************************************************/
@@ -1094,6 +1116,7 @@ sh_validate_guest_entry(struct vcpu *v, 
     ASSERT((page->shadow_flags 
             & (SHF_L4_64|SHF_L3_64|SHF_L2H_64|SHF_L2_64|SHF_L1_64)) == 0);
 #endif
+    this_cpu(trace_shadow_path_flags) |= (result<<(TRCE_SFLAG_SET_CHANGED)); 
 
     return result;
 }
@@ -1295,6 +1318,18 @@ static void shadow_unhook_mappings(struc
     }
 }
 
+static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn)
+{
+    if ( tb_init_done )
+    {
+        /* Convert smfn to gfn */
+        unsigned long gfn;
+        ASSERT(mfn_valid(smfn));
+        gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
+        __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
+                    sizeof(gfn), (unsigned char*)&gfn);
+    }
+}
 
 /* Make sure there are at least count order-sized pages
  * available in the shadow page pool. */
@@ -1327,6 +1362,7 @@ static void _shadow_prealloc(
         smfn = shadow_page_to_mfn(sp);
 
         /* Unpin this top-level shadow */
+        trace_shadow_prealloc_unpin(d, smfn);
         sh_unpin(v, smfn);
 
         /* See if that freed up enough space */
@@ -1343,6 +1379,7 @@ static void _shadow_prealloc(
         {
             if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
             {
+                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PREALLOC_UNHOOK);
                 shadow_unhook_mappings(v, 
                                pagetable_get_mfn(v2->arch.shadow_table[i]));
 
@@ -2200,6 +2237,16 @@ void sh_destroy_shadow(struct vcpu *v, m
     }    
 }
 
+static inline void trace_shadow_wrmap_bf(mfn_t gmfn)
+{
+    if ( tb_init_done )
+    {
+        /* Convert gmfn to gfn */
+        unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
+        __trace_var(TRC_SHADOW_WRMAP_BF, 0/*!tsc*/, sizeof(gfn), (unsigned 
char*)&gfn);
+    }
+}
+
 /**************************************************************************/
 /* Remove all writeable mappings of a guest frame from the shadow tables 
  * Returns non-zero if we need to flush TLBs. 
@@ -2265,6 +2312,8 @@ int sh_remove_write_access(struct vcpu *
          || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
         return 0;
 
+    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP);
+
     perfc_incr(shadow_writeable);
 
     /* If this isn't a "normal" writeable page, the domain is trying to 
@@ -2285,11 +2334,14 @@ int sh_remove_write_access(struct vcpu *
          * and that mapping is likely to be in the current pagetable,
          * in the guest's linear map (on non-HIGHPTE linux and windows)*/
 
-#define GUESS(_a, _h) do {                                                \
+#define GUESS(_a, _h) do {                                              \
             if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \
-                perfc_incr(shadow_writeable_h_ ## _h);                   \
-            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )          \
-                return 1;                                                 \
+                perfc_incr(shadow_writeable_h_ ## _h);                  \
+            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
+            {                                                           \
+                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);   \
+                return 1;                                               \
+            }                                                           \
         } while (0)
 
         if ( level == 0 && fault_addr )
@@ -2377,6 +2429,7 @@ int sh_remove_write_access(struct vcpu *
 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
     
     /* Brute-force search of all the shadows, by walking the hash */
+    trace_shadow_wrmap_bf(gmfn);
     if ( level == 0 )
         perfc_incr(shadow_writeable_bf_1);
     else
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Sep 12 14:47:40 2008 +0900
@@ -225,6 +225,7 @@ static uint32_t set_ad_bits(void *guest_
 static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
 {
     guest_intpte_t old, new;
+    int ret = 0;
 
     old = *(guest_intpte_t *)walk_p;
     new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
@@ -234,10 +235,16 @@ static uint32_t set_ad_bits(void *guest_
          * into the guest table as well.  If the guest table has changed
          * under out feet then leave it alone. */
         *(guest_intpte_t *)walk_p = new;
-        if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old ) 
-            return 1;
-    }
-    return 0;
+        if( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old ) 
+            ret = 1;
+
+        /* FIXME -- this code is longer than necessary */
+        if(set_dirty)
+            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_AD);
+        else
+            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_A);
+    }
+    return ret;
 }
 
 /* This validation is called with lock held, and after write permission
@@ -1432,6 +1439,7 @@ static int shadow_set_l1e(struct vcpu *v
     {
         /* About to install a new reference */        
         if ( shadow_mode_refcounts(d) ) {
+            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
             if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 ) 
             {
                 /* Doesn't look like a pagetable. */
@@ -1461,6 +1469,7 @@ static int shadow_set_l1e(struct vcpu *v
         {
             shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
             shadow_put_page_from_l1e(old_sl1e, d);
+            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
         } 
     }
     return flags;
@@ -2896,6 +2905,7 @@ static inline void check_for_early_unsha
     {
         perfc_incr(shadow_early_unshadow);
         sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
+        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW);
     }
     v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
 #endif
@@ -3012,6 +3022,132 @@ static void sh_prefetch(struct vcpu *v, 
 
 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
 
+#if GUEST_PAGING_LEVELS == 4
+typedef u64 guest_va_t;
+typedef u64 guest_pa_t;
+#elif GUEST_PAGING_LEVELS == 3
+typedef u32 guest_va_t;
+typedef u64 guest_pa_t;
+#else
+typedef u32 guest_va_t;
+typedef u32 guest_pa_t;
+#endif
+
+static inline void trace_shadow_gen(u32 event, guest_va_t va)
+{
+    if ( tb_init_done )
+    {
+        event |= (GUEST_PAGING_LEVELS-2)<<8;
+        __trace_var(event, 0/*!tsc*/, sizeof(va), (unsigned char*)&va);
+    }
+}
+
+static inline void trace_shadow_fixup(guest_l1e_t gl1e,
+                                      guest_va_t va)
+{
+    if ( tb_init_done )
+    {
+        struct {
+            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+               so put it first for alignment sake. */
+            guest_l1e_t gl1e;
+            guest_va_t va;
+            u32 flags;
+        } __attribute__((packed)) d;
+        u32 event;
+
+        event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8);
+
+        d.gl1e = gl1e;
+        d.va = va;
+        d.flags = this_cpu(trace_shadow_path_flags);
+
+        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
+    }
+}
+                                          
+static inline void trace_not_shadow_fault(guest_l1e_t gl1e,
+                                          guest_va_t va)
+{
+    if ( tb_init_done )
+    {
+        struct {
+            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+               so put it first for alignment sake. */
+            guest_l1e_t gl1e;
+            guest_va_t va;
+            u32 flags;
+        } __attribute__((packed)) d;
+        u32 event;
+
+        event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8);
+
+        d.gl1e = gl1e;
+        d.va = va;
+        d.flags = this_cpu(trace_shadow_path_flags);
+
+        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
+    }
+}
+                                          
+static inline void trace_shadow_emulate_other(u32 event,
+                                                 guest_va_t va,
+                                                 gfn_t gfn)
+{
+    if ( tb_init_done )
+    {
+        struct {
+            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+               so put it first for alignment sake. */
+#if GUEST_PAGING_LEVELS == 2
+            u32 gfn;
+#else
+            u64 gfn;
+#endif
+            guest_va_t va;
+        } __attribute__((packed)) d;
+
+        event |= ((GUEST_PAGING_LEVELS-2)<<8);
+
+        d.gfn=gfn_x(gfn);
+        d.va = va;
+
+        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
+    }
+}
+
+#if GUEST_PAGING_LEVELS == 3
+static DEFINE_PER_CPU(guest_va_t,trace_emulate_initial_va);
+static DEFINE_PER_CPU(int,trace_extra_emulation_count);
+#endif
+static DEFINE_PER_CPU(guest_pa_t,trace_emulate_write_val);
+
+static inline void trace_shadow_emulate(guest_l1e_t gl1e, unsigned long va)
+{
+    if ( tb_init_done )
+    {
+        struct {
+            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+               so put it first for alignment sake. */
+            guest_l1e_t gl1e, write_val;
+            guest_va_t va;
+            unsigned flags:29, emulation_count:3;
+        } __attribute__((packed)) d;
+        u32 event;
+
+        event = TRC_SHADOW_EMULATE | ((GUEST_PAGING_LEVELS-2)<<8);
+
+        d.gl1e = gl1e;
+        d.write_val.l1 = this_cpu(trace_emulate_write_val);
+        d.va = va;
+#if GUEST_PAGING_LEVELS == 3
+        d.emulation_count = this_cpu(trace_extra_emulation_count);
+#endif
+        d.flags = this_cpu(trace_shadow_path_flags);
+
+        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
+    }
+}
 
 /**************************************************************************/
 /* Entry points into the shadow code */
@@ -3027,8 +3163,8 @@ static int sh_page_fault(struct vcpu *v,
 {
     struct domain *d = v->domain;
     walk_t gw;
-    gfn_t gfn;
-    mfn_t gmfn, sl1mfn=_mfn(0);
+    gfn_t gfn = _gfn(0);
+    mfn_t gmfn, sl1mfn = _mfn(0);
     shadow_l1e_t sl1e, *ptr_sl1e;
     paddr_t gpa;
     struct sh_emulate_ctxt emul_ctxt;
@@ -3043,7 +3179,7 @@ static int sh_page_fault(struct vcpu *v,
 
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u, rip=%lx\n",
                   v->domain->domain_id, v->vcpu_id, va, regs->error_code,
-                  regs->rip);
+                  regs->eip);
 
     perfc_incr(shadow_fault);
 
@@ -3132,6 +3268,7 @@ static int sh_page_fault(struct vcpu *v,
                 reset_early_unshadow(v);
                 perfc_incr(shadow_fault_fast_gnp);
                 SHADOW_PRINTK("fast path not-present\n");
+                trace_shadow_gen(TRC_SHADOW_FAST_PROPAGATE, va);
                 return 0;
             }
             else
@@ -3145,6 +3282,7 @@ static int sh_page_fault(struct vcpu *v,
             perfc_incr(shadow_fault_fast_mmio);
             SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
             reset_early_unshadow(v);
+            trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va);
             return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
                     ? EXCRET_fault_fixed : 0);
         }
@@ -3155,6 +3293,7 @@ static int sh_page_fault(struct vcpu *v,
              * Retry and let the hardware give us the right fault next time. */
             perfc_incr(shadow_fault_fast_fail);
             SHADOW_PRINTK("fast path false alarm!\n");            
+            trace_shadow_gen(TRC_SHADOW_FALSE_FAST_PATH, va);
             return EXCRET_fault_fixed;
         }
     }
@@ -3190,7 +3329,7 @@ static int sh_page_fault(struct vcpu *v,
         perfc_incr(shadow_fault_bail_real_fault);
         SHADOW_PRINTK("not a shadow fault\n");
         reset_early_unshadow(v);
-        return 0;
+        goto propagate;
     }
 
     /* It's possible that the guest has put pagetables in memory that it has 
@@ -3200,7 +3339,7 @@ static int sh_page_fault(struct vcpu *v,
     if ( unlikely(d->is_shutting_down) )
     {
         SHADOW_PRINTK("guest is shutting down\n");
-        return 0;
+        goto propagate;
     }
 
     /* What kind of access are we dealing with? */
@@ -3218,7 +3357,7 @@ static int sh_page_fault(struct vcpu *v,
         SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
                       gfn_x(gfn), mfn_x(gmfn));
         reset_early_unshadow(v);
-        return 0;
+        goto propagate;
     }
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
@@ -3229,6 +3368,8 @@ static int sh_page_fault(struct vcpu *v,
 
     shadow_lock(d);
 
+    TRACE_CLEAR_PATH_FLAGS;
+    
     rc = gw_remove_write_accesses(v, va, &gw);
 
     /* First bit set: Removed write access to a page. */
@@ -3281,6 +3422,7 @@ static int sh_page_fault(struct vcpu *v,
          * Get out of the fault handler immediately. */
         ASSERT(d->is_shutting_down);
         shadow_unlock(d);
+        trace_shadow_gen(TRC_SHADOW_DOMF_DYING, va);
         return 0;
     }
 
@@ -3383,6 +3525,7 @@ static int sh_page_fault(struct vcpu *v,
     d->arch.paging.log_dirty.fault_count++;
     reset_early_unshadow(v);
 
+    trace_shadow_fixup(gw.l1e, va);
  done:
     sh_audit_gw(v, &gw);
     SHADOW_PRINTK("fixed\n");
@@ -3405,6 +3548,8 @@ static int sh_page_fault(struct vcpu *v,
                       mfn_x(gmfn));
         perfc_incr(shadow_fault_emulate_failed);
         sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
+        trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_USER,
+                                      va, gfn);
         goto done;
     }
 
@@ -3421,6 +3566,8 @@ static int sh_page_fault(struct vcpu *v,
     shadow_audit_tables(v);
     shadow_unlock(d);
 
+    this_cpu(trace_emulate_write_val) = 0;
+
 #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
  early_emulation:
 #endif
@@ -3446,6 +3593,8 @@ static int sh_page_fault(struct vcpu *v,
                      "injection: cr2=%#lx, mfn=%#lx\n", 
                      va, mfn_x(gmfn));
             sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
+            trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ,
+                                       va, gfn);
             return EXCRET_fault_fixed;
         }
     }
@@ -3478,6 +3627,10 @@ static int sh_page_fault(struct vcpu *v,
          * to support more operations in the emulator.  More likely, 
          * though, this is a hint that this page should not be shadowed. */
         shadow_remove_all_shadows(v, gmfn);
+
+        trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED,
+                                   va, gfn);
+        goto emulate_done;
     }
 
 #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
@@ -3504,7 +3657,8 @@ static int sh_page_fault(struct vcpu *v,
 
 #if GUEST_PAGING_LEVELS == 3 /* PAE guest */
     if ( r == X86EMUL_OKAY ) {
-        int i;
+        int i, emulation_count=0;
+        this_cpu(trace_emulate_initial_va) = va;
         /* Emulate up to four extra instructions in the hope of catching 
          * the "second half" of a 64-bit pagetable write. */
         for ( i = 0 ; i < 4 ; i++ )
@@ -3513,10 +3667,12 @@ static int sh_page_fault(struct vcpu *v,
             v->arch.paging.last_write_was_pt = 0;
             r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
             if ( r == X86EMUL_OKAY )
-            {
+            { 
+                emulation_count++;
                 if ( v->arch.paging.last_write_was_pt )
                 {
                     perfc_incr(shadow_em_ex_pt);
+                    
TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN);
                     break; /* Don't emulate past the other half of the write */
                 }
                 else 
@@ -3525,12 +3681,16 @@ static int sh_page_fault(struct vcpu *v,
             else
             {
                 perfc_incr(shadow_em_ex_fail);
+                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_LAST_FAILED);
                 break; /* Don't emulate again if we failed! */
             }
         }
+        this_cpu(trace_extra_emulation_count)=emulation_count;
     }
 #endif /* PAE guest */
 
+    trace_shadow_emulate(gw.l1e, va);
+ emulate_done:
     SHADOW_PRINTK("emulated\n");
     return EXCRET_fault_fixed;
 
@@ -3543,6 +3703,7 @@ static int sh_page_fault(struct vcpu *v,
     shadow_audit_tables(v);
     reset_early_unshadow(v);
     shadow_unlock(d);
+    trace_shadow_gen(TRC_SHADOW_MMIO, va);
     return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
             ? EXCRET_fault_fixed : 0);
 
@@ -3552,6 +3713,10 @@ static int sh_page_fault(struct vcpu *v,
     shadow_audit_tables(v);
     reset_early_unshadow(v);
     shadow_unlock(d);
+
+propagate:
+    trace_not_shadow_fault(gw.l1e, va);
+
     return 0;
 }
 
@@ -3990,7 +4155,7 @@ sh_detach_old_tables(struct vcpu *v)
             sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
         v->arch.paging.shadow.guest_vtable = NULL;
     }
-#endif
+#endif // !NDEBUG
 
 
     ////
@@ -4446,6 +4611,7 @@ static int sh_guess_wrmap(struct vcpu *v
     sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
     r = shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
     ASSERT( !(r & SHADOW_SET_ERROR) );
+    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);
     return 1;
 }
 #endif
@@ -4800,7 +4966,7 @@ static void emulate_unmap_dest(struct vc
 
 static int
 sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
-                      u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
+                     u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
 {
     void *addr;
 
@@ -4814,6 +4980,22 @@ sh_x86_emulate_write(struct vcpu *v, uns
 
     shadow_lock(v->domain);
     memcpy(addr, src, bytes);
+
+    if ( tb_init_done )
+    {
+#if GUEST_PAGING_LEVELS == 3
+        if ( vaddr == this_cpu(trace_emulate_initial_va) )
+            memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
+        else if ( (vaddr & ~(0x7UL)) == this_cpu(trace_emulate_initial_va) )
+        {
+            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATE_FULL_PT);
+            memcpy(&this_cpu(trace_emulate_write_val),
+                   (void *)(((unsigned long) addr) & ~(0x7UL)), 
GUEST_PTE_SIZE);
+        }
+#else
+        memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
+#endif
+    }
 
     emulate_unmap_dest(v, addr, bytes, sh_ctxt);
     shadow_audit_tables(v);
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/mm/shadow/private.h  Fri Sep 12 14:47:40 2008 +0900
@@ -90,6 +90,43 @@ extern int shadow_audit_enable;
 #define SHADOW_DEBUG_EMULATE           1
 #define SHADOW_DEBUG_P2M               1
 #define SHADOW_DEBUG_LOGDIRTY          0
+
+/******************************************************************************
+ * Tracing
+ */
+DECLARE_PER_CPU(uint32_t,trace_shadow_path_flags);
+
+#define TRACE_SHADOW_PATH_FLAG(_x)                      \
+    do {                                                \
+        this_cpu(trace_shadow_path_flags) |= (1<<(_x));      \
+    } while(0)
+
+#define TRACE_CLEAR_PATH_FLAGS                  \
+    this_cpu(trace_shadow_path_flags) = 0
+
+enum {
+    TRCE_SFLAG_SET_AD,
+    TRCE_SFLAG_SET_A,
+    TRCE_SFLAG_SHADOW_L1_GET_REF,
+    TRCE_SFLAG_SHADOW_L1_PUT_REF,
+    TRCE_SFLAG_L2_PROPAGATE,
+    TRCE_SFLAG_SET_CHANGED,
+    TRCE_SFLAG_SET_FLUSH,
+    TRCE_SFLAG_SET_ERROR,
+    TRCE_SFLAG_DEMOTE,
+    TRCE_SFLAG_PROMOTE,
+    TRCE_SFLAG_WRMAP,
+    TRCE_SFLAG_WRMAP_GUESS_FOUND,
+    TRCE_SFLAG_WRMAP_BRUTE_FORCE,
+    TRCE_SFLAG_EARLY_UNSHADOW,
+    TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN,
+    TRCE_SFLAG_EMULATION_LAST_FAILED,
+    TRCE_SFLAG_EMULATE_FULL_PT,
+    TRCE_SFLAG_PREALLOC_UNHOOK,
+    TRCE_SFLAG_UNSYNC,
+    TRCE_SFLAG_OOS_FIXUP_ADD,
+    TRCE_SFLAG_OOS_FIXUP_EVICT,
+};
 
 /******************************************************************************
  * The shadow lock.
@@ -143,6 +180,12 @@ extern int shadow_audit_enable;
     } while (0)
 
 
+/* Size (in bytes) of a guest PTE */
+#if GUEST_PAGING_LEVELS >= 3
+# define GUEST_PTE_SIZE 8
+#else
+# define GUEST_PTE_SIZE 4
+#endif
 
 /******************************************************************************
  * Auditing routines 
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/physdev.c    Fri Sep 12 14:47:40 2008 +0900
@@ -58,9 +58,6 @@ static int get_free_pirq(struct domain *
     return i;
 }
 
-/*
- * Caller hold the irq_lock
- */
 static int map_domain_pirq(struct domain *d, int pirq, int vector,
                            struct physdev_map_pirq *map)
 {
@@ -136,13 +133,12 @@ done:
     return ret;
 }
 
-/*
- * The pirq should has been unbound before this call
- */
+/* The pirq should have been unbound before this call. */
 static int unmap_domain_pirq(struct domain *d, int pirq)
 {
-    int ret = 0;
-    int vector;
+    unsigned long flags;
+    irq_desc_t *desc;
+    int vector, ret = 0;
 
     if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS )
         return -EINVAL;
@@ -159,33 +155,29 @@ static int unmap_domain_pirq(struct doma
         gdprintk(XENLOG_G_ERR, "domain %X: pirq %x not mapped still\n",
                  d->domain_id, pirq);
         ret = -EINVAL;
-    }
-    else
-    {
-        unsigned long flags;
-        irq_desc_t *desc;
-
-        desc = &irq_desc[vector];
-        spin_lock_irqsave(&desc->lock, flags);
-        if ( desc->msi_desc )
-            pci_disable_msi(vector);
-
-        if ( desc->handler == &pci_msi_type )
-        {
-            /* MSI is not shared, so should be released already */
-            BUG_ON(desc->status & IRQ_GUEST);
-            irq_desc[vector].handler = &no_irq_type;
-        }
-        spin_unlock_irqrestore(&desc->lock, flags);
-
-        d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
-    }
+        goto done;
+    }
+
+    desc = &irq_desc[vector];
+    spin_lock_irqsave(&desc->lock, flags);
+    if ( desc->msi_desc )
+        pci_disable_msi(vector);
+
+    if ( desc->handler == &pci_msi_type )
+    {
+        /* MSI is not shared, so should be released already */
+        BUG_ON(desc->status & IRQ_GUEST);
+        irq_desc[vector].handler = &no_irq_type;
+    }
+    spin_unlock_irqrestore(&desc->lock, flags);
+
+    d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
 
     ret = irq_deny_access(d, pirq);
-
     if ( ret )
         gdprintk(XENLOG_G_ERR, "deny irq %x access failed\n", pirq);
 
+ done:
     return ret;
 }
 
@@ -194,10 +186,6 @@ static int physdev_map_pirq(struct physd
     struct domain *d;
     int vector, pirq, ret = 0;
     unsigned long flags;
-
-    /* if msi_enable is not enabled, map always succeeds */
-    if ( !msi_enable )
-        return 0;
 
     if ( !IS_PRIV(current->domain) )
         return -EPERM;
@@ -308,14 +296,8 @@ static int physdev_unmap_pirq(struct phy
     unsigned long flags;
     int ret;
 
-    if ( !msi_enable )
-        return 0;
-
     if ( !IS_PRIV(current->domain) )
         return -EPERM;
-
-    if ( !unmap )
-        return -EINVAL;
 
     if ( unmap->domid == DOMID_SELF )
         d = rcu_lock_domain(current->domain);
@@ -323,14 +305,12 @@ static int physdev_unmap_pirq(struct phy
         d = rcu_lock_domain_by_id(unmap->domid);
 
     if ( d == NULL )
-    {
-        rcu_unlock_domain(d);
         return -ESRCH;
-    }
 
     spin_lock_irqsave(&d->arch.irq_lock, flags);
     ret = unmap_domain_pirq(d, unmap->pirq);
     spin_unlock_irqrestore(&d->arch.irq_lock, flags);
+
     rcu_unlock_domain(d);
 
     return ret;
@@ -452,20 +432,14 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
         irq = irq_op.irq;
         ret = -EINVAL;
-        if ( ((irq < 0) && (irq != AUTO_ASSIGN)) || (irq >= NR_IRQS) )
+        if ( (irq < 0) || (irq >= NR_IRQS) )
             break;
 
         irq_op.vector = assign_irq_vector(irq);
 
-        ret = 0;
-
-        if ( msi_enable )
-        {
-            spin_lock_irqsave(&dom0->arch.irq_lock, flags);
-            if ( irq != AUTO_ASSIGN )
-                ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
-            spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
-        }
+        spin_lock_irqsave(&dom0->arch.irq_lock, flags);
+        ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
+        spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
 
         if ( copy_to_guest(arg, &irq_op, 1) != 0 )
             ret = -EFAULT;
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/platform_hypercall.c Fri Sep 12 14:47:40 2008 +0900
@@ -192,6 +192,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
     break;
 
     case XENPF_firmware_info:
+        ret = xsm_firmware_info();
+        if ( ret )
+            break;
+
         switch ( op->u.firmware_info.type )
         {
         case XEN_FW_DISK_INFO: {
@@ -280,10 +284,18 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
         break;
 
     case XENPF_enter_acpi_sleep:
+        ret = xsm_acpi_sleep();
+        if ( ret )
+            break;
+
         ret = acpi_enter_sleep(&op->u.enter_acpi_sleep);
         break;
 
     case XENPF_change_freq:
+        ret = xsm_change_freq();
+        if ( ret )
+            break;
+
         ret = -ENOSYS;
         if ( cpufreq_controller != FREQCTL_dom0_kernel )
             break;
@@ -305,6 +317,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
         cpumask_t cpumap;
         XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
         XEN_GUEST_HANDLE(uint64) idletimes;
+
+        ret = xsm_getidletime();
+        if ( ret )
+            break;
 
         ret = -ENOSYS;
         if ( cpufreq_controller != FREQCTL_dom0_kernel )
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/smpboot.c    Fri Sep 12 14:47:40 2008 +0900
@@ -1225,15 +1225,6 @@ int __cpu_disable(void)
        if (cpu == 0)
                return -EBUSY;
 
-       /*
-        * Only S3 is using this path, and thus idle vcpus are running on all
-        * APs when we are called. To support full cpu hotplug, other 
-        * notification mechanisms should be introduced (e.g., migrate vcpus
-        * off this physical cpu before rendezvous point).
-        */
-       if (!is_idle_vcpu(current))
-               return -EINVAL;
-
        local_irq_disable();
        clear_local_APIC();
        /* Allow any queued timer interrupts to get serviced */
@@ -1249,6 +1240,9 @@ int __cpu_disable(void)
        fixup_irqs(map);
        /* It's now safe to remove this processor from the online map */
        cpu_clear(cpu, cpu_online_map);
+
+       cpu_disable_scheduler();
+
        return 0;
 }
 
@@ -1275,28 +1269,6 @@ static int take_cpu_down(void *unused)
     return __cpu_disable();
 }
 
-/* 
- * XXX: One important thing missed here is to migrate vcpus
- * from dead cpu to other online ones and then put whole
- * system into a stop state. It assures a safe environment
- * for a cpu hotplug/remove at normal running state.
- *
- * However for xen PM case, at this point:
- *     -> All other domains should be notified with PM event,
- *        and then in following states:
- *             * Suspend state, or
- *             * Paused state, which is a force step to all
- *               domains if they do nothing to suspend
- *     -> All vcpus of dom0 (except vcpu0) have already beem
- *        hot removed
- * with the net effect that all other cpus only have idle vcpu
- * running. In this special case, we can avoid vcpu migration
- * then and system can be considered in a stop state.
- *
- * So current cpu hotplug is a special version for PM specific
- * usage, and need more effort later for full cpu hotplug.
- * (ktian1)
- */
 int cpu_down(unsigned int cpu)
 {
        int err = 0;
@@ -1304,6 +1276,12 @@ int cpu_down(unsigned int cpu)
        spin_lock(&cpu_add_remove_lock);
        if (num_online_cpus() == 1) {
                err = -EBUSY;
+               goto out;
+       }
+
+       /* Can not offline BSP */
+       if (cpu == 0) {
+               err = -EINVAL;
                goto out;
        }
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/time.c       Fri Sep 12 14:47:40 2008 +0900
@@ -993,15 +993,16 @@ static void local_time_calibration(void)
  * All CPUS snapshot their local TSC and extrapolation of system time.
  */
 struct calibration_rendezvous {
+    cpumask_t cpu_calibration_map;
     atomic_t nr_cpus;
     s_time_t master_stime;
 };
 
 static void time_calibration_rendezvous(void *_r)
 {
-    unsigned int total_cpus = num_online_cpus();
     struct cpu_calibration *c = &this_cpu(cpu_calibration);
     struct calibration_rendezvous *r = _r;
+    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
 
     if ( smp_processor_id() == 0 )
     {
@@ -1029,11 +1030,13 @@ static void time_calibration(void *unuse
 static void time_calibration(void *unused)
 {
     struct calibration_rendezvous r = {
+        .cpu_calibration_map = cpu_online_map,
         .nr_cpus = ATOMIC_INIT(0)
     };
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
-    on_each_cpu(time_calibration_rendezvous, &r, 0, 1);
+    on_selected_cpus(r.cpu_calibration_map,
+                     time_calibration_rendezvous, &r, 0, 1);
 }
 
 void init_percpu_time(void)
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/arch/x86/traps.c      Fri Sep 12 14:47:40 2008 +0900
@@ -47,7 +47,7 @@
 #include <xen/version.h>
 #include <xen/kexec.h>
 #include <xen/trace.h>
-#include <asm/paging.h>
+#include <xen/paging.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/atomic.h>
@@ -2116,6 +2116,36 @@ static int emulate_privileged_op(struct 
             if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
                 goto fail;
             break;
+        case MSR_AMD64_NB_CFG:
+            if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
+                 boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 )
+                goto fail;
+            if ( !IS_PRIV(v->domain) )
+                break;
+            if ( (rdmsr_safe(MSR_AMD64_NB_CFG, l, h) != 0) ||
+                 (eax != l) ||
+                 ((edx ^ h) & ~(1 << (AMD64_NB_CFG_CF8_EXT_ENABLE_BIT - 32))) )
+                goto invalid;
+            if ( wrmsr_safe(MSR_AMD64_NB_CFG, eax, edx) != 0 )
+                goto fail;
+            break;
+        case MSR_FAM10H_MMIO_CONF_BASE:
+            if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
+                 boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 )
+                goto fail;
+            if ( !IS_PRIV(v->domain) )
+                break;
+            if ( (rdmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, l, h) != 0) ||
+                 (((((u64)h << 32) | l) ^ res) &
+                  ~((1 << FAM10H_MMIO_CONF_ENABLE_BIT) |
+                    (FAM10H_MMIO_CONF_BUSRANGE_MASK <<
+                     FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
+                    ((u64)FAM10H_MMIO_CONF_BASE_MASK <<
+                     FAM10H_MMIO_CONF_BASE_SHIFT))) )
+                goto invalid;
+            if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 )
+                goto fail;
+            break;
         case MSR_IA32_PERF_CTL:
             if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
                 goto fail;
@@ -2124,11 +2154,18 @@ static int emulate_privileged_op(struct 
             if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
                 goto fail;
             break;
+        case MSR_IA32_THERM_CONTROL:
+            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+                goto fail;
+            if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
+                goto fail;
+            break;
         default:
             if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
                 break;
             if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
                  (eax != l) || (edx != h) )
+        invalid:
                 gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from "
                         "%08x:%08x to %08x:%08x.\n",
                         _p(regs->ecx), h, l, edx, eax);
@@ -2198,6 +2235,12 @@ static int emulate_privileged_op(struct 
             regs->eax |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
                          MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
                          MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
+            break;
+        case MSR_IA32_THERM_CONTROL:
+            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+                goto fail;
+            if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
+                goto fail;
             break;
         default:
             if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/domain.c
--- a/xen/common/domain.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/domain.c       Fri Sep 12 14:47:40 2008 +0900
@@ -651,9 +651,11 @@ void vcpu_reset(struct vcpu *v)
 
     set_bit(_VPF_down, &v->pause_flags);
 
+    clear_bit(v->vcpu_id, d->poll_mask);
+    v->poll_evtchn = 0;
+
     v->fpu_initialised = 0;
     v->fpu_dirtied     = 0;
-    v->is_polling      = 0;
     v->is_initialised  = 0;
     v->nmi_pending     = 0;
     v->mce_pending     = 0;
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/domctl.c
--- a/xen/common/domctl.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/domctl.c       Fri Sep 12 14:47:40 2008 +0900
@@ -655,9 +655,6 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         spin_lock(&d->page_alloc_lock);
         if ( new_max >= d->tot_pages )
         {
-            ret = guest_physmap_max_mem_pages(d, new_max);
-            if ( ret != 0 )
-                break;
             d->max_pages = new_max;
             ret = 0;
         }
@@ -729,16 +726,11 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         if ( d == NULL )
             break;
 
-        ret = xsm_irq_permission(d, pirq, op->u.irq_permission.allow_access);
-        if ( ret )
-            goto irq_permission_out;
-        
         if ( op->u.irq_permission.allow_access )
             ret = irq_permit_access(d, pirq);
         else
             ret = irq_deny_access(d, pirq);
 
-    irq_permission_out:
         rcu_unlock_domain(d);
     }
     break;
@@ -757,17 +749,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         d = rcu_lock_domain_by_id(op->domain);
         if ( d == NULL )
             break;
-
-        ret = xsm_iomem_permission(d, mfn, op->u.iomem_permission.allow_access);
-        if ( ret )
-            goto iomem_permission_out;
 
         if ( op->u.iomem_permission.allow_access )
             ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
         else
             ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
 
-    iomem_permission_out:
         rcu_unlock_domain(d);
     }
     break;
@@ -813,6 +800,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         {
             put_domain(e);
             goto set_target_out;
+        }
+
+        ret = xsm_set_target(d, e);
+        if ( ret ) {
+            put_domain(e);
+            goto set_target_out;            
         }
 
         /* Hold reference on @e until we destroy @d. */
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/event_channel.c        Fri Sep 12 14:47:40 2008 +0900
@@ -545,6 +545,7 @@ static int evtchn_set_pending(struct vcp
 static int evtchn_set_pending(struct vcpu *v, int port)
 {
     struct domain *d = v->domain;
+    int vcpuid;
 
     /*
      * The following bit operations must happen in strict order.
@@ -564,15 +565,19 @@ static int evtchn_set_pending(struct vcp
     }
     
     /* Check if some VCPU might be polling for this event. */
-    if ( unlikely(d->is_polling) )
-    {
-        d->is_polling = 0;
-        smp_mb(); /* check vcpu poll-flags /after/ clearing domain poll-flag */
-        for_each_vcpu ( d, v )
+    if ( likely(bitmap_empty(d->poll_mask, MAX_VIRT_CPUS)) )
+        return 0;
+
+    /* Wake any interested (or potentially interested) pollers. */
+    for ( vcpuid = find_first_bit(d->poll_mask, MAX_VIRT_CPUS);
+          vcpuid < MAX_VIRT_CPUS;
+          vcpuid = find_next_bit(d->poll_mask, MAX_VIRT_CPUS, vcpuid+1) )
+    {
+        v = d->vcpu[vcpuid];
+        if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
+             test_and_clear_bit(vcpuid, d->poll_mask) )
         {
-            if ( !v->is_polling )
-                continue;
-            v->is_polling = 0;
+            v->poll_evtchn = 0;
             vcpu_unblock(v);
         }
     }
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/rangeset.c
--- a/xen/common/rangeset.c     Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/rangeset.c     Fri Sep 12 14:47:40 2008 +0900
@@ -10,6 +10,7 @@
 #include <xen/sched.h>
 #include <xen/errno.h>
 #include <xen/rangeset.h>
+#include <xsm/xsm.h>
 
 /* An inclusive range [s,e] and pointer to next range in ascending order. */
 struct range {
@@ -95,6 +96,10 @@ int rangeset_add_range(
 {
     struct range *x, *y;
     int rc = 0;
+
+    rc = xsm_add_range(r->domain, r->name, s, e);
+    if ( rc )
+        return rc;
 
     ASSERT(s <= e);
 
@@ -164,6 +169,10 @@ int rangeset_remove_range(
     struct range *x, *y, *t;
     int rc = 0;
 
+    rc = xsm_remove_range(r->domain, r->name, s, e);
+    if ( rc )
+        return rc;
+
     ASSERT(s <= e);
 
     spin_lock(&r->lock);
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/sched_credit.c Fri Sep 12 14:47:40 2008 +0900
@@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
 
     BUG_ON( cpu != snext->vcpu->processor );
 
+    /* If this CPU is going offline we shouldn't steal work. */
+    if ( unlikely(!cpu_online(cpu)) )
+        goto out;
+
     if ( snext->pri == CSCHED_PRI_IDLE )
         CSCHED_STAT_CRANK(load_balance_idle);
     else if ( snext->pri == CSCHED_PRI_TS_OVER )
@@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
             return speer;
     }
 
+ out:
     /* Failed to find more important work elsewhere... */
     __runq_remove(snext);
     return snext;
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/schedule.c
--- a/xen/common/schedule.c     Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/schedule.c     Fri Sep 12 14:47:40 2008 +0900
@@ -63,11 +63,31 @@ static struct scheduler ops;
          (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
           : (typeof(ops.fn(__VA_ARGS__)))0 )
 
+static inline void trace_runstate_change(struct vcpu *v, int new_state)
+{
+    struct { uint32_t vcpu:16, domain:16; } d;
+    uint32_t event;
+
+    if ( likely(!tb_init_done) )
+        return;
+
+    d.vcpu = v->vcpu_id;
+    d.domain = v->domain->domain_id;
+
+    event = TRC_SCHED_RUNSTATE_CHANGE;
+    event |= ( v->runstate.state & 0x3 ) << 8;
+    event |= ( new_state & 0x3 ) << 4;
+
+    __trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char *)&d);
+}
+
 static inline void vcpu_runstate_change(
     struct vcpu *v, int new_state, s_time_t new_entry_time)
 {
     ASSERT(v->runstate.state != new_state);
     ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
+
+    trace_runstate_change(v, new_state);
 
     v->runstate.time[v->runstate.state] +=
         new_entry_time - v->runstate.state_entry_time;
@@ -198,6 +218,27 @@ void vcpu_wake(struct vcpu *v)
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
 }
 
+void vcpu_unblock(struct vcpu *v)
+{
+    if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
+        return;
+
+    /* Polling period ends when a VCPU is unblocked. */
+    if ( unlikely(v->poll_evtchn != 0) )
+    {
+        v->poll_evtchn = 0;
+        /*
+         * We *must* re-clear _VPF_blocked to avoid racing other wakeups of
+         * this VCPU (and it then going back to sleep on poll_mask).
+         * Test-and-clear is idiomatic and ensures clear_bit not reordered.
+         */
+        if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
+            clear_bit(_VPF_blocked, &v->pause_flags);
+    }
+
+    vcpu_wake(v);
+}
+
 static void vcpu_migrate(struct vcpu *v)
 {
     unsigned long flags;
@@ -247,6 +288,48 @@ void vcpu_force_reschedule(struct vcpu *
     }
 }
 
+/*
+ * This function is used by cpu_hotplug code from stop_machine context.
+ * Hence we can avoid needing to take the per-CPU scheduler locks.
+ */
+void cpu_disable_scheduler(void)
+{
+    struct domain *d;
+    struct vcpu *v;
+    unsigned int cpu = smp_processor_id();
+
+    for_each_domain ( d )
+    {
+        for_each_vcpu ( d, v )
+        {
+            if ( is_idle_vcpu(v) )
+                continue;
+
+            if ( (cpus_weight(v->cpu_affinity) == 1) &&
+                 cpu_isset(cpu, v->cpu_affinity) )
+            {
+                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
+                        v->domain->domain_id, v->vcpu_id);
+                cpus_setall(v->cpu_affinity);
+            }
+
+            /*
+             * Migrate single-shot timers to CPU0. A new cpu will automatically
+             * be chosen when the timer is next re-set.
+             */
+            if ( v->singleshot_timer.cpu == cpu )
+                migrate_timer(&v->singleshot_timer, 0);
+
+            if ( v->processor == cpu )
+            {
+                set_bit(_VPF_migrating, &v->pause_flags);
+                vcpu_sleep_nosync(v);
+                vcpu_migrate(v);
+            }
+        }
+    }
+}
+
 static int __vcpu_set_affinity(
     struct vcpu *v, cpumask_t *affinity,
     bool_t old_lock_status, bool_t new_lock_status)
@@ -337,7 +420,7 @@ static long do_poll(struct sched_poll *s
     struct vcpu   *v = current;
     struct domain *d = v->domain;
     evtchn_port_t  port;
-    long           rc = 0;
+    long           rc;
     unsigned int   i;
 
     /* Fairly arbitrary limit. */
@@ -348,11 +431,24 @@ static long do_poll(struct sched_poll *s
         return -EFAULT;
 
     set_bit(_VPF_blocked, &v->pause_flags);
-    v->is_polling = 1;
-    d->is_polling = 1;
-
+    v->poll_evtchn = -1;
+    set_bit(v->vcpu_id, d->poll_mask);
+
+#ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
     /* Check for events /after/ setting flags: avoids wakeup waiting race. */
-    smp_wmb();
+    smp_mb();
+
+    /*
+     * Someone may have seen we are blocked but not that we are polling, or
+     * vice versa. We are certainly being woken, so clean up and bail. Beyond
+     * this point others can be guaranteed to clean up for us if they wake us.
+     */
+    rc = 0;
+    if ( (v->poll_evtchn == 0) ||
+         !test_bit(_VPF_blocked, &v->pause_flags) ||
+         !test_bit(v->vcpu_id, d->poll_mask) )
+        goto out;
+#endif
 
     for ( i = 0; i < sched_poll->nr_ports; i++ )
     {
@@ -369,6 +465,9 @@ static long do_poll(struct sched_poll *s
             goto out;
     }
 
+    if ( sched_poll->nr_ports == 1 )
+        v->poll_evtchn = port;
+
     if ( sched_poll->timeout != 0 )
         set_timer(&v->poll_timer, sched_poll->timeout);
 
@@ -378,7 +477,8 @@ static long do_poll(struct sched_poll *s
     return 0;
 
  out:
-    v->is_polling = 0;
+    v->poll_evtchn = 0;
+    clear_bit(v->vcpu_id, d->poll_mask);
     clear_bit(_VPF_blocked, &v->pause_flags);
     return rc;
 }
@@ -628,7 +728,9 @@ static void vcpu_periodic_timer_work(str
         return;
 
     periodic_next_event = v->periodic_last_event + v->periodic_period;
-    if ( now > periodic_next_event )
+
+    /* The timer subsystem may call us up to TIME_SLOP ahead of deadline. */
+    if ( (now + TIME_SLOP) > periodic_next_event )
     {
         send_timer_event(v);
         v->periodic_last_event = now;
@@ -758,11 +860,8 @@ static void poll_timer_fn(void *data)
 {
     struct vcpu *v = data;
 
-    if ( !v->is_polling )
-        return;
-
-    v->is_polling = 0;
-    vcpu_unblock(v);
+    if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
+        vcpu_unblock(v);
 }
 
 /* Initialise the data structures. */
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/sysctl.c
--- a/xen/common/sysctl.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/sysctl.c       Fri Sep 12 14:47:40 2008 +0900
@@ -149,6 +149,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
         char c;
         uint32_t i;
 
+        ret = xsm_debug_keys();
+        if ( ret )
+            break;
+
         for ( i = 0; i < op->u.debug_keys.nr_keys; i++ )
         {
             if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
@@ -166,6 +170,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
 
         nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
 
+        ret = xsm_getcpuinfo();
+        if ( ret )
+            break;
+
         for ( i = 0; i < nr_cpus; i++ )
         {
             /* Assume no holes in idle-vcpu map. */
@@ -188,6 +196,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
 
     case XEN_SYSCTL_availheap:
     { 
+        ret = xsm_availheap();
+        if ( ret )
+            break;
+
         op->u.availheap.avail_bytes = avail_domheap_pages_region(
             op->u.availheap.node,
             op->u.availheap.min_bitwidth,
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/common/trace.c
--- a/xen/common/trace.c        Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/common/trace.c        Fri Sep 12 14:47:40 2008 +0900
@@ -58,6 +58,7 @@ static int t_buf_highwater;
 
 /* Number of records lost due to per-CPU trace buffer being full. */
 static DEFINE_PER_CPU(unsigned long, lost_records);
+static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);
 
 /* a flag recording whether initialization has been done */
 /* or more properly, if the tbuf subsystem is enabled right now */
@@ -147,6 +148,31 @@ static int tb_set_size(int size)
     return 0;
 }
 
+int trace_will_trace_event(u32 event)
+{
+    if ( !tb_init_done )
+        return 0;
+
+    /*
+     * Copied from __trace_var()
+     */
+    if ( (tb_event_mask & event) == 0 )
+        return 0;
+
+    /* match class */
+    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
+        return 0;
+
+    /* then match subclass */
+    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
+                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
+        return 0;
+
+    if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
+        return 0;
+
+    return 1;
+}
 
 /**
  * init_trace_bufs - performs initialization of the per-cpu trace buffers.
@@ -354,22 +380,27 @@ static inline int insert_wrap_record(str
                     NULL);
 }
 
-#define LOST_REC_SIZE 8
+#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
 
 static inline int insert_lost_records(struct t_buf *buf)
 {
     struct {
         u32 lost_records;
-    } ed;
-
+        u32 did:16, vid:16;
+        u64 first_tsc;
+    } __attribute__((packed)) ed;
+
+    ed.vid = current->vcpu_id;
+    ed.did = current->domain->domain_id;
     ed.lost_records = this_cpu(lost_records);
+    ed.first_tsc = this_cpu(lost_records_first_tsc);
 
     this_cpu(lost_records) = 0;
 
     return __insert_record(buf,
                            TRC_LOST_RECORDS,
                            sizeof(ed),
-                           0 /* !cycles */,
+                           1 /* cycles */,
                            LOST_REC_SIZE,
                            (unsigned char *)&ed);
 }
@@ -401,7 +432,8 @@ void __trace_var(u32 event, int cycles, 
     int extra_word;
     int started_below_highwater;
 
-    ASSERT(tb_init_done);
+    if( !tb_init_done )
+        return;
 
     /* Convert byte count into word count, rounding up */
     extra_word = (extra / sizeof(u32));
@@ -479,7 +511,8 @@ void __trace_var(u32 event, int cycles, 
     /* Do we have enough space for everything? */
     if ( total_size > bytes_to_tail )
     {
-        this_cpu(lost_records)++;
+        if ( ++this_cpu(lost_records) == 1 )
+            this_cpu(lost_records_first_tsc)=(u64)get_cycles();
         local_irq_restore(flags);
         return;
     }
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/drivers/acpi/hwregs.c
--- a/xen/drivers/acpi/hwregs.c Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/drivers/acpi/hwregs.c Fri Sep 12 14:47:40 2008 +0900
@@ -239,11 +239,13 @@ acpi_status acpi_set_register(u32 regist
 
        case ACPI_REGISTER_PM2_CONTROL:
 
+#if 0 /* Redundant read in original Linux code. */
                status = acpi_hw_register_read(ACPI_REGISTER_PM2_CONTROL,
                                               &register_value);
                if (ACPI_FAILURE(status)) {
                        goto unlock_and_exit;
                }
+#endif
 
                ACPI_DEBUG_PRINT((ACPI_DB_IO,
                                  "PM2 control: Read %X from %8.8X%8.8X\n",
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/drivers/passthrough/iommu.c   Fri Sep 12 14:47:40 2008 +0900
@@ -33,11 +33,13 @@ int amd_iov_detect(void);
  *   pv                         Enable IOMMU for PV domains
  *   no-pv                      Disable IOMMU for PV domains (default)
  *   force|required             Don't boot unless IOMMU is enabled
+ *   passthrough                Bypass VT-d translation for Dom0
  */
 custom_param("iommu", parse_iommu_param);
 int iommu_enabled = 0;
 int iommu_pv_enabled = 0;
 int force_iommu = 0;
+int iommu_passthrough = 0;
 
 static void __init parse_iommu_param(char *s)
 {
@@ -58,6 +60,8 @@ static void __init parse_iommu_param(cha
             iommu_pv_enabled = 0;
         else if ( !strcmp(s, "force") || !strcmp(s, "required") )
             force_iommu = 1;
+        else if ( !strcmp(s, "passthrough") )
+            iommu_passthrough = 1;
 
         s = ss + 1;
     } while ( ss );
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c       Fri Sep 12 14:47:40 2008 +0900
@@ -1090,12 +1090,13 @@ static int domain_context_mapping_one(
     }
 
     spin_lock_irqsave(&iommu->lock, flags);
-
-#ifdef CONTEXT_PASSTHRU
-    if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+    if ( iommu_passthrough &&
+         ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+    {
         context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+        agaw = level_to_agaw(iommu->nr_pt_levels);
+    }
     else
-#endif
     {
         /* Ensure we have pagetables allocated down to leaf PTE. */
         if ( hd->pgd_maddr == 0 )
@@ -1459,11 +1460,13 @@ int intel_iommu_map_page(
     u64 pg_maddr;
     int pte_present;
 
-#ifdef CONTEXT_PASSTHRU
+    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+    iommu = drhd->iommu;
+
     /* do nothing if dom0 and iommu supports pass thru */
-    if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
+    if ( iommu_passthrough &&
+         ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
-#endif
 
     pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
     if ( pg_maddr == 0 )
@@ -1500,11 +1503,10 @@ int intel_iommu_unmap_page(struct domain
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
 
-#ifdef CONTEXT_PASSTHRU
     /* do nothing if dom0 and iommu supports pass thru */
-    if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
+    if ( iommu_passthrough &&
+         ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
         return 0;
-#endif
 
     dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h     Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-ia64/shadow.h     Fri Sep 12 14:47:40 2008 +0900
@@ -63,8 +63,6 @@ shadow_mark_page_dirty(struct domain *d,
         return 0;
 }
 
-#define guest_physmap_max_mem_pages(d, n) (0)
-
 #endif // _XEN_SHADOW_H
 
 /*
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/bitops.h
--- a/xen/include/asm-x86/bitops.h      Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/bitops.h      Fri Sep 12 14:47:40 2008 +0900
@@ -116,8 +116,8 @@ static inline void __clear_bit(int nr, v
     __clear_bit(nr, addr);                              \
 })
 
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit()  barrier()
+#define smp_mb__before_clear_bit() ((void)0)
+#define smp_mb__after_clear_bit()  ((void)0)
 
 /**
  * __change_bit - Toggle a bit in memory
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/guest_access.h
--- a/xen/include/asm-x86/guest_access.h        Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/guest_access.h        Fri Sep 12 14:47:40 2008 +0900
@@ -8,7 +8,7 @@
 #define __ASM_X86_GUEST_ACCESS_H__
 
 #include <asm/uaccess.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/guest_access.h>
 
@@ -87,10 +87,10 @@
  * Allows use of faster __copy_* functions.
  */
 #define guest_handle_okay(hnd, nr)                      \
-    (shadow_mode_external(current->domain) ||           \
+    (paging_mode_external(current->domain) ||           \
      array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
 #define guest_handle_subrange_okay(hnd, first, last)    \
-    (shadow_mode_external(current->domain) ||           \
+    (paging_mode_external(current->domain) ||           \
      array_access_ok((hnd).p + (first),                 \
                      (last)-(first)+1,                  \
                      sizeof(*(hnd).p)))
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/hvm/trace.h
--- a/xen/include/asm-x86/hvm/trace.h   Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/hvm/trace.h   Fri Sep 12 14:47:40 2008 +0900
@@ -56,16 +56,13 @@
 #define TRC_PAR_LONG(par) (par)
 #endif
 
-#define HVMTRACE_ND(evt, cycles, vcpu, count, d1, d2, d3, d4, d5, d6)   \
+#define HVMTRACE_ND(evt, cycles, count, d1, d2, d3, d4, d5, d6)         \
     do {                                                                \
         if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt )             \
         {                                                               \
             struct {                                                    \
-                u32 did:16, vid:16;                                     \
                 u32 d[6];                                               \
             } _d;                                                       \
-            _d.did=(vcpu)->domain->domain_id;                           \
-            _d.vid=(vcpu)->vcpu_id;                                     \
             _d.d[0]=(d1);                                               \
             _d.d[1]=(d2);                                               \
             _d.d[2]=(d3);                                               \
@@ -77,32 +74,32 @@
         }                                                               \
     } while(0)
 
-#define HVMTRACE_6D(evt, vcpu, d1, d2, d3, d4, d5, d6)    \
-                      HVMTRACE_ND(evt, 0, vcpu, 6, d1, d2, d3,  d4, d5, d6)
-#define HVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5)        \
-                      HVMTRACE_ND(evt, 0, vcpu, 5, d1, d2, d3,  d4, d5, 0)
-#define HVMTRACE_4D(evt, vcpu, d1, d2, d3, d4)               \
-                      HVMTRACE_ND(evt, 0, vcpu, 4, d1, d2, d3,  d4, 0, 0)
-#define HVMTRACE_3D(evt, vcpu, d1, d2, d3)                   \
-                      HVMTRACE_ND(evt, 0, vcpu, 3, d1, d2, d3,  0, 0, 0)
-#define HVMTRACE_2D(evt, vcpu, d1, d2)                       \
-                      HVMTRACE_ND(evt, 0, vcpu, 2, d1, d2,  0,  0, 0, 0)
-#define HVMTRACE_1D(evt, vcpu, d1)                           \
-                      HVMTRACE_ND(evt, 0, vcpu, 1, d1,  0,  0,  0, 0, 0)
-#define HVMTRACE_0D(evt, vcpu)                               \
-                      HVMTRACE_ND(evt, 0, vcpu, 0, 0,  0,  0,  0, 0, 0)
+#define HVMTRACE_6D(evt, d1, d2, d3, d4, d5, d6)    \
+                      HVMTRACE_ND(evt, 0, 6, d1, d2, d3,  d4, d5, d6)
+#define HVMTRACE_5D(evt, d1, d2, d3, d4, d5)        \
+                      HVMTRACE_ND(evt, 0, 5, d1, d2, d3,  d4, d5, 0)
+#define HVMTRACE_4D(evt, d1, d2, d3, d4)               \
+                      HVMTRACE_ND(evt, 0, 4, d1, d2, d3,  d4, 0, 0)
+#define HVMTRACE_3D(evt, d1, d2, d3)                   \
+                      HVMTRACE_ND(evt, 0, 3, d1, d2, d3,  0, 0, 0)
+#define HVMTRACE_2D(evt, d1, d2)                       \
+                      HVMTRACE_ND(evt, 0, 2, d1, d2,  0,  0, 0, 0)
+#define HVMTRACE_1D(evt, d1)                           \
+                      HVMTRACE_ND(evt, 0, 1, d1,  0,  0,  0, 0, 0)
+#define HVMTRACE_0D(evt)                               \
+                      HVMTRACE_ND(evt, 0, 0, 0,  0,  0,  0, 0, 0)
 
 
 
 #ifdef __x86_64__
-#define HVMTRACE_LONG_1D(evt, vcpu, d1)                  \
-                   HVMTRACE_2D(evt ## 64, vcpu, (d1) & 0xFFFFFFFF, (d1) >> 32)
-#define HVMTRACE_LONG_2D(evt,vcpu,d1,d2, ...)              \
-                   HVMTRACE_3D(evt ## 64, vcpu, d1, d2)
-#define HVMTRACE_LONG_3D(evt, vcpu, d1, d2, d3, ...)      \
-                   HVMTRACE_4D(evt ## 64, vcpu, d1, d2, d3)
-#define HVMTRACE_LONG_4D(evt, vcpu, d1, d2, d3, d4, ...)  \
-                   HVMTRACE_5D(evt ## 64, vcpu, d1, d2, d3, d4)
+#define HVMTRACE_LONG_1D(evt, d1)                  \
+                   HVMTRACE_2D(evt ## 64, (d1) & 0xFFFFFFFF, (d1) >> 32)
+#define HVMTRACE_LONG_2D(evt, d1, d2, ...)              \
+                   HVMTRACE_3D(evt ## 64, d1, d2)
+#define HVMTRACE_LONG_3D(evt, d1, d2, d3, ...)      \
+                   HVMTRACE_4D(evt ## 64, d1, d2, d3)
+#define HVMTRACE_LONG_4D(evt, d1, d2, d3, d4, ...)  \
+                   HVMTRACE_5D(evt ## 64, d1, d2, d3, d4)
 #else
 #define HVMTRACE_LONG_1D HVMTRACE_1D
 #define HVMTRACE_LONG_2D HVMTRACE_2D
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/io_apic.h
--- a/xen/include/asm-x86/io_apic.h     Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/io_apic.h     Fri Sep 12 14:47:40 2008 +0900
@@ -162,8 +162,6 @@ static inline void io_apic_modify(unsign
 /* 1 if "noapic" boot option passed */
 extern int skip_ioapic_setup;
 
-extern int msi_enable;
-
 /*
  * If we use the IO-APIC for IRQ routing, disable automatic
  * assignment of PCI IRQ's.
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/mm.h  Fri Sep 12 14:47:40 2008 +0900
@@ -57,6 +57,17 @@ struct page_info
          * (except page table pages when the guest is in shadow mode).
          */
         u32 tlbflush_timestamp;
+
+        /*
+         * When PGT_partial is true then this field is valid and indicates
+         * that PTEs in the range [0, @nr_validated_ptes) have been validated.
+         * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
+         * partially validated.
+         */
+        struct {
+            u16 nr_validated_ptes;
+            bool_t partial_pte;
+        };
 
         /*
          * Guest pages with a shadow.  This does not conflict with
@@ -86,9 +97,12 @@ struct page_info
  /* PAE only: is this an L2 page directory containing Xen-private mappings? */
 #define _PGT_pae_xen_l2     26
 #define PGT_pae_xen_l2      (1U<<_PGT_pae_xen_l2)
-
- /* 26-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<26)-1)
+/* Has this page been *partially* validated for use as its current type? */
+#define _PGT_partial        25
+#define PGT_partial         (1U<<_PGT_partial)
+
+ /* 25-bit count of uses of this frame as its current type. */
+#define PGT_count_mask      ((1U<<25)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
@@ -154,7 +168,8 @@ extern unsigned long total_pages;
 extern unsigned long total_pages;
 void init_frametable(void);
 
-void free_page_type(struct page_info *page, unsigned long type);
+int free_page_type(struct page_info *page, unsigned long type,
+                   int preemptible);
 int _shadow_mode_refcounts(struct domain *d);
 
 void cleanup_page_cacheattr(struct page_info *page);
@@ -165,6 +180,8 @@ int  get_page(struct page_info *page, st
 int  get_page(struct page_info *page, struct domain *domain);
 void put_page_type(struct page_info *page);
 int  get_page_type(struct page_info *page, unsigned long type);
+int  put_page_type_preemptible(struct page_info *page);
+int  get_page_type_preemptible(struct page_info *page, unsigned long type);
 int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
 
@@ -174,6 +191,19 @@ static inline void put_page_and_type(str
     put_page(page);
 }
 
+static inline int put_page_and_type_preemptible(struct page_info *page,
+                                                int preemptible)
+{
+    int rc = 0;
+
+    if ( preemptible )
+        rc = put_page_type_preemptible(page);
+    else
+        put_page_type(page);
+    if ( likely(rc == 0) )
+        put_page(page);
+    return rc;
+}
 
 static inline int get_page_and_type(struct page_info *page,
                                     struct domain *domain,
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h   Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/msr-index.h   Fri Sep 12 14:47:40 2008 +0900
@@ -194,10 +194,22 @@
 #define _K8_VMCR_SVME_DISABLE          4
 #define K8_VMCR_SVME_DISABLE           (1 << _K8_VMCR_SVME_DISABLE)
 
+/* AMD64 MSRs */
+#define MSR_AMD64_NB_CFG               0xc001001f
+#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT        46
+
 /* AMD Family10h machine check MSRs */
 #define MSR_F10_MC4_MISC1              0xc0000408
 #define MSR_F10_MC4_MISC2              0xc0000409
 #define MSR_F10_MC4_MISC3              0xc000040A
+
+/* Other AMD Fam10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE      0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE_BIT    0
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK     0xfffffff
+#define FAM10H_MMIO_CONF_BASE_SHIFT    20
 
 /* K6 MSRs */
 #define MSR_K6_EFER                    0xc0000080
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/asm-x86/shadow.h      Fri Sep 12 14:47:40 2008 +0900
@@ -115,8 +115,6 @@ static inline void shadow_remove_all_sha
     sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
 }
 
-#define guest_physmap_max_mem_pages(d, n) (0)
-
 #endif /* _XEN_SHADOW_H */
 
 /*
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/public/trace.h
--- a/xen/include/public/trace.h        Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/public/trace.h        Fri Sep 12 14:47:40 2008 +0900
@@ -37,6 +37,7 @@
 #define TRC_HVM      0x0008f000    /* Xen HVM trace            */
 #define TRC_MEM      0x0010f000    /* Xen memory trace         */
 #define TRC_PV       0x0020f000    /* Xen PV traces            */
+#define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
 #define TRC_ALL      0x0ffff000
 #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
 #define TRC_HD_CYCLE_FLAG (1UL<<31)
@@ -50,26 +51,30 @@
 #define TRC_HVM_ENTRYEXIT 0x00081000   /* VMENTRY and #VMEXIT       */
 #define TRC_HVM_HANDLER   0x00082000   /* various HVM handlers      */
 
+#define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
+#define TRC_SCHED_VERBOSE   0x00028000   /* More inclusive scheduling */
+
 /* Trace events per class */
 #define TRC_LOST_RECORDS        (TRC_GEN + 1)
 #define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
 #define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
 
-#define TRC_SCHED_DOM_ADD       (TRC_SCHED +  1)
-#define TRC_SCHED_DOM_REM       (TRC_SCHED +  2)
-#define TRC_SCHED_SLEEP         (TRC_SCHED +  3)
-#define TRC_SCHED_WAKE          (TRC_SCHED +  4)
-#define TRC_SCHED_YIELD         (TRC_SCHED +  5)
-#define TRC_SCHED_BLOCK         (TRC_SCHED +  6)
-#define TRC_SCHED_SHUTDOWN      (TRC_SCHED +  7)
-#define TRC_SCHED_CTL           (TRC_SCHED +  8)
-#define TRC_SCHED_ADJDOM        (TRC_SCHED +  9)
-#define TRC_SCHED_SWITCH        (TRC_SCHED + 10)
-#define TRC_SCHED_S_TIMER_FN    (TRC_SCHED + 11)
-#define TRC_SCHED_T_TIMER_FN    (TRC_SCHED + 12)
-#define TRC_SCHED_DOM_TIMER_FN  (TRC_SCHED + 13)
-#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED + 14)
-#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED + 15)
+#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_DOM_ADD        (TRC_SCHED_VERBOSE +  1)
+#define TRC_SCHED_DOM_REM        (TRC_SCHED_VERBOSE +  2)
+#define TRC_SCHED_SLEEP          (TRC_SCHED_VERBOSE +  3)
+#define TRC_SCHED_WAKE           (TRC_SCHED_VERBOSE +  4)
+#define TRC_SCHED_YIELD          (TRC_SCHED_VERBOSE +  5)
+#define TRC_SCHED_BLOCK          (TRC_SCHED_VERBOSE +  6)
+#define TRC_SCHED_SHUTDOWN       (TRC_SCHED_VERBOSE +  7)
+#define TRC_SCHED_CTL            (TRC_SCHED_VERBOSE +  8)
+#define TRC_SCHED_ADJDOM         (TRC_SCHED_VERBOSE +  9)
+#define TRC_SCHED_SWITCH         (TRC_SCHED_VERBOSE + 10)
+#define TRC_SCHED_S_TIMER_FN     (TRC_SCHED_VERBOSE + 11)
+#define TRC_SCHED_T_TIMER_FN     (TRC_SCHED_VERBOSE + 12)
+#define TRC_SCHED_DOM_TIMER_FN   (TRC_SCHED_VERBOSE + 13)
+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
 
 #define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
 #define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
@@ -88,6 +93,22 @@
 #define TRC_PV_PTWR_EMULATION_PAE    (TRC_PV + 12)
   /* Indicates that addresses in trace record are 64 bits */
 #define TRC_64_FLAG               (0x100) 
+
+#define TRC_SHADOW_NOT_SHADOW                 (TRC_SHADOW +  1)
+#define TRC_SHADOW_FAST_PROPAGATE             (TRC_SHADOW +  2)
+#define TRC_SHADOW_FAST_MMIO                  (TRC_SHADOW +  3)
+#define TRC_SHADOW_FALSE_FAST_PATH            (TRC_SHADOW +  4)
+#define TRC_SHADOW_MMIO                       (TRC_SHADOW +  5)
+#define TRC_SHADOW_FIXUP                      (TRC_SHADOW +  6)
+#define TRC_SHADOW_DOMF_DYING                 (TRC_SHADOW +  7)
+#define TRC_SHADOW_EMULATE                    (TRC_SHADOW +  8)
+#define TRC_SHADOW_EMULATE_UNSHADOW_USER      (TRC_SHADOW +  9)
+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ    (TRC_SHADOW + 10)
+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
+#define TRC_SHADOW_WRMAP_BF                   (TRC_SHADOW + 12)
+#define TRC_SHADOW_PREALLOC_UNPIN             (TRC_SHADOW + 13)
+#define TRC_SHADOW_RESYNC_FULL                (TRC_SHADOW + 14)
+#define TRC_SHADOW_RESYNC_ONLY                (TRC_SHADOW + 15)
 
 /* trace events per subclass */
 #define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xen/cpuidle.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xen/cpuidle.h Fri Sep 12 14:47:40 2008 +0900
@@ -0,0 +1,82 @@
+/*
+ * cpuidle.h - xen idle state module derived from Linux 
+ *
+ * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
+ *          Shaohua Li <shaohua.li@xxxxxxxxx>
+ *          Adam Belay <abelay@xxxxxxxxxx>
+ *  Copyright (C) 2008 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef _XEN_CPUIDLE_H
+#define _XEN_CPUIDLE_H
+
+#define ACPI_PROCESSOR_MAX_POWER        8
+#define CPUIDLE_NAME_LEN                16
+
+struct acpi_processor_cx
+{
+    u8 valid;
+    u8 type;
+    u32 address;
+    u8 space_id;
+    u32 latency;
+    u32 latency_ticks;
+    u32 power;
+    u32 usage;
+    u64 time;
+    u32 target_residency;
+};
+
+struct acpi_processor_flags
+{
+    u8 bm_control:1;
+    u8 bm_check:1;
+    u8 has_cst:1;
+    u8 power_setup_done:1;
+    u8 bm_rld_set:1;
+};
+
+struct acpi_processor_power
+{
+    unsigned int cpu;
+    struct acpi_processor_flags flags;
+    struct acpi_processor_cx *last_state;
+    struct acpi_processor_cx *safe_state;
+    u32 last_residency;
+    void *gdata; /* governor specific data */
+    u32 count;
+    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
+};
+
+struct cpuidle_governor
+{
+    char                    name[CPUIDLE_NAME_LEN];
+    unsigned int            rating;
+
+    int  (*enable)          (struct acpi_processor_power *dev);
+    void (*disable)         (struct acpi_processor_power *dev);
+
+    int  (*select)          (struct acpi_processor_power *dev);
+    void (*reflect)         (struct acpi_processor_power *dev);
+};
+
+extern struct cpuidle_governor *cpuidle_current_governor;
+
+#endif /* _XEN_CPUIDLE_H */
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/xen/iommu.h   Fri Sep 12 14:47:40 2008 +0900
@@ -31,6 +31,7 @@ extern int iommu_enabled;
 extern int iommu_enabled;
 extern int iommu_pv_enabled;
 extern int force_iommu;
+extern int iommu_passthrough;
 
 #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/xen/sched.h   Fri Sep 12 14:47:40 2008 +0900
@@ -106,8 +106,6 @@ struct vcpu
     bool_t           fpu_initialised;
     /* Has the FPU been used since it was last saved? */
     bool_t           fpu_dirtied;
-    /* Is this VCPU polling any event channels (SCHEDOP_poll)? */
-    bool_t           is_polling;
     /* Initialization completed for this VCPU? */
     bool_t           is_initialised;
     /* Currently running on a CPU? */
@@ -133,6 +131,13 @@ struct vcpu
     bool_t           paused_for_shutdown;
     /* VCPU affinity is temporarily locked from controller changes? */
     bool_t           affinity_locked;
+
+    /*
+     * > 0: a single port is being polled;
+     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
+     * < 0: multiple ports may be being polled.
+     */
+    int              poll_evtchn;
 
     unsigned long    pause_flags;
     atomic_t         pause_count;
@@ -209,14 +214,15 @@ struct domain
     struct domain   *target;
     /* Is this guest being debugged by dom0? */
     bool_t           debugger_attached;
-    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
-    bool_t           is_polling;
     /* Is this guest dying (i.e., a zombie)? */
     enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
     /* Domain is paused by controller software? */
     bool_t           is_paused_by_controller;
     /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
     bool_t           is_pinned;
+
+    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
+    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
 
     /* Guest has shut down (inc. reason code)? */
     spinlock_t       shutdown_lock;
@@ -507,6 +513,7 @@ static inline int vcpu_runnable(struct v
              atomic_read(&v->domain->pause_count));
 }
 
+void vcpu_unblock(struct vcpu *v);
 void vcpu_pause(struct vcpu *v);
 void vcpu_pause_nosync(struct vcpu *v);
 void domain_pause(struct domain *d);
@@ -517,17 +524,12 @@ void cpu_init(void);
 void cpu_init(void);
 
 void vcpu_force_reschedule(struct vcpu *v);
+void cpu_disable_scheduler(void);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
 int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
 void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
-
-static inline void vcpu_unblock(struct vcpu *v)
-{
-    if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
-        vcpu_wake(v);
-}
 
 #define IS_PRIV(_d) ((_d)->is_privileged)
 #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xen/trace.h
--- a/xen/include/xen/trace.h   Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/xen/trace.h   Fri Sep 12 14:47:40 2008 +0900
@@ -33,6 +33,8 @@ void init_trace_bufs(void);
 
 /* used to retrieve the physical address of the trace buffers */
 int tb_control(struct xen_sysctl_tbuf_op *tbc);
+
+int trace_will_trace_event(u32 event);
 
 void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
 
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/include/xsm/xsm.h
--- a/xen/include/xsm/xsm.h     Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/include/xsm/xsm.h     Fri Sep 12 14:47:40 2008 +0900
@@ -64,16 +64,17 @@ struct xsm_operations {
     int (*getvcpucontext) (struct domain *d);
     int (*getvcpuinfo) (struct domain *d);
     int (*domain_settime) (struct domain *d);
+    int (*set_target) (struct domain *d, struct domain *e);
     int (*tbufcontrol) (void);
     int (*readconsole) (uint32_t clear);
     int (*sched_id) (void);
     int (*setdomainmaxmem) (struct domain *d);
     int (*setdomainhandle) (struct domain *d);
     int (*setdebugging) (struct domain *d);
-    int (*irq_permission) (struct domain *d, uint8_t pirq, uint8_t access);
-    int (*iomem_permission) (struct domain *d, unsigned long mfn, 
-                                                                uint8_t access);
     int (*perfcontrol) (void);
+    int (*debug_keys) (void);
+    int (*getcpuinfo) (void);
+    int (*availheap) (void);
 
     int (*evtchn_unbound) (struct domain *d, struct evtchn *chn, domid_t id2);
     int (*evtchn_interdomain) (struct domain *d1, struct evtchn *chn1,
@@ -106,13 +107,13 @@ struct xsm_operations {
 
     int (*kexec) (void);
     int (*schedop_shutdown) (struct domain *d1, struct domain *d2);
+    int (*add_range) (struct domain *d, char *name, unsigned long s, unsigned long e);
+    int (*remove_range) (struct domain *d, char *name, unsigned long s, unsigned long e);
 
     long (*__do_xsm_op) (XEN_GUEST_HANDLE(xsm_op_t) op);
 
 #ifdef CONFIG_X86
     int (*shadow_control) (struct domain *d, uint32_t op);
-    int (*ioport_permission) (struct domain *d, uint32_t ioport, 
-                                                                uint8_t access);
     int (*getpageframeinfo) (struct page_info *page);
     int (*getmemlist) (struct domain *d);
     int (*hypercall_init) (struct domain *d);
@@ -130,13 +131,26 @@ struct xsm_operations {
     int (*microcode) (void);
     int (*physinfo) (void);
     int (*platform_quirk) (uint32_t);
+    int (*firmware_info) (void);
+    int (*acpi_sleep) (void);
+    int (*change_freq) (void);
+    int (*getidletime) (void);
     int (*machine_memory_map) (void);
     int (*domain_memory_map) (struct domain *d);
-    int (*mmu_normal_update) (struct domain *d, intpte_t fpte);
+    int (*mmu_normal_update) (struct domain *d, struct domain *f, 
+                                                                intpte_t fpte);
     int (*mmu_machphys_update) (struct domain *d, unsigned long mfn);
-    int (*update_va_mapping) (struct domain *d, l1_pgentry_t pte);
+    int (*update_va_mapping) (struct domain *d, struct domain *f, 
+                                                            l1_pgentry_t pte);
     int (*add_to_physmap) (struct domain *d1, struct domain *d2);
     int (*remove_from_physmap) (struct domain *d1, struct domain *d2);
+    int (*sendtrigger) (struct domain *d);
+    int (*test_assign_device) (uint32_t machine_bdf);
+    int (*assign_device) (struct domain *d, uint32_t machine_bdf);
+    int (*deassign_device) (struct domain *d, uint32_t machine_bdf);
+    int (*bind_pt_irq) (struct domain *d, struct xen_domctl_bind_pt_irq *bind);
+    int (*pin_mem_cacheattr) (struct domain *d);
+    int (*ext_vcpucontext) (struct domain *d, uint32_t cmd);
 #endif
 };
 
@@ -215,6 +229,11 @@ static inline int xsm_domain_settime (st
     return xsm_call(domain_settime(d));
 }
 
+static inline int xsm_set_target (struct domain *d, struct domain *e)
+{
+    return xsm_call(set_target(d, e));
+}
+
 static inline int xsm_tbufcontrol (void)
 {
     return xsm_call(tbufcontrol());
@@ -245,21 +264,24 @@ static inline int xsm_setdebugging (stru
     return xsm_call(setdebugging(d));
 }
 
-static inline int xsm_irq_permission (struct domain *d, uint8_t pirq,
-                                                                uint8_t access)
-{
-    return xsm_call(irq_permission(d, pirq, access));
-} 
-
-static inline int xsm_iomem_permission (struct domain *d, unsigned long mfn,
-                                                                uint8_t access)
-{
-    return xsm_call(iomem_permission(d, mfn, access));
-}
-
 static inline int xsm_perfcontrol (void)
 {
     return xsm_call(perfcontrol());
+}
+
+static inline int xsm_debug_keys (void)
+{
+    return xsm_call(debug_keys());
+}
+
+static inline int xsm_availheap (void)
+{
+    return xsm_call(availheap());
+}
+
+static inline int xsm_getcpuinfo (void)
+{
+    return xsm_call(getcpuinfo());
 }
 
 static inline int xsm_evtchn_unbound (struct domain *d1, struct evtchn *chn,
@@ -385,6 +407,18 @@ static inline int xsm_schedop_shutdown (
 static inline int xsm_schedop_shutdown (struct domain *d1, struct domain *d2)
 {
     return xsm_call(schedop_shutdown(d1, d2));
+}
+
+static inline int xsm_add_range (struct domain *d, char *name, unsigned long s,
+                                                                        
unsigned long e)
+{
+    return xsm_call(add_range(d, name, s, e));
+}
+ 
+static inline int xsm_remove_range (struct domain *d, char *name, unsigned long s,
+                                                                        unsigned long e)
+{
+    return xsm_call(remove_range(d, name, s, e));
 }
 
 static inline long __do_xsm_op (XEN_GUEST_HANDLE(xsm_op_t) op)
@@ -413,12 +447,6 @@ static inline int xsm_shadow_control (st
     return xsm_call(shadow_control(d, op));
 }
 
-static inline int xsm_ioport_permission (struct domain *d, uint32_t ioport,
-                                                                uint8_t access)
-{
-    return xsm_call(ioport_permission(d, ioport, access));
-}
-
 static inline int xsm_getpageframeinfo (struct page_info *page)
 {
     return xsm_call(getpageframeinfo(page));
@@ -504,6 +532,26 @@ static inline int xsm_platform_quirk (ui
     return xsm_call(platform_quirk(quirk));
 }
 
+static inline int xsm_firmware_info (void)
+{
+    return xsm_call(firmware_info());
+}
+
+static inline int xsm_acpi_sleep (void)
+{
+    return xsm_call(acpi_sleep());
+}
+
+static inline int xsm_change_freq (void)
+{
+    return xsm_call(change_freq());
+}
+
+static inline int xsm_getidletime (void)
+{
+    return xsm_call(getidletime());
+}
+
 static inline int xsm_machine_memory_map(void)
 {
     return xsm_call(machine_memory_map());
@@ -514,9 +562,10 @@ static inline int xsm_domain_memory_map(
     return xsm_call(domain_memory_map(d));
 }
 
-static inline int xsm_mmu_normal_update (struct domain *d, intpte_t fpte)
-{
-    return xsm_call(mmu_normal_update(d, fpte));
+static inline int xsm_mmu_normal_update (struct domain *d, struct domain *f, 
+                                                                intpte_t fpte)
+{
+    return xsm_call(mmu_normal_update(d, f, fpte));
 }
 
 static inline int xsm_mmu_machphys_update (struct domain *d, unsigned long mfn)
@@ -524,9 +573,10 @@ static inline int xsm_mmu_machphys_updat
     return xsm_call(mmu_machphys_update(d, mfn));
 }
 
-static inline int xsm_update_va_mapping(struct domain *d, l1_pgentry_t pte)
-{
-    return xsm_call(update_va_mapping(d, pte));
+static inline int xsm_update_va_mapping(struct domain *d, struct domain *f, 
+                                                            l1_pgentry_t pte)
+{
+    return xsm_call(update_va_mapping(d, f, pte));
 }
 
 static inline int xsm_add_to_physmap(struct domain *d1, struct domain *d2)
@@ -538,6 +588,42 @@ static inline int xsm_remove_from_physma
 {
     return xsm_call(remove_from_physmap(d1, d2));
 }
+
+static inline int xsm_sendtrigger(struct domain *d)
+{
+    return xsm_call(sendtrigger(d));
+}
+
+static inline int xsm_test_assign_device(uint32_t machine_bdf)
+{
+    return xsm_call(test_assign_device(machine_bdf));
+}
+
+static inline int xsm_assign_device(struct domain *d, uint32_t machine_bdf)
+{
+    return xsm_call(assign_device(d, machine_bdf));
+}
+
+static inline int xsm_deassign_device(struct domain *d, uint32_t machine_bdf)
+{
+    return xsm_call(deassign_device(d, machine_bdf));
+}
+
+static inline int xsm_bind_pt_irq(struct domain *d, 
+                                                struct xen_domctl_bind_pt_irq *bind)
+{
+    return xsm_call(bind_pt_irq(d, bind));
+}
+
+static inline int xsm_pin_mem_cacheattr(struct domain *d)
+{
+    return xsm_call(pin_mem_cacheattr(d));
+}
+
+static inline int xsm_ext_vcpucontext(struct domain *d, uint32_t cmd)
+{
+    return xsm_call(ext_vcpucontext(d, cmd));
+}
 #endif /* CONFIG_X86 */
 
 #endif /* __XSM_H */
diff -r 4ddd63b4be9b -r ec8eaab557d8 xen/xsm/dummy.c
--- a/xen/xsm/dummy.c   Fri Sep 12 14:32:45 2008 +0900
+++ b/xen/xsm/dummy.c   Fri Sep 12 14:47:40 2008 +0900
@@ -84,6 +84,11 @@ static int dummy_domain_settime (struct 
     return 0;
 }
 
+static int dummy_set_target (struct domain *d, struct domain *e)
+{
+    return 0;
+}
+
 static int dummy_tbufcontrol (void)
 {
     return 0;
@@ -114,18 +119,22 @@ static int dummy_setdebugging (struct do
     return 0;
 }
 
-static int dummy_irq_permission (struct domain *d, uint8_t pirq, uint8_t access)
-{
-    return 0;
-}
-
-static int dummy_iomem_permission (struct domain *d, unsigned long mfn,
-                                                                uint8_t access)
-{
-    return 0;
-}
-
 static int dummy_perfcontrol (void)
+{
+    return 0;
+}
+
+static int dummy_debug_keys (void)
+{
+    return 0;
+}
+
+static int dummy_getcpuinfo (void)
+{
+    return 0;
+}
+
+static int dummy_availheap (void)
 {
     return 0;
 }
@@ -259,18 +268,23 @@ static long dummy___do_xsm_op(XEN_GUEST_
     return -ENOSYS;
 }
 
+static int dummy_add_range (struct domain *d, char *name, unsigned long s, unsigned long e)
+{
+    return 0;
+}
+
+static int dummy_remove_range (struct domain *d, char *name, unsigned long s,
+                                                                        unsigned long e)
+{
+    return 0;
+}
+
 #ifdef CONFIG_X86
 static int dummy_shadow_control (struct domain *d, uint32_t op)
 {
     return 0;
 }
 
-static int dummy_ioport_permission (struct domain *d, uint32_t ioport, 
-                                                                uint8_t access)
-{
-    return 0;
-}
-
 static int dummy_getpageframeinfo (struct page_info *page)
 {
     return 0;
@@ -356,6 +370,26 @@ static int dummy_platform_quirk (uint32_
     return 0;
 }
 
+static int dummy_firmware_info (void)
+{
+    return 0;
+}
+
+static int dummy_acpi_sleep (void)
+{
+    return 0;
+}
+
+static int dummy_change_freq (void)
+{
+    return 0;
+}
+
+static int dummy_getidletime (void)
+{
+    return 0;
+}
+
 static int dummy_machine_memory_map (void)
 {
     return 0;
@@ -366,7 +400,8 @@ static int dummy_domain_memory_map (stru
     return 0;
 }
 
-static int dummy_mmu_normal_update (struct domain *d, intpte_t fpte)
+static int dummy_mmu_normal_update (struct domain *d, struct domain *f, 
+                                                                intpte_t fpte)
 {
     return 0;
 }
@@ -376,12 +411,48 @@ static int dummy_mmu_machphys_update (st
     return 0;
 }
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>