# HG changeset patch
# User Hollis Blanchard <hollisb@xxxxxxxxxx>
# Date 1173386392 21600
# Node ID dcec453681bc85f9f6f3aa49431ded9a63aa1c9b
# Parent  8f0b5295bb1bd66c9e5c86368845bdb055b3d86c
# Parent  38513d22d23420a90f94e7e0f70c564100e83851
[POWERPC][XEN] Merge with xen-unstable.hg.
Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx>
---
 linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c          |  389 -----
 linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c  |  774 ----------
 linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c        |   59 
 linux-2.6-xen-sparse/mm/Kconfig                                  |  157 --
 tools/ptsname/Makefile                                           |   22 
 tools/ptsname/ptsname.c                                          |   44 
 tools/ptsname/setup.py                                           |   11 
 xen/arch/x86/mm/shadow/page-guest32.h                            |  100 -
 Config.mk                                                        |    7 
 config/StdGNU.mk                                                 |    6 
 config/SunOS.mk                                                  |    6 
 docs/misc/dump-core-format.txt                                   |   14 
 linux-2.6-xen-sparse/arch/i386/Kconfig                           |    6 
 linux-2.6-xen-sparse/arch/i386/Kconfig.cpu                       |    4 
 linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S                |   59 
 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S                 |    8 
 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c                |    3 
 linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c                 |   13 
 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c                  |    4 
 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c                  |   58 
 linux-2.6-xen-sparse/arch/x86_64/Kconfig                         |    2 
 linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile                 |    2 
 linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S              |   83 -
 linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S               |   25 
 linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c              |    2 
 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c              |   27 
 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c                   |  218 +-
 linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c               |   11 
 linux-2.6-xen-sparse/drivers/char/mem.c                          |    2 
 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c                  |  192 +-
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c                 |    2 
 linux-2.6-xen-sparse/drivers/xen/char/mem.c                      |    2 
 linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c           |   55 
 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c                 |    2 
 linux-2.6-xen-sparse/drivers/xen/netback/common.h                |   15 
 linux-2.6-xen-sparse/drivers/xen/netback/interface.c             |   18 
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c               |   21 
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c                |    4 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c             |   64 
 linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c               |   11 
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c               |    2 
 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c               |    2 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c             |   54 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h        |   17 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h      |    2 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h    |    5 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h        |    6 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h   |   13 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h |    2 
 linux-2.6-xen-sparse/include/linux/page-flags.h                  |    6 
 linux-2.6-xen-sparse/include/xen/cpu_hotplug.h                   |    2 
 patches/linux-2.6.18/blktap-aio-16_03_06.patch                   |    5 
 tools/Makefile                                                   |    9 
 tools/blktap/lib/Makefile                                        |    4 
 tools/console/Makefile                                           |    4 
 tools/firmware/rombios/rombios.c                                 |  102 -
 tools/guest-headers/Makefile                                     |    2 
 tools/ioemu/Makefile                                             |    8 
 tools/ioemu/hw/ide.c                                             |  115 +
 tools/ioemu/target-i386-dm/qemu-ifup                             |    3 
 tools/ioemu/vl.c                                                 |    8 
 tools/ioemu/vnc.c                                                |    8 
 tools/libxc/xc_core.c                                            |   26 
 tools/libxc/xc_core.h                                            |    4 
 tools/libxc/xc_core_ia64.c                                       |    4 
 tools/libxc/xc_core_ia64.h                                       |    2 
 tools/libxc/xc_core_x86.h                                        |    2 
 tools/libxc/xc_dom_core.c                                        |    3 
 tools/libxc/xc_linux_restore.c                                   |   24 
 tools/libxc/xc_linux_save.c                                      |   16 
 tools/libxc/xc_ptrace_core.c                                     |   24 
 tools/libxen/Makefile                                            |    4 
 tools/pygrub/Makefile                                            |    7 
 tools/python/Makefile                                            |    3 
 tools/python/ptsname/ptsname.c                                   |   44 
 tools/python/setup.py                                            |    9 
 tools/python/xen/xend/XendBootloader.py                          |    3 
 tools/python/xen/xend/XendDomainInfo.py                          |   14 
 tools/python/xen/xend/XendNode.py                                |   41 
 tools/security/Makefile                                          |   35 
 tools/vnet/libxutil/Makefile                                     |    2 
 tools/xenfb/Makefile                                             |    9 
 tools/xenfb/xenfb.c                                              |    5 
 tools/xenstore/Makefile                                          |   12 
 xen/Rules.mk                                                     |    8 
 xen/arch/x86/domain.c                                            |   25 
 xen/arch/x86/domain_build.c                                      |    4 
 xen/arch/x86/domctl.c                                            |    7 
 xen/arch/x86/hvm/hvm.c                                           |    3 
 xen/arch/x86/hvm/svm/emulate.c                                   |   22 
 xen/arch/x86/hvm/svm/svm.c                                       |  388 +++--
 xen/arch/x86/hvm/svm/vmcb.c                                      |   10 
 xen/arch/x86/mm.c                                                |    5 
 xen/arch/x86/mm/Makefile                                         |    1 
 xen/arch/x86/mm/hap/Makefile                                     |    2 
 xen/arch/x86/mm/hap/hap.c                                        |  708 +++++++++
 xen/arch/x86/mm/hap/private.h                                    |  112 +
 xen/arch/x86/mm/hap/support.c                                    |  334 ++++
 xen/arch/x86/mm/page-guest32.h                                   |  100 +
 xen/arch/x86/mm/paging.c                                         |   34 
 xen/arch/x86/mm/shadow/common.c                                  |   21 
 xen/arch/x86/mm/shadow/private.h                                 |    4 
 xen/arch/x86/mm/shadow/types.h                                   |    2 
 xen/common/event_channel.c                                       |    3 
 xen/common/page_alloc.c                                          |  165 --
 xen/common/xmalloc.c                                             |    6 
 xen/drivers/acpi/numa.c                                          |    9 
 xen/drivers/char/console.c                                       |    2 
 xen/include/acm/acm_hooks.h                                      |    4 
 xen/include/asm-x86/domain.h                                     |   16 
 xen/include/asm-x86/hap.h                                        |  122 +
 xen/include/asm-x86/hvm/svm/emulate.h                            |   10 
 xen/include/asm-x86/hvm/svm/svm.h                                |   35 
 xen/include/asm-x86/hvm/svm/vmcb.h                               |   16 
 xen/include/public/arch-x86/xen.h                                |    1 
 xen/include/public/xen.h                                         |   19 
 116 files changed, 2946 insertions(+), 2426 deletions(-)
diff -r 8f0b5295bb1b -r dcec453681bc Config.mk
--- a/Config.mk Mon Mar 05 12:49:12 2007 -0600
+++ b/Config.mk Thu Mar 08 14:39:52 2007 -0600
@@ -73,9 +73,10 @@ ACM_DEFAULT_SECURITY_POLICY ?= ACM_NULL_
 ACM_DEFAULT_SECURITY_POLICY ?= ACM_NULL_POLICY
 
 # Optional components
-XENSTAT_XENTOP ?= y
-VTPM_TOOLS ?= n
+XENSTAT_XENTOP     ?= y
+VTPM_TOOLS         ?= n
 LIBXENAPI_BINDINGS ?= n
-XENFB_TOOLS ?= n
+XENFB_TOOLS        ?= n
+PYTHON_TOOLS       ?= y
 
 -include $(XEN_ROOT)/.config
diff -r 8f0b5295bb1b -r dcec453681bc config/StdGNU.mk
--- a/config/StdGNU.mk  Mon Mar 05 12:49:12 2007 -0600
+++ b/config/StdGNU.mk  Thu Mar 08 14:39:52 2007 -0600
@@ -12,9 +12,9 @@ MSGFMT     = msgfmt
 MSGFMT     = msgfmt
 
 INSTALL      = install
-INSTALL_DIR  = $(INSTALL) -d -m0755
-INSTALL_DATA = $(INSTALL) -m0644
-INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR  = $(INSTALL) -d -m0755 -p
+INSTALL_DATA = $(INSTALL) -m0644 -p
+INSTALL_PROG = $(INSTALL) -m0755 -p
 
 LIB64DIR = lib64
 
diff -r 8f0b5295bb1b -r dcec453681bc config/SunOS.mk
--- a/config/SunOS.mk   Mon Mar 05 12:49:12 2007 -0600
+++ b/config/SunOS.mk   Thu Mar 08 14:39:52 2007 -0600
@@ -14,9 +14,9 @@ SHELL      = bash
 SHELL      = bash
 
 INSTALL      = ginstall
-INSTALL_DIR  = $(INSTALL) -d -m0755
-INSTALL_DATA = $(INSTALL) -m0644
-INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR  = $(INSTALL) -d -m0755 -p
+INSTALL_DATA = $(INSTALL) -m0644 -p
+INSTALL_PROG = $(INSTALL) -m0755 -p
 
 LIB64DIR = lib/amd64
 
diff -r 8f0b5295bb1b -r dcec453681bc docs/misc/dump-core-format.txt
--- a/docs/misc/dump-core-format.txt    Mon Mar 05 12:49:12 2007 -0600
+++ b/docs/misc/dump-core-format.txt    Thu Mar 08 14:39:52 2007 -0600
@@ -26,11 +26,12 @@ Elf header
 Elf header
 ----------
 The elf header members are set as follows
+        e_ident[EI_CLASS] = ELFCLASS64 = 2
         e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
         e_type = ET_CORE = 4
-e_ident[EI_CLASS], e_ident[EI_DATA] and e_flags are set according
-to an architecture which a file is created. Other members are set as usual.
-
+ELFCLASS64 is always used independent of architecture.
+e_ident[EI_DATA] and e_flags are set according to the dumping system's
+architecture. Other members are set as usual.
 
 Sections
 --------
@@ -221,5 +222,10 @@ format_version descriptor
 
 Format version history
 ----------------------
-The currently only (major, minor) = (0, 1) is used.
+Currently only (major, minor) = (0, 1) is used.
 [When the format is changed, it would be described here.]
+
+(0, 1) update
+- EI_CLASS member of elf header was changed to ELFCLASS64 independent of
+  architecture. This is mainly for x86_32pae.
+  The format version isn't bumped because analysis tools can distinguish it.
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig    Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig    Thu Mar 08 14:39:52 2007 -0600
@@ -255,7 +255,6 @@ config SCHED_SMT
 config SCHED_SMT
        bool "SMT (Hyperthreading) scheduler support"
        depends on X86_HT
-       depends on !X86_XEN
        help
          SMT scheduler support improves the CPU scheduler's decision making
          when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -311,11 +310,6 @@ config X86_VISWS_APIC
 config X86_VISWS_APIC
        bool
        depends on X86_VISWS
-       default y
-
-config X86_TSC
-       bool
-       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ && !X86_XEN
        default y
 
 config X86_MCE
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/Kconfig.cpu
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu        Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu        Thu Mar 08 14:39:52 2007 -0600
@@ -311,5 +311,5 @@ config X86_OOSTORE
 
 config X86_TSC
        bool
-       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
-       default y
+       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ && !X86_XEN
+       default y
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c   Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,389 +0,0 @@
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <asm/alternative.h>
-#include <asm/sections.h>
-
-static int no_replacement    = 0;
-static int smp_alt_once      = 0;
-static int debug_alternative = 0;
-
-static int __init noreplacement_setup(char *s)
-{
-       no_replacement = 1;
-       return 1;
-}
-static int __init bootonly(char *str)
-{
-       smp_alt_once = 1;
-       return 1;
-}
-static int __init debug_alt(char *str)
-{
-       debug_alternative = 1;
-       return 1;
-}
-
-__setup("noreplacement", noreplacement_setup);
-__setup("smp-alt-boot", bootonly);
-__setup("debug-alternative", debug_alt);
-
-#define DPRINTK(fmt, args...) if (debug_alternative) \
-       printk(KERN_DEBUG fmt, args)
-
-#ifdef GENERIC_NOP1
-/* Use inline assembly to define this because the nops are defined
-   as inline assembly strings in the include files and we cannot
-   get them easily into strings. */
-asm("\t.data\nintelnops: "
-       GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
-       GENERIC_NOP7 GENERIC_NOP8);
-extern unsigned char intelnops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
-       NULL,
-       intelnops,
-       intelnops + 1,
-       intelnops + 1 + 2,
-       intelnops + 1 + 2 + 3,
-       intelnops + 1 + 2 + 3 + 4,
-       intelnops + 1 + 2 + 3 + 4 + 5,
-       intelnops + 1 + 2 + 3 + 4 + 5 + 6,
-       intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-#endif
-
-#ifdef K8_NOP1
-asm("\t.data\nk8nops: "
-       K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-       K8_NOP7 K8_NOP8);
-extern unsigned char k8nops[];
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
-       NULL,
-       k8nops,
-       k8nops + 1,
-       k8nops + 1 + 2,
-       k8nops + 1 + 2 + 3,
-       k8nops + 1 + 2 + 3 + 4,
-       k8nops + 1 + 2 + 3 + 4 + 5,
-       k8nops + 1 + 2 + 3 + 4 + 5 + 6,
-       k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-#endif
-
-#ifdef K7_NOP1
-asm("\t.data\nk7nops: "
-       K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
-       K7_NOP7 K7_NOP8);
-extern unsigned char k7nops[];
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
-       NULL,
-       k7nops,
-       k7nops + 1,
-       k7nops + 1 + 2,
-       k7nops + 1 + 2 + 3,
-       k7nops + 1 + 2 + 3 + 4,
-       k7nops + 1 + 2 + 3 + 4 + 5,
-       k7nops + 1 + 2 + 3 + 4 + 5 + 6,
-       k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-#endif
-
-#ifdef CONFIG_X86_64
-
-extern char __vsyscall_0;
-static inline unsigned char** find_nop_table(void)
-{
-       return k8_nops;
-}
-
-#else /* CONFIG_X86_64 */
-
-static struct nop {
-       int cpuid;
-       unsigned char **noptable;
-} noptypes[] = {
-       { X86_FEATURE_K8, k8_nops },
-       { X86_FEATURE_K7, k7_nops },
-       { -1, NULL }
-};
-
-static unsigned char** find_nop_table(void)
-{
-       unsigned char **noptable = intel_nops;
-       int i;
-
-       for (i = 0; noptypes[i].cpuid >= 0; i++) {
-               if (boot_cpu_has(noptypes[i].cpuid)) {
-                       noptable = noptypes[i].noptable;
-                       break;
-               }
-       }
-       return noptable;
-}
-
-#endif /* CONFIG_X86_64 */
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
-extern u8 *__smp_locks[], *__smp_locks_end[];
-
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
-/* Replace instructions with better alternatives for this CPU type.
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that assymetric systems where
-   APs have less capabilities than the boot processor are not handled.
-   Tough. Make sure you disable such features by hand. */
-
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
-{
-       unsigned char **noptable = find_nop_table();
-       struct alt_instr *a;
-       u8 *instr;
-       int diff, i, k;
-
-       DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
-       for (a = start; a < end; a++) {
-               BUG_ON(a->replacementlen > a->instrlen);
-               if (!boot_cpu_has(a->cpuid))
-                       continue;
-               instr = a->instr;
-#ifdef CONFIG_X86_64
-               /* vsyscall code is not mapped yet. resolve it manually. */
-               if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
-                       instr -= VSYSCALL_START - (unsigned long)&__vsyscall_0;
-                       DPRINTK("%s: vsyscall fixup: %p => %p\n",
-                               __FUNCTION__, a->instr, instr);
-               }
-#endif
-               memcpy(instr, a->replacement, a->replacementlen);
-               diff = a->instrlen - a->replacementlen;
-               /* Pad the rest with nops */
-               for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
-                       k = diff;
-                       if (k > ASM_NOP_MAX)
-                               k = ASM_NOP_MAX;
-                       memcpy(a->instr + i, noptable[k], k);
-               }
-       }
-}
-
-#ifdef CONFIG_SMP
-
-static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
-{
-       struct alt_instr *a;
-
-       DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
-       for (a = start; a < end; a++) {
-               memcpy(a->replacement + a->replacementlen,
-                      a->instr,
-                      a->instrlen);
-       }
-}
-
-static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
-{
-       struct alt_instr *a;
-
-       for (a = start; a < end; a++) {
-               memcpy(a->instr,
-                      a->replacement + a->replacementlen,
-                      a->instrlen);
-       }
-}
-
-static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
-{
-       u8 **ptr;
-
-       for (ptr = start; ptr < end; ptr++) {
-               if (*ptr < text)
-                       continue;
-               if (*ptr > text_end)
-                       continue;
-               **ptr = 0xf0; /* lock prefix */
-       };
-}
-
-static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
-{
-       unsigned char **noptable = find_nop_table();
-       u8 **ptr;
-
-       for (ptr = start; ptr < end; ptr++) {
-               if (*ptr < text)
-                       continue;
-               if (*ptr > text_end)
-                       continue;
-               **ptr = noptable[1][0];
-       };
-}
-
-struct smp_alt_module {
-       /* what is this ??? */
-       struct module   *mod;
-       char            *name;
-
-       /* ptrs to lock prefixes */
-       u8              **locks;
-       u8              **locks_end;
-
-       /* .text segment, needed to avoid patching init code ;) */
-       u8              *text;
-       u8              *text_end;
-
-       struct list_head next;
-};
-static LIST_HEAD(smp_alt_modules);
-static DEFINE_SPINLOCK(smp_alt);
-
-void alternatives_smp_module_add(struct module *mod, char *name,
-                                void *locks, void *locks_end,
-                                void *text,  void *text_end)
-{
-       struct smp_alt_module *smp;
-       unsigned long flags;
-
-       if (no_replacement)
-               return;
-
-       if (smp_alt_once) {
-               if (boot_cpu_has(X86_FEATURE_UP))
-                       alternatives_smp_unlock(locks, locks_end,
-                                               text, text_end);
-               return;
-       }
-
-       smp = kzalloc(sizeof(*smp), GFP_KERNEL);
-       if (NULL == smp)
-               return; /* we'll run the (safe but slow) SMP code then ... */
-
-       smp->mod        = mod;
-       smp->name       = name;
-       smp->locks      = locks;
-       smp->locks_end  = locks_end;
-       smp->text       = text;
-       smp->text_end   = text_end;
-       DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
-               __FUNCTION__, smp->locks, smp->locks_end,
-               smp->text, smp->text_end, smp->name);
-
-       spin_lock_irqsave(&smp_alt, flags);
-       list_add_tail(&smp->next, &smp_alt_modules);
-       if (boot_cpu_has(X86_FEATURE_UP))
-               alternatives_smp_unlock(smp->locks, smp->locks_end,
-                                       smp->text, smp->text_end);
-       spin_unlock_irqrestore(&smp_alt, flags);
-}
-
-void alternatives_smp_module_del(struct module *mod)
-{
-       struct smp_alt_module *item;
-       unsigned long flags;
-
-       if (no_replacement || smp_alt_once)
-               return;
-
-       spin_lock_irqsave(&smp_alt, flags);
-       list_for_each_entry(item, &smp_alt_modules, next) {
-               if (mod != item->mod)
-                       continue;
-               list_del(&item->next);
-               spin_unlock_irqrestore(&smp_alt, flags);
-               DPRINTK("%s: %s\n", __FUNCTION__, item->name);
-               kfree(item);
-               return;
-       }
-       spin_unlock_irqrestore(&smp_alt, flags);
-}
-
-void alternatives_smp_switch(int smp)
-{
-       struct smp_alt_module *mod;
-       unsigned long flags;
-
-#ifdef CONFIG_LOCKDEP
-       /*
-        * A not yet fixed binutils section handling bug prevents
-        * alternatives-replacement from working reliably, so turn
-        * it off:
-        */
-       printk("lockdep: not fixing up alternatives.\n");
-       return;
-#endif
-
-       if (no_replacement || smp_alt_once)
-               return;
-       BUG_ON(!smp && (num_online_cpus() > 1));
-
-       spin_lock_irqsave(&smp_alt, flags);
-       if (smp) {
-               printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
-               clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-               clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
-               alternatives_smp_apply(__smp_alt_instructions,
-                                      __smp_alt_instructions_end);
-               list_for_each_entry(mod, &smp_alt_modules, next)
-                       alternatives_smp_lock(mod->locks, mod->locks_end,
-                                             mod->text, mod->text_end);
-       } else {
-               printk(KERN_INFO "SMP alternatives: switching to UP code\n");
-               set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-               set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
-               apply_alternatives(__smp_alt_instructions,
-                                  __smp_alt_instructions_end);
-               list_for_each_entry(mod, &smp_alt_modules, next)
-                       alternatives_smp_unlock(mod->locks, mod->locks_end,
-                                               mod->text, mod->text_end);
-       }
-       spin_unlock_irqrestore(&smp_alt, flags);
-}
-
-#endif
-
-void __init alternative_instructions(void)
-{
-       if (no_replacement) {
-               printk(KERN_INFO "(SMP-)alternatives turned off\n");
-               free_init_pages("SMP alternatives",
-                               (unsigned long)__smp_alt_begin,
-                               (unsigned long)__smp_alt_end);
-               return;
-       }
-       apply_alternatives(__alt_instructions, __alt_instructions_end);
-
-       /* switch to patch-once-at-boottime-only mode and free the
-        * tables in case we know the number of CPUs will never ever
-        * change */
-#ifdef CONFIG_HOTPLUG_CPU
-       if (num_possible_cpus() < 2)
-               smp_alt_once = 1;
-#else
-       smp_alt_once = 1;
-#endif
-
-#ifdef CONFIG_SMP
-       if (smp_alt_once) {
-               if (1 == num_possible_cpus()) {
-                       printk(KERN_INFO "SMP alternatives: switching to UP code\n");
-                       set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-                       set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
-                       apply_alternatives(__smp_alt_instructions,
-                                          __smp_alt_instructions_end);
-                       alternatives_smp_unlock(__smp_locks, __smp_locks_end,
-                                               _text, _etext);
-               }
-               free_init_pages("SMP alternatives",
-                               (unsigned long)__smp_alt_begin,
-                               (unsigned long)__smp_alt_end);
-       } else {
-               alternatives_smp_save(__smp_alt_instructions,
-                                     __smp_alt_instructions_end);
-               alternatives_smp_module_add(NULL, "core kernel",
-                                           __smp_locks, __smp_locks_end,
-                                           _text, _etext);
-               alternatives_smp_switch(0);
-       }
-#endif
-}
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c   Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,774 +0,0 @@
-/*
- *      Routines to indentify caches on Intel CPU.
- *
- *      Changes:
- *      Venkatesh Pallipadi    : Adding cache identification through cpuid(4)
- *             Ashok Raj <ashok.raj@xxxxxxxxx>: Work with CPU hotplug infrastructure.
- *     Andi Kleen              : CPUID4 emulation on AMD.
- */
-
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/compiler.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-
-#include <asm/processor.h>
-#include <asm/smp.h>
-
-#define LVL_1_INST     1
-#define LVL_1_DATA     2
-#define LVL_2          3
-#define LVL_3          4
-#define LVL_TRACE      5
-
-struct _cache_table
-{
-       unsigned char descriptor;
-       char cache_type;
-       short size;
-};
-
-/* all the cache descriptor types we care about (no TLB or trace cache entries) */
-static struct _cache_table cache_table[] __cpuinitdata =
-{
-       { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
-       { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
-       { 0x0a, LVL_1_DATA, 8 },        /* 2 way set assoc, 32 byte line size */
-       { 0x0c, LVL_1_DATA, 16 },       /* 4-way set assoc, 32 byte line size */
-       { 0x22, LVL_3,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
-       { 0x23, LVL_3,      1024 },     /* 8-way set assoc, sectored cache, 64 byte line size */
-       { 0x25, LVL_3,      2048 },     /* 8-way set assoc, sectored cache, 64 byte line size */
-       { 0x29, LVL_3,      4096 },     /* 8-way set assoc, sectored cache, 64 byte line size */
-       { 0x2c, LVL_1_DATA, 32 },       /* 8-way set assoc, 64 byte line size */
-       { 0x30, LVL_1_INST, 32 },       /* 8-way set assoc, 64 byte line size */
-       { 0x39, LVL_2,      128 },      /* 4-way set assoc, sectored cache, 64 byte line size */
-       { 0x3a, LVL_2,      192 },      /* 6-way set assoc, sectored cache, 64 byte line size */
-       { 0x3b, LVL_2,      128 },      /* 2-way set assoc, sectored cache, 64 byte line size */
-       { 0x3c, LVL_2,      256 },      /* 4-way set assoc, sectored cache, 64 byte line size */
-       { 0x3d, LVL_2,      384 },      /* 6-way set assoc, sectored cache, 64 byte line size */
-       { 0x3e, LVL_2,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
-       { 0x41, LVL_2,      128 },      /* 4-way set assoc, 32 byte line size */
-       { 0x42, LVL_2,      256 },      /* 4-way set assoc, 32 byte line size */
-       { 0x43, LVL_2,      512 },      /* 4-way set assoc, 32 byte line size */
-       { 0x44, LVL_2,      1024 },     /* 4-way set assoc, 32 byte line size */
-       { 0x45, LVL_2,      2048 },     /* 4-way set assoc, 32 byte line size */
-       { 0x46, LVL_3,      4096 },     /* 4-way set assoc, 64 byte line size */
-       { 0x47, LVL_3,      8192 },     /* 8-way set assoc, 64 byte line size */
-       { 0x49, LVL_3,      4096 },     /* 16-way set assoc, 64 byte line size */
-       { 0x4a, LVL_3,      6144 },     /* 12-way set assoc, 64 byte line size */
-       { 0x4b, LVL_3,      8192 },     /* 16-way set assoc, 64 byte line size */
-       { 0x4c, LVL_3,     12288 },     /* 12-way set assoc, 64 byte line size */
-       { 0x4d, LVL_3,     16384 },     /* 16-way set assoc, 64 byte line size */
-       { 0x60, LVL_1_DATA, 16 },       /* 8-way set assoc, sectored cache, 64 byte line size */
-       { 0x66, LVL_1_DATA, 8 },        /* 4-way set assoc, sectored cache, 64 byte line size */
-       { 0x67, LVL_1_DATA, 16 },       /* 4-way set assoc, sectored cache, 64 byte line size */
-       { 0x68, LVL_1_DATA, 32 },       /* 4-way set assoc, sectored cache, 64 byte line size */
-       { 0x70, LVL_TRACE,  12 },       /* 8-way set assoc */
-       { 0x71, LVL_TRACE,  16 },       /* 8-way set assoc */
-       { 0x72, LVL_TRACE,  32 },       /* 8-way set assoc */
-       { 0x73, LVL_TRACE,  64 },       /* 8-way set assoc */
-       { 0x78, LVL_2,    1024 },       /* 4-way set assoc, 64 byte line size */
-       { 0x79, LVL_2,     128 },       /* 8-way set assoc, sectored cache, 64 byte line size */
-       { 0x7a, LVL_2,     256 },       /* 8-way set assoc, sectored cache, 64 byte line size */
-       { 0x7b, LVL_2,     512 },       /* 8-way set assoc, sectored cache, 64 byte line size */
-       { 0x7c, LVL_2,    1024 },       /* 8-way set assoc, sectored cache, 64 byte line size */
-       { 0x7d, LVL_2,    2048 },       /* 8-way set assoc, 64 byte line size */
-       { 0x7f, LVL_2,     512 },       /* 2-way set assoc, 64 byte line size */
-       { 0x82, LVL_2,     256 },       /* 8-way set assoc, 32 byte line size */
-       { 0x83, LVL_2,     512 },       /* 8-way set assoc, 32 byte line size */
-       { 0x84, LVL_2,    1024 },       /* 8-way set assoc, 32 byte line size */
-       { 0x85, LVL_2,    2048 },       /* 8-way set assoc, 32 byte line size */
-       { 0x86, LVL_2,     512 },       /* 4-way set assoc, 64 byte line size */
-       { 0x87, LVL_2,    1024 },       /* 8-way set assoc, 64 byte line size */
-       { 0x00, 0, 0}
-};
-
-
-enum _cache_type
-{
-       CACHE_TYPE_NULL = 0,
-       CACHE_TYPE_DATA = 1,
-       CACHE_TYPE_INST = 2,
-       CACHE_TYPE_UNIFIED = 3
-};
-
-union _cpuid4_leaf_eax {
-       struct {
-               enum _cache_type        type:5;
-               unsigned int            level:3;
-               unsigned int            is_self_initializing:1;
-               unsigned int            is_fully_associative:1;
-               unsigned int            reserved:4;
-               unsigned int            num_threads_sharing:12;
-               unsigned int            num_cores_on_die:6;
-       } split;
-       u32 full;
-};
-
-union _cpuid4_leaf_ebx {
-       struct {
-               unsigned int            coherency_line_size:12;
-               unsigned int            physical_line_partition:10;
-               unsigned int            ways_of_associativity:10;
-       } split;
-       u32 full;
-};
-
-union _cpuid4_leaf_ecx {
-       struct {
-               unsigned int            number_of_sets:32;
-       } split;
-       u32 full;
-};
-
-struct _cpuid4_info {
-       union _cpuid4_leaf_eax eax;
-       union _cpuid4_leaf_ebx ebx;
-       union _cpuid4_leaf_ecx ecx;
-       unsigned long size;
-       cpumask_t shared_cpu_map;
-};
-
-unsigned short                 num_cache_leaves;
-
-/* AMD doesn't have CPUID4. Emulate it here to report the same
-   information to the user.  This makes some assumptions about the machine:
-   No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
-
-   In theory the TLBs could be reported as fake type (they are in "dummy").
-   Maybe later */
-union l1_cache {
-       struct {
-               unsigned line_size : 8;
-               unsigned lines_per_tag : 8;
-               unsigned assoc : 8;
-               unsigned size_in_kb : 8;
-       };
-       unsigned val;
-};
-
-union l2_cache {
-       struct {
-               unsigned line_size : 8;
-               unsigned lines_per_tag : 4;
-               unsigned assoc : 4;
-               unsigned size_in_kb : 16;
-       };
-       unsigned val;
-};
-
-static const unsigned short assocs[] = {
-       [1] = 1, [2] = 2, [4] = 4, [6] = 8,
-       [8] = 16,
-       [0xf] = 0xffff // ??
-       };
-static const unsigned char levels[] = { 1, 1, 2 };
-static const unsigned char types[] = { 1, 2, 3 };
-
-static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
-                      union _cpuid4_leaf_ebx *ebx,
-                      union _cpuid4_leaf_ecx *ecx)
-{
-       unsigned dummy;
-       unsigned line_size, lines_per_tag, assoc, size_in_kb;
-       union l1_cache l1i, l1d;
-       union l2_cache l2;
-
-       eax->full = 0;
-       ebx->full = 0;
-       ecx->full = 0;
-
-       cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
-       cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
-
-       if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
-               return;
-
-       eax->split.is_self_initializing = 1;
-       eax->split.type = types[leaf];
-       eax->split.level = levels[leaf];
-       eax->split.num_threads_sharing = 0;
-       eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
-
-       if (leaf <= 1) {
-               union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
-               assoc = l1->assoc;
-               line_size = l1->line_size;
-               lines_per_tag = l1->lines_per_tag;
-               size_in_kb = l1->size_in_kb;
-       } else {
-               assoc = l2.assoc;
-               line_size = l2.line_size;
-               lines_per_tag = l2.lines_per_tag;
-               /* cpu_data has errata corrections for K7 applied */
-               size_in_kb = current_cpu_data.x86_cache_size;
-       }
-
-       if (assoc == 0xf)
-               eax->split.is_fully_associative = 1;
-       ebx->split.coherency_line_size = line_size - 1;
-       ebx->split.ways_of_associativity = assocs[assoc] - 1;
-       ebx->split.physical_line_partition = lines_per_tag - 1;
-       ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
-               (ebx->split.ways_of_associativity + 1) - 1;
-}
-
-static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
-{
-       union _cpuid4_leaf_eax  eax;
-       union _cpuid4_leaf_ebx  ebx;
-       union _cpuid4_leaf_ecx  ecx;
-       unsigned                edx;
-
-       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-               amd_cpuid4(index, &eax, &ebx, &ecx);
-       else
-               cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
-       if (eax.split.type == CACHE_TYPE_NULL)
-               return -EIO; /* better error ? */
-
-       this_leaf->eax = eax;
-       this_leaf->ebx = ebx;
-       this_leaf->ecx = ecx;
-       this_leaf->size = (ecx.split.number_of_sets + 1) *
-               (ebx.split.coherency_line_size + 1) *
-               (ebx.split.physical_line_partition + 1) *
-               (ebx.split.ways_of_associativity + 1);
-       return 0;
-}
-
-/* will only be called once; __init is safe here */
-static int __init find_num_cache_leaves(void)
-{
-       unsigned int            eax, ebx, ecx, edx;
-       union _cpuid4_leaf_eax  cache_eax;
-       int                     i = -1;
-
-       do {
-               ++i;
-               /* Do cpuid(4) loop to find out num_cache_leaves */
-               cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
-               cache_eax.full = eax;
-       } while (cache_eax.split.type != CACHE_TYPE_NULL);
-       return i;
-}
-
-unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
-{
-       unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
-       unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
-       unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
-       unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_X86_HT
-       unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
-#endif
-
-       if (c->cpuid_level > 3) {
-               static int is_initialized;
-
-               if (is_initialized == 0) {
-                       /* Init num_cache_leaves from boot CPU */
-                       num_cache_leaves = find_num_cache_leaves();
-                       is_initialized++;
-               }
-
-               /*
-                * Whenever possible use cpuid(4), deterministic cache
-                * parameters cpuid leaf to find the cache details
-                */
-               for (i = 0; i < num_cache_leaves; i++) {
-                       struct _cpuid4_info this_leaf;
-
-                       int retval;
-
-                       retval = cpuid4_cache_lookup(i, &this_leaf);
-                       if (retval >= 0) {
-                               switch(this_leaf.eax.split.level) {
-                                   case 1:
-                                       if (this_leaf.eax.split.type ==
-                                                       CACHE_TYPE_DATA)
-                                               new_l1d = this_leaf.size/1024;
-                                       else if (this_leaf.eax.split.type ==
-                                                       CACHE_TYPE_INST)
-                                               new_l1i = this_leaf.size/1024;
-                                       break;
-                                   case 2:
-                                       new_l2 = this_leaf.size/1024;
-                                       num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
-                                       index_msb = get_count_order(num_threads_sharing);
-                                       l2_id = c->apicid >> index_msb;
-                                       break;
-                                   case 3:
-                                       new_l3 = this_leaf.size/1024;
-                                       num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
-                                       index_msb = get_count_order(num_threads_sharing);
-                                       l3_id = c->apicid >> index_msb;
-                                       break;
-                                   default:
-                                       break;
-                               }
-                       }
-               }
-       }
-       /*
-        * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
-        * trace cache
-        */
-       if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
-               /* supports eax=2  call */
-               int i, j, n;
-               int regs[4];
-               unsigned char *dp = (unsigned char *)regs;
-               int only_trace = 0;
-
-               if (num_cache_leaves != 0 && c->x86 == 15)
-                       only_trace = 1;
-
-               /* Number of times to iterate */
-               n = cpuid_eax(2) & 0xFF;
-
-               for ( i = 0 ; i < n ; i++ ) {
-                       cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
-
-                       /* If bit 31 is set, this is an unknown format */
-                       for ( j = 0 ; j < 3 ; j++ ) {
-                               if ( regs[j] < 0 ) regs[j] = 0;
-                       }
-
-                       /* Byte 0 is level count, not a descriptor */
-                       for ( j = 1 ; j < 16 ; j++ ) {
-                               unsigned char des = dp[j];
-                               unsigned char k = 0;
-
-                               /* look up this descriptor in the table */
-                               while (cache_table[k].descriptor != 0)
-                               {
-                                       if (cache_table[k].descriptor == des) {
-                                               if (only_trace && cache_table[k].cache_type != LVL_TRACE)
-                                                       break;
-                                               switch (cache_table[k].cache_type) {
-                                               case LVL_1_INST:
-                                                       l1i += cache_table[k].size;
-                                                       break;
-                                               case LVL_1_DATA:
-                                                       l1d += cache_table[k].size;
-                                                       break;
-                                               case LVL_2:
-                                                       l2 += cache_table[k].size;
-                                                       break;
-                                               case LVL_3:
-                                                       l3 += cache_table[k].size;
-                                                       break;
-                                               case LVL_TRACE:
-                                                       trace += cache_table[k].size;
-                                                       break;
-                                               }
-
-                                               break;
-                                       }
-
-                                       k++;
-                               }
-                       }
-               }
-       }
-
-       if (new_l1d)
-               l1d = new_l1d;
-
-       if (new_l1i)
-               l1i = new_l1i;
-
-       if (new_l2) {
-               l2 = new_l2;
-#ifdef CONFIG_X86_HT
-               cpu_llc_id[cpu] = l2_id;
-#endif
-       }
-
-       if (new_l3) {
-               l3 = new_l3;
-#ifdef CONFIG_X86_HT
-               cpu_llc_id[cpu] = l3_id;
-#endif
-       }
-
-       if (trace)
-               printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
-       else if ( l1i )
-               printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
-
-       if (l1d)
-               printk(", L1 D cache: %dK\n", l1d);
-       else
-               printk("\n");
-
-       if (l2)
-               printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
-
-       if (l3)
-               printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
-
-       c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
-
-       return l2;
-}
-
-/* pointer to _cpuid4_info array (for each cache leaf) */
-static struct _cpuid4_info *cpuid4_info[NR_CPUS];
-#define CPUID4_INFO_IDX(x,y)    (&((cpuid4_info[x])[y]))
-
-#ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
-{
-       struct _cpuid4_info     *this_leaf, *sibling_leaf;
-       unsigned long num_threads_sharing;
-       int index_msb, i;
-       struct cpuinfo_x86 *c = cpu_data;
-
-       this_leaf = CPUID4_INFO_IDX(cpu, index);
-       num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
-
-       if (num_threads_sharing == 1)
-               cpu_set(cpu, this_leaf->shared_cpu_map);
-       else {
-               index_msb = get_count_order(num_threads_sharing);
-
-               for_each_online_cpu(i) {
-                       if (c[i].apicid >> index_msb ==
-                           c[cpu].apicid >> index_msb) {
-                               cpu_set(i, this_leaf->shared_cpu_map);
-                               if (i != cpu && cpuid4_info[i])  {
-                                       sibling_leaf = CPUID4_INFO_IDX(i, index);
-                                       cpu_set(cpu, sibling_leaf->shared_cpu_map);
-                               }
-                       }
-               }
-       }
-}
-static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
-{
-       struct _cpuid4_info     *this_leaf, *sibling_leaf;
-       int sibling;
-
-       this_leaf = CPUID4_INFO_IDX(cpu, index);
-       for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
-               sibling_leaf = CPUID4_INFO_IDX(sibling, index); 
-               cpu_clear(cpu, sibling_leaf->shared_cpu_map);
-       }
-}
-#else
-static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
-static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
-#endif
-
-static void free_cache_attributes(unsigned int cpu)
-{
-       kfree(cpuid4_info[cpu]);
-       cpuid4_info[cpu] = NULL;
-}
-
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
-{
-       struct _cpuid4_info     *this_leaf;
-       unsigned long           j;
-       int                     retval;
-       cpumask_t               oldmask;
-
-       if (num_cache_leaves == 0)
-               return -ENOENT;
-
-       cpuid4_info[cpu] = kmalloc(
-           sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-       if (unlikely(cpuid4_info[cpu] == NULL))
-               return -ENOMEM;
-       memset(cpuid4_info[cpu], 0,
-           sizeof(struct _cpuid4_info) * num_cache_leaves);
-
-       oldmask = current->cpus_allowed;
-       retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
-       if (retval)
-               goto out;
-
-       /* Do cpuid and store the results */
-       retval = 0;
-       for (j = 0; j < num_cache_leaves; j++) {
-               this_leaf = CPUID4_INFO_IDX(cpu, j);
-               retval = cpuid4_cache_lookup(j, this_leaf);
-               if (unlikely(retval < 0))
-                       break;
-               cache_shared_cpu_map_setup(cpu, j);
-       }
-       set_cpus_allowed(current, oldmask);
-
-out:
-       if (retval)
-               free_cache_attributes(cpu);
-       return retval;
-}
-
-#ifdef CONFIG_SYSFS
-
-#include <linux/kobject.h>
-#include <linux/sysfs.h>
-
-extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
-
-/* pointer to kobject for cpuX/cache */
-static struct kobject * cache_kobject[NR_CPUS];
-
-struct _index_kobject {
-       struct kobject kobj;
-       unsigned int cpu;
-       unsigned short index;
-};
-
-/* pointer to array of kobjects for cpuX/cache/indexY */
-static struct _index_kobject *index_kobject[NR_CPUS];
-#define INDEX_KOBJECT_PTR(x,y)    (&((index_kobject[x])[y]))
-
-#define show_one_plus(file_name, object, val)                          \
-static ssize_t show_##file_name                                                \
-                       (struct _cpuid4_info *this_leaf, char *buf)     \
-{                                                                      \
-       return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
-}
-
-show_one_plus(level, eax.split.level, 0);
-show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
-show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
-show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
-show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
-
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
-{
-       return sprintf (buf, "%luK\n", this_leaf->size / 1024);
-}
-
-static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
-{
-       char mask_str[NR_CPUS];
-       cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
-       return sprintf(buf, "%s\n", mask_str);
-}
-
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
-       switch(this_leaf->eax.split.type) {
-           case CACHE_TYPE_DATA:
-               return sprintf(buf, "Data\n");
-               break;
-           case CACHE_TYPE_INST:
-               return sprintf(buf, "Instruction\n");
-               break;
-           case CACHE_TYPE_UNIFIED:
-               return sprintf(buf, "Unified\n");
-               break;
-           default:
-               return sprintf(buf, "Unknown\n");
-               break;
-       }
-}
-
-struct _cache_attr {
-       struct attribute attr;
-       ssize_t (*show)(struct _cpuid4_info *, char *);
-       ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
-};
-
-#define define_one_ro(_name) \
-static struct _cache_attr _name = \
-       __ATTR(_name, 0444, show_##_name, NULL)
-
-define_one_ro(level);
-define_one_ro(type);
-define_one_ro(coherency_line_size);
-define_one_ro(physical_line_partition);
-define_one_ro(ways_of_associativity);
-define_one_ro(number_of_sets);
-define_one_ro(size);
-define_one_ro(shared_cpu_map);
-
-static struct attribute * default_attrs[] = {
-       &type.attr,
-       &level.attr,
-       &coherency_line_size.attr,
-       &physical_line_partition.attr,
-       &ways_of_associativity.attr,
-       &number_of_sets.attr,
-       &size.attr,
-       &shared_cpu_map.attr,
-       NULL
-};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)
-
-static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
-{
-       struct _cache_attr *fattr = to_attr(attr);
-       struct _index_kobject *this_leaf = to_object(kobj);
-       ssize_t ret;
-
-       ret = fattr->show ?
-               fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
-                       buf) :
-               0;
-       return ret;
-}
-
-static ssize_t store(struct kobject * kobj, struct attribute * attr,
-                    const char * buf, size_t count)
-{
-       return 0;
-}
-
-static struct sysfs_ops sysfs_ops = {
-       .show   = show,
-       .store  = store,
-};
-
-static struct kobj_type ktype_cache = {
-       .sysfs_ops      = &sysfs_ops,
-       .default_attrs  = default_attrs,
-};
-
-static struct kobj_type ktype_percpu_entry = {
-       .sysfs_ops      = &sysfs_ops,
-};
-
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
-{
-       kfree(cache_kobject[cpu]);
-       kfree(index_kobject[cpu]);
-       cache_kobject[cpu] = NULL;
-       index_kobject[cpu] = NULL;
-       free_cache_attributes(cpu);
-}
-
-static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
-{
-
-       if (num_cache_leaves == 0)
-               return -ENOENT;
-
-       detect_cache_attributes(cpu);
-       if (cpuid4_info[cpu] == NULL)
-               return -ENOENT;
-
-       /* Allocate all required memory */
-       cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
-       if (unlikely(cache_kobject[cpu] == NULL))
-               goto err_out;
-       memset(cache_kobject[cpu], 0, sizeof(struct kobject));
-
-       index_kobject[cpu] = kmalloc(
-           sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
-       if (unlikely(index_kobject[cpu] == NULL))
-               goto err_out;
-       memset(index_kobject[cpu], 0,
-           sizeof(struct _index_kobject) * num_cache_leaves);
-
-       return 0;
-
-err_out:
-       cpuid4_cache_sysfs_exit(cpu);
-       return -ENOMEM;
-}
-
-/* Add/Remove cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
-{
-       unsigned int cpu = sys_dev->id;
-       unsigned long i, j;
-       struct _index_kobject *this_object;
-       int retval = 0;
-
-       retval = cpuid4_cache_sysfs_init(cpu);
-       if (unlikely(retval < 0))
-               return retval;
-
-       cache_kobject[cpu]->parent = &sys_dev->kobj;
-       kobject_set_name(cache_kobject[cpu], "%s", "cache");
-       cache_kobject[cpu]->ktype = &ktype_percpu_entry;
-       retval = kobject_register(cache_kobject[cpu]);
-
-       for (i = 0; i < num_cache_leaves; i++) {
-               this_object = INDEX_KOBJECT_PTR(cpu,i);
-               this_object->cpu = cpu;
-               this_object->index = i;
-               this_object->kobj.parent = cache_kobject[cpu];
-               kobject_set_name(&(this_object->kobj), "index%1lu", i);
-               this_object->kobj.ktype = &ktype_cache;
-               retval = kobject_register(&(this_object->kobj));
-               if (unlikely(retval)) {
-                       for (j = 0; j < i; j++) {
-                               kobject_unregister(
-                                       &(INDEX_KOBJECT_PTR(cpu,j)->kobj));
-                       }
-                       kobject_unregister(cache_kobject[cpu]);
-                       cpuid4_cache_sysfs_exit(cpu);
-                       break;
-               }
-       }
-       return retval;
-}
-
-static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
-{
-       unsigned int cpu = sys_dev->id;
-       unsigned long i;
-
-       for (i = 0; i < num_cache_leaves; i++) {
-               cache_remove_shared_cpu_map(cpu, i);
-               kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
-       }
-       kobject_unregister(cache_kobject[cpu]);
-       cpuid4_cache_sysfs_exit(cpu);
-       return;
-}
-
-static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
-                                       unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (unsigned long)hcpu;
-       struct sys_device *sys_dev;
-
-       sys_dev = get_cpu_sysdev(cpu);
-       switch (action) {
-       case CPU_ONLINE:
-               cache_add_dev(sys_dev);
-               break;
-       case CPU_DEAD:
-               cache_remove_dev(sys_dev);
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
-{
-    .notifier_call = cacheinfo_cpu_callback,
-};
-
-static int __cpuinit cache_sysfs_init(void)
-{
-       int i;
-
-       if (num_cache_leaves == 0)
-               return 0;
-
-       register_hotcpu_notifier(&cacheinfo_cpu_notifier);
-
-       for_each_online_cpu(i) {
-               cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
-                       (void *)(long)i);
-       }
-
-       return 0;
-}
-
-device_initcall(cache_sysfs_init);
-
-#endif
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Thu Mar 08 14:39:52 2007 -0600
@@ -368,6 +368,7 @@ sysexit_scrit:      /**** START OF SYSEXIT CR
        movl ESI(%esp), %esi
        sysexit
 14:    __DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
 sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
        push %esp
        call evtchn_do_upcall
@@ -427,11 +428,13 @@ restore_nocheck:
 restore_nocheck:
        movl EFLAGS(%esp), %eax
        testl $(VM_MASK|NMI_MASK), %eax
+       CFI_REMEMBER_STATE
        jnz hypervisor_iret
        shr $9, %eax                    # EAX[0] == IRET_EFLAGS.IF
        GET_VCPU_INFO
        andb evtchn_upcall_mask(%esi),%al
        andb $1,%al                     # EAX[0] == IRET_EFLAGS.IF & event_mask
+       CFI_REMEMBER_STATE
        jnz restore_all_enable_events   #        != 0 => enable event delivery
 #endif
        TRACE_IRQS_IRET
@@ -455,8 +458,8 @@ iret_exc:
        .long 1b,iret_exc
 .previous
 
+       CFI_RESTORE_STATE
 #ifndef CONFIG_XEN
-       CFI_RESTORE_STATE
 ldt_ss:
        larl OLDSS(%esp), %eax
        jnz restore_nocheck
@@ -485,14 +488,36 @@ 1:        iret
        .align 4
        .long 1b,iret_exc
 .previous
-       CFI_ENDPROC
 #else
+        ALIGN
+restore_all_enable_events:
+       TRACE_IRQS_ON
+       __ENABLE_INTERRUPTS
+scrit: /**** START OF CRITICAL REGION ****/
+       __TEST_PENDING
+       jnz  14f                        # process more events if necessary...
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+1:     iret
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+14:    __DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
+       jmp  11f
+ecrit:  /**** END OF CRITICAL REGION ****/
+
+       CFI_RESTORE_STATE
 hypervisor_iret:
        andl $~NMI_MASK, EFLAGS(%esp)
        RESTORE_REGS
        addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
        jmp  hypercall_page + (__HYPERVISOR_iret * 32)
 #endif
+       CFI_ENDPROC
 
        # perform work that needs to be done immediately before resumption
        ALIGN
@@ -736,7 +761,9 @@ error_code:
 # critical region we know that the entire frame is present and correct
 # so we can simply throw away the new one.
 ENTRY(hypervisor_callback)
+       RING0_INT_FRAME
        pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        movl EIP(%esp),%eax
        cmpl $scrit,%eax
@@ -749,26 +776,13 @@ ENTRY(hypervisor_callback)
        ja   11f
        addl $OLDESP,%esp               # Remove eflags...ebx from stack frame.
 11:    push %esp
+       CFI_ADJUST_CFA_OFFSET 4
        call evtchn_do_upcall
        add  $4,%esp
+       CFI_ADJUST_CFA_OFFSET -4
        jmp  ret_from_intr
-
-        ALIGN
-restore_all_enable_events:
-       __ENABLE_INTERRUPTS
-scrit: /**** START OF CRITICAL REGION ****/
-       __TEST_PENDING
-       jnz  14f                        # process more events if necessary...
-       RESTORE_REGS
-       addl $4, %esp
-1:     iret
-.section __ex_table,"a"
-       .align 4
-       .long 1b,iret_exc
-.previous
-14:    __DISABLE_INTERRUPTS
-       jmp  11b
-ecrit:  /**** END OF CRITICAL REGION ****/
+       CFI_ENDPROC
+
 # [How we do the fixup]. We want to merge the current stack frame with the
 # just-interrupted frame. How we do this depends on where in the critical
 # region the interrupted handler was executing, and so how many saved
@@ -835,6 +849,7 @@ 4:  mov 16(%esp),%gs
        addl $16,%esp           # EAX != 0 => Category 2 (Bad IRET)
        jmp iret_exc
 5:     addl $16,%esp           # EAX == 0 => Category 1 (Bad segment)
+       RING0_INT_FRAME
        pushl $0
        SAVE_ALL
        jmp ret_from_exception
@@ -860,6 +875,7 @@ 9:  xorl %eax,%eax;         \
        .long 4b,9b;            \
 .previous
 #endif
+       CFI_ENDPROC
 
 ENTRY(coprocessor_error)
        RING0_INT_FRAME
@@ -1187,8 +1203,11 @@ ENDPROC(arch_unwind_init_running)
 #endif
 
 ENTRY(fixup_4gb_segment)
+       RING0_EC_FRAME
        pushl $do_fixup_4gb_segment
-       jmp error_code
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
 
 .section .rodata,"a"
 .align 4
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S  Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S  Thu Mar 08 14:39:52 2007 -0600
@@ -9,10 +9,9 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
+#include <asm/dwarf2.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/elfnote.h>
-
-#define _PAGE_PRESENT 0x1
 
 /*
  * References to members of the new_cpu_data structure.
@@ -65,14 +64,13 @@ ENTRY(startup_32)
 
        pushl %eax              # fake return address
        jmp start_kernel
-L6:
-       jmp L6                  # main should never return here, but
-                               # just in case, we know what happens.
 
 #define HYPERCALL_PAGE_OFFSET 0x1000
 .org HYPERCALL_PAGE_OFFSET
 ENTRY(hypercall_page)
+       CFI_STARTPROC
 .skip 0x1000
+       CFI_ENDPROC
 
 /*
  * Real beginning of normal "text" segment
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -1847,6 +1847,9 @@ static __init int add_pcspkr(void)
        struct platform_device *pd;
        int ret;
 
+       if (!is_initial_xendomain())
+               return 0;
+
        pd = platform_device_alloc("pcspkr", -1);
        if (!pd)
                return -ENOMEM;
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c  Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c  Thu Mar 08 14:39:52 2007 -0600
@@ -1022,16 +1022,21 @@ void halt(void)
 }
 EXPORT_SYMBOL(halt);
 
-/* No locking required. We are only CPU running, and interrupts are off. */
+/* No locking required. Interrupts are disabled on all CPUs. */
 void time_resume(void)
 {
+       unsigned int cpu;
+
        init_cpu_khz();
 
-       get_time_values_from_xen(0);
+       for_each_online_cpu(cpu) {
+               get_time_values_from_xen(cpu);
+               per_cpu(processed_system_time, cpu) =
+                       per_cpu(shadow_time, 0).system_timestamp;
+               init_missing_ticks_accounting(cpu);
+       }
 
        processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
-       per_cpu(processed_system_time, 0) = processed_system_time;
-       init_missing_ticks_accounting(0);
 
        update_wallclock();
 }
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Thu Mar 08 14:39:52 2007 -0600
@@ -374,8 +374,6 @@ void iounmap(volatile void __iomem *addr
 }
 EXPORT_SYMBOL(iounmap);
 
-#ifdef __i386__
-
 void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 {
        unsigned long offset, last_addr;
@@ -443,5 +441,3 @@ void __init bt_iounmap(void *addr, unsig
                --nrpages;
        }
 }
-
-#endif /* __i386__ */
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c   Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c   Thu Mar 08 14:39:52 2007 -0600
@@ -573,64 +573,67 @@ void make_pages_writable(void *va, unsig
        }
 }
 
-static inline int pgd_walk_set_prot(struct page *page, pgprot_t flags)
+static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
 {
        unsigned long pfn = page_to_pfn(page);
-
-       if (PageHighMem(page))
-               return pgprot_val(flags) & _PAGE_RW
-                      ? test_and_clear_bit(PG_pinned, &page->flags)
-                      : !test_and_set_bit(PG_pinned, &page->flags);
-
-       BUG_ON(HYPERVISOR_update_va_mapping(
-               (unsigned long)__va(pfn << PAGE_SHIFT),
-               pfn_pte(pfn, flags), 0));
-
-       return 0;
-}
-
-static int pgd_walk(pgd_t *pgd_base, pgprot_t flags)
+       int rc;
+
+       if (PageHighMem(page)) {
+               if (pgprot_val(flags) & _PAGE_RW)
+                       clear_bit(PG_pinned, &page->flags);
+               else
+                       set_bit(PG_pinned, &page->flags);
+       } else {
+               rc = HYPERVISOR_update_va_mapping(
+                       (unsigned long)__va(pfn << PAGE_SHIFT),
+                       pfn_pte(pfn, flags), 0);
+               if (rc)
+                       BUG();
+       }
+}
+
+static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
 {
        pgd_t *pgd = pgd_base;
        pud_t *pud;
        pmd_t *pmd;
-       int    g, u, m, flush;
+       int    g, u, m, rc;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
-               return 0;
+               return; /* pgd_walk() is void now: no value to return */
 
-       for (g = 0, flush = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
+       for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, 0);
                if (PTRS_PER_PUD > 1) /* not folded */
-                       flush |= pgd_walk_set_prot(virt_to_page(pud),flags);
+                       pgd_walk_set_prot(virt_to_page(pud),flags);
                for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
                        if (pud_none(*pud))
                                continue;
                        pmd = pmd_offset(pud, 0);
                        if (PTRS_PER_PMD > 1) /* not folded */
-                               flush |= pgd_walk_set_prot(virt_to_page(pmd),flags);
+                               pgd_walk_set_prot(virt_to_page(pmd),flags);
                        for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
                                if (pmd_none(*pmd))
                                        continue;
-                               flush |= pgd_walk_set_prot(pmd_page(*pmd),flags);
+                               pgd_walk_set_prot(pmd_page(*pmd),flags);
                        }
                }
        }
 
-       BUG_ON(HYPERVISOR_update_va_mapping(
+       rc = HYPERVISOR_update_va_mapping(
                (unsigned long)pgd_base,
                pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
-               UVMF_TLB_FLUSH));
-
-       return flush;
+               UVMF_TLB_FLUSH);
+       if (rc)
+               BUG();
 }
 
 static void __pgd_pin(pgd_t *pgd)
 {
-       if (pgd_walk(pgd, PAGE_KERNEL_RO))
-               kmap_flush_unused();
+       pgd_walk(pgd, PAGE_KERNEL_RO);
+       kmap_flush_unused();
        xen_pgd_pin(__pa(pgd));
        set_bit(PG_pinned, &virt_to_page(pgd)->flags);
 }
@@ -638,8 +641,7 @@ static void __pgd_unpin(pgd_t *pgd)
 static void __pgd_unpin(pgd_t *pgd)
 {
        xen_pgd_unpin(__pa(pgd));
-       if (pgd_walk(pgd, PAGE_KERNEL))
-               kmap_flush_unused();
+       pgd_walk(pgd, PAGE_KERNEL);
        clear_bit(PG_pinned, &virt_to_page(pgd)->flags);
 }
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig  Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig  Thu Mar 08 14:39:52 2007 -0600
@@ -450,7 +450,7 @@ config CALGARY_IOMMU
        bool "IBM Calgary IOMMU support"
        default y
        select SWIOTLB
-       depends on PCI && EXPERIMENTAL
+       depends on PCI && !X86_64_XEN && EXPERIMENTAL
        help
          Support for hardware IOMMUs in IBM's xSeries x366 and x460
          systems. Needed to run systems with more than 3GB of memory
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile  Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile  Thu Mar 08 14:39:52 2007 -0600
@@ -61,9 +61,7 @@ time-y                                += ../../i386/kernel/time-xen.
 time-y                         += ../../i386/kernel/time-xen.o
 pci-dma-y                      += ../../i386/kernel/pci-dma-xen.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE))  := ../../i386/kernel/microcode-xen.o
-intel_cacheinfo-y              := ../../i386/kernel/cpu/intel_cacheinfo-xen.o
 quirks-y                       := ../../i386/kernel/quirks-xen.o
-alternative-y                  := ../../i386/kernel/alternative-xen.o
 
 n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S       Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S       Thu Mar 08 14:39:52 2007 -0600
@@ -116,19 +116,21 @@ NMI_MASK = 0x80000000
        CFI_ADJUST_CFA_OFFSET   -(6*8)
        .endm
 
-       .macro  CFI_DEFAULT_STACK start=1
+       .macro  CFI_DEFAULT_STACK start=1,adj=0
        .if \start
        CFI_STARTPROC   simple
-       CFI_DEF_CFA     rsp,SS+8
+       CFI_DEF_CFA     rsp,SS+8-(\adj*ARGOFFSET)
        .else
-       CFI_DEF_CFA_OFFSET SS+8
+       CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
        .endif
+       .if \adj == 0
        CFI_REL_OFFSET  r15,R15
        CFI_REL_OFFSET  r14,R14
        CFI_REL_OFFSET  r13,R13
        CFI_REL_OFFSET  r12,R12
        CFI_REL_OFFSET  rbp,RBP
        CFI_REL_OFFSET  rbx,RBX
+       .endif
        CFI_REL_OFFSET  r11,R11
        CFI_REL_OFFSET  r10,R10
        CFI_REL_OFFSET  r9,R9
@@ -363,8 +365,8 @@ ENTRY(int_ret_from_sys_call)
        CFI_REL_OFFSET  r9,R9-ARGOFFSET
        CFI_REL_OFFSET  r10,R10-ARGOFFSET
        CFI_REL_OFFSET  r11,R11-ARGOFFSET
+        XEN_BLOCK_EVENTS(%rsi)
        TRACE_IRQS_OFF
-        XEN_BLOCK_EVENTS(%rsi)
        testb $3,CS-ARGOFFSET(%rsp)
         jnz 1f
         /* Need to set the proper %ss (not NULL) for ring 3 iretq */
@@ -534,7 +536,7 @@ END(stub_rt_sigreturn)
  */ 
 
 retint_check:
-       CFI_DEFAULT_STACK
+       CFI_DEFAULT_STACK adj=1
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        CFI_REMEMBER_STATE
@@ -838,6 +840,7 @@ ENTRY(error_entry)
        CFI_REL_OFFSET  r15,R15
 #if 0        
        cmpl $__KERNEL_CS,CS(%rsp)
+       CFI_REMEMBER_STATE
        je  error_kernelspace
 #endif        
 error_call_handler:
@@ -864,7 +867,7 @@ error_exit:
        TRACE_IRQS_IRETQ
        jmp   retint_restore_args
 
-error_kernelspace:
+#if 0
          /*
          * We need to re-write the logic here because we don't do iretq to 
          * to return to user mode. It's still possible that we get trap/fault
@@ -872,7 +875,8 @@ error_kernelspace:
          * for example).
          *
          */           
-#if 0
+       CFI_RESTORE_STATE
+error_kernelspace:
        incl %ebx
        /* There are two places in the kernel that can potentially fault with
           usergs. Handle them here. The exception handlers after
@@ -888,11 +892,13 @@ error_kernelspace:
        cmpq $gs_change,RIP(%rsp)
         je   error_swapgs
        jmp  error_sti
-#endif        
+#endif
+       CFI_ENDPROC
 END(error_entry)
        
 ENTRY(hypervisor_callback)
        zeroentry do_hypervisor_callback
+END(hypervisor_callback)
         
 /*
  * Copied from arch/xen/i386/kernel/entry.S
@@ -909,48 +915,66 @@ ENTRY(hypervisor_callback)
 # existing activation in its critical region -- if so, we pop the current
 # activation and restart the handler using the previous one.
 ENTRY(do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
+       CFI_STARTPROC
 # Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
 # see the correct pointer to the pt_regs
        movq %rdi, %rsp            # we don't return, adjust the stack frame
-11:    movq %gs:pda_irqstackptr,%rax
-       incl %gs:pda_irqcount
-       cmovzq %rax,%rsp
-       pushq %rdi
+       CFI_ENDPROC
+       CFI_DEFAULT_STACK
+11:    incl %gs:pda_irqcount
+       movq %rsp,%rbp
+       CFI_DEF_CFA_REGISTER rbp
+       cmovzq %gs:pda_irqstackptr,%rsp
+       pushq %rbp                      # backlink for old unwinder
        call evtchn_do_upcall
        popq %rsp
+       CFI_DEF_CFA_REGISTER rsp
        decl %gs:pda_irqcount
        jmp  error_exit
+       CFI_ENDPROC
+END(do_hypervisor_callback)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 KPROBE_ENTRY(nmi)
        zeroentry do_nmi_callback
 ENTRY(do_nmi_callback)
+       CFI_STARTPROC
         addq $8, %rsp
+       CFI_ENDPROC
+       CFI_DEFAULT_STACK
         call do_nmi
         orl  $NMI_MASK,EFLAGS(%rsp)
         RESTORE_REST
         XEN_BLOCK_EVENTS(%rsi)
+       TRACE_IRQS_OFF
         GET_THREAD_INFO(%rcx)
         jmp  retint_restore_args
+       CFI_ENDPROC
        .previous .text
+END(nmi)
 #endif
 
         ALIGN
 restore_all_enable_events:  
+       CFI_DEFAULT_STACK adj=1
+       TRACE_IRQS_ON
        XEN_UNBLOCK_EVENTS(%rsi)        # %rsi is already set up...
 
 scrit: /**** START OF CRITICAL REGION ****/
        XEN_TEST_PENDING(%rsi)
+       CFI_REMEMBER_STATE
        jnz  14f                        # process more events if necessary...
        XEN_PUT_VCPU_INFO(%rsi)
         RESTORE_ARGS 0,8,0
         HYPERVISOR_IRET 0
         
+       CFI_RESTORE_STATE
 14:    XEN_LOCKED_BLOCK_EVENTS(%rsi)
        XEN_PUT_VCPU_INFO(%rsi)
        SAVE_REST
         movq %rsp,%rdi                  # set the argument again
        jmp  11b
+       CFI_ENDPROC
 ecrit:  /**** END OF CRITICAL REGION ****/
 # At this point, unlike on x86-32, we don't do the fixup to simplify the 
 # code and the stack frame is more complex on x86-64.
@@ -970,8 +994,12 @@ ecrit:  /**** END OF CRITICAL REGION ***
 # We distinguish between categories by comparing each saved segment register
 # with its current contents: any discrepancy means we in category 1.
 ENTRY(failsafe_callback)
+       _frame (RIP-0x30)
+       CFI_REL_OFFSET rcx, 0
+       CFI_REL_OFFSET r11, 8
        movw %ds,%cx
        cmpw %cx,0x10(%rsp)
+       CFI_REMEMBER_STATE
        jne 1f
        movw %es,%cx
        cmpw %cx,0x18(%rsp)
@@ -984,17 +1012,26 @@ ENTRY(failsafe_callback)
        jne 1f
        /* All segments match their saved values => Category 2 (Bad IRET). */
        movq (%rsp),%rcx
+       CFI_RESTORE rcx
        movq 8(%rsp),%r11
+       CFI_RESTORE r11
        addq $0x30,%rsp
+       CFI_ADJUST_CFA_OFFSET -0x30
        movq $11,%rdi   /* SIGSEGV */
        jmp do_exit                     
+       CFI_RESTORE_STATE
 1:     /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
        movq (%rsp),%rcx
+       CFI_RESTORE rcx
        movq 8(%rsp),%r11
+       CFI_RESTORE r11
        addq $0x30,%rsp
+       CFI_ADJUST_CFA_OFFSET -0x30
        pushq $0
+       CFI_ADJUST_CFA_OFFSET 8
        SAVE_ALL
        jmp error_exit
+       CFI_ENDPROC
 #if 0        
         .section __ex_table,"a"
         .align 8
@@ -1117,12 +1154,12 @@ END(device_not_available)
 
        /* runs on exception stack */
 KPROBE_ENTRY(debug)
-       INTR_FRAME
-/*     pushq $0
+/*     INTR_FRAME
+       pushq $0
        CFI_ADJUST_CFA_OFFSET 8 */
        zeroentry do_debug
-/*     paranoid_exit */
-       CFI_ENDPROC
+/*     paranoidexit
+       CFI_ENDPROC */
 END(debug)
        .previous .text
 
@@ -1144,12 +1181,12 @@ END(nmi)
 #endif        
 
 KPROBE_ENTRY(int3)
-       INTR_FRAME
-/*     pushq $0
+/*     INTR_FRAME
+       pushq $0
        CFI_ADJUST_CFA_OFFSET 8 */
        zeroentry do_int3
-/*     jmp paranoid_exit1 */
-       CFI_ENDPROC
+/*     jmp paranoid_exit1
+       CFI_ENDPROC */
 END(int3)
        .previous .text
 
@@ -1193,9 +1230,11 @@ END(segment_not_present)
 
        /* runs on exception stack */
 ENTRY(stack_segment)
-       XCPT_FRAME
+/*     XCPT_FRAME
+       paranoidentry do_stack_segment */
        errorentry do_stack_segment
-       CFI_ENDPROC
+/*     jmp paranoid_exit1
+       CFI_ENDPROC */
 END(stack_segment)
 
 KPROBE_ENTRY(general_protection)
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S        Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S        Thu Mar 08 14:39:52 2007 -0600
@@ -22,10 +22,8 @@
 #include <asm/page.h>
 #include <asm/msr.h>
 #include <asm/cache.h>
-
+#include <asm/dwarf2.h>
 #include <xen/interface/elfnote.h>
-
-#define _PAGE_PRESENT 0x1
 
        .section .bootstrap.text, "ax", @progbits
        .code64
@@ -42,6 +40,7 @@ ENTRY(_start)
        /* rsi is pointer to startup info structure.
           pass it to C */
        movq %rsi,%rdi
+       pushq $0                # fake return address
        jmp x86_64_start_kernel
 
 ENTRY(stext)
@@ -82,7 +81,25 @@ NEXT_PAGE(level2_kernel_pgt)
        .fill   512,8,0
 
 NEXT_PAGE(hypercall_page)
-       .fill   512,8,0
+       CFI_STARTPROC
+       .rept 0x1000 / 0x20
+       .skip 1 /* push %rcx */
+       CFI_ADJUST_CFA_OFFSET   8
+       CFI_REL_OFFSET  rcx,0
+       .skip 2 /* push %r11 */
+       CFI_ADJUST_CFA_OFFSET   8
+       CFI_REL_OFFSET  r11,0
+       .skip 5 /* mov $#,%eax */
+       .skip 2 /* syscall */
+       .skip 2 /* pop %r11 */
+       CFI_ADJUST_CFA_OFFSET -8
+       CFI_RESTORE r11
+       .skip 1 /* pop %rcx */
+       CFI_ADJUST_CFA_OFFSET -8
+       CFI_RESTORE rcx
+       .align 0x20,0 /* ret */
+       .endr
+       CFI_ENDPROC
 
 #undef NEXT_PAGE
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c       Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c       Thu Mar 08 14:39:52 2007 -0600
@@ -47,11 +47,11 @@ EXPORT_SYMBOL(init_task);
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */ 
 DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
-#endif
 
 /* Copies of the original ist values from the tss are only accessed during
  * debugging, no special alignment required.
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
+#endif
 
 #define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Thu Mar 08 14:39:52 2007 -0600
@@ -676,7 +676,8 @@ void __init setup_arch(char **cmdline_p)
 
        init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
 
-       /* dmi_scan_machine(); */
+       if (is_initial_xendomain())
+               dmi_scan_machine();
 
 #ifdef CONFIG_ACPI_NUMA
        /*
@@ -691,6 +692,14 @@ void __init setup_arch(char **cmdline_p)
        contig_initmem_init(0, end_pfn);
 #endif
 
+#ifdef CONFIG_XEN
+       /*
+        * Reserve kernel, physmap, start info, initial page tables, and
+        * direct mapping.
+        */
+       reserve_bootmem_generic(__pa_symbol(&_text),
+                               (table_end << PAGE_SHIFT) - __pa_symbol(&_text));
+#else
        /* Reserve direct mapping */
        reserve_bootmem_generic(table_start << PAGE_SHIFT, 
                                (table_end - table_start) << PAGE_SHIFT);
@@ -699,12 +708,6 @@ void __init setup_arch(char **cmdline_p)
        reserve_bootmem_generic(__pa_symbol(&_text),
                                __pa_symbol(&_end) - __pa_symbol(&_text));
 
-#ifdef CONFIG_XEN
-       /* reserve physmap, start info and initial page tables */
-       reserve_bootmem_generic(__pa_symbol(&_end),
-                               (table_start << PAGE_SHIFT) -
-                               __pa_symbol(&_end));
-#else
        /*
         * reserve physical page 0 - it's a special BIOS page on many boxes,
         * enabling clean reboots, SMP operation, laptop functions.
@@ -1625,13 +1628,6 @@ struct seq_operations cpuinfo_op = {
        .show = show_cpuinfo,
 };
 
-static int __init run_dmi_scan(void)
-{
-       dmi_scan_machine();
-       return 0;
-}
-core_initcall(run_dmi_scan);
-
 #if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
 #include <linux/platform_device.h>
 static __init int add_pcspkr(void)
@@ -1639,6 +1635,9 @@ static __init int add_pcspkr(void)
        struct platform_device *pd;
        int ret;
 
+       if (!is_initial_xendomain())
+               return 0;
+
        pd = platform_device_alloc("pcspkr", -1);
        if (!pd)
                return -ENOMEM;
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-/* Exports for assembly files.
-   All C exports should go in the respective C files. */
-
-#include <linux/module.h>
-#include <linux/smp.h>
-
-#include <asm/semaphore.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-
-EXPORT_SYMBOL(kernel_thread);
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
-EXPORT_SYMBOL(__get_user_1);
-EXPORT_SYMBOL(__get_user_2);
-EXPORT_SYMBOL(__get_user_4);
-EXPORT_SYMBOL(__get_user_8);
-EXPORT_SYMBOL(__put_user_1);
-EXPORT_SYMBOL(__put_user_2);
-EXPORT_SYMBOL(__put_user_4);
-EXPORT_SYMBOL(__put_user_8);
-
-EXPORT_SYMBOL(copy_user_generic);
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(copy_to_user);
-
-EXPORT_SYMBOL(copy_page);
-EXPORT_SYMBOL(clear_page);
-
-#ifdef CONFIG_SMP
-extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
-extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
-EXPORT_SYMBOL(__write_lock_failed);
-EXPORT_SYMBOL(__read_lock_failed);
-#endif
-
-/* Export string functions. We normally rely on gcc builtin for most of these,
-   but gcc sometimes decides not to inline them. */    
-#undef memcpy
-#undef memset
-#undef memmove
-
-extern void * memset(void *,int,__kernel_size_t);
-extern void * memcpy(void *,const void *,__kernel_size_t);
-extern void * __memcpy(void *,const void *,__kernel_size_t);
-
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(__memcpy);
-
-EXPORT_SYMBOL(empty_zero_page);
-
-EXPORT_SYMBOL(load_gs_index);
-
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c    Thu Mar 08 14:39:52 2007 -0600
@@ -102,13 +102,10 @@ static void __meminit early_make_page_re
                BUG();
 }
 
-void make_page_readonly(void *va, unsigned int feature)
+static void __make_page_readonly(void *va)
 {
        pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
        unsigned long addr = (unsigned long) va;
-
-       if (xen_feature(feature))
-               return;
 
        pgd = pgd_offset_k(addr);
        pud = pud_offset(pgd, addr);
@@ -120,16 +117,13 @@ void make_page_readonly(void *va, unsign
                xen_l1_entry_update(ptep, pte); /* fallback */
 
        if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
-               make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT), feature);
-}
-
-void make_page_writable(void *va, unsigned int feature)
+               __make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT));
+}
+
+static void __make_page_writable(void *va)
 {
        pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
        unsigned long addr = (unsigned long) va;
-
-       if (xen_feature(feature))
-               return;
 
        pgd = pgd_offset_k(addr);
        pud = pud_offset(pgd, addr);
@@ -141,7 +135,19 @@ void make_page_writable(void *va, unsign
                xen_l1_entry_update(ptep, pte); /* fallback */
 
        if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
-               make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT), feature);
+               __make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT));
+}
+
+void make_page_readonly(void *va, unsigned int feature)
+{
+       if (!xen_feature(feature))
+               __make_page_readonly(va);
+}
+
+void make_page_writable(void *va, unsigned int feature)
+{
+       if (!xen_feature(feature))
+               __make_page_writable(va);
 }
 
 void make_pages_readonly(void *va, unsigned nr, unsigned int feature)
@@ -150,7 +156,7 @@ void make_pages_readonly(void *va, unsig
                return;
 
        while (nr-- != 0) {
-               make_page_readonly(va, feature);
+               __make_page_readonly(va);
                va = (void*)((unsigned long)va + PAGE_SIZE);
        }
 }
@@ -161,7 +167,7 @@ void make_pages_writable(void *va, unsig
                return;
 
        while (nr-- != 0) {
-               make_page_writable(va, feature);
+               __make_page_writable(va);
                va = (void*)((unsigned long)va + PAGE_SIZE);
        }
 }
@@ -208,7 +214,11 @@ static __init void *spp_getpage(void)
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC); 
-       else
+       else if (start_pfn < table_end) {
+               ptr = __va(start_pfn << PAGE_SHIFT);
+               start_pfn++;
+               memset(ptr, 0, PAGE_SIZE);
+       } else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
@@ -299,36 +309,26 @@ static __init void set_pte_phys_ma(unsig
 
                pmd = (pmd_t *) spp_getpage(); 
                make_page_readonly(pmd, XENFEAT_writable_page_tables);
-
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
-
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
-
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                make_page_readonly(pte, XENFEAT_writable_page_tables);
-
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
-
        new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
+
        pte = pte_offset_kernel(pmd, vaddr);
-
-       /* 
-        * Note that the pte page is already RO, thus we want to use
-        * xen_l1_entry_update(), not set_pte().
-        */
-       xen_l1_entry_update(pte, 
-                           pfn_pte_ma(phys >> PAGE_SHIFT, prot));
+       set_pte(pte, new_pte);
 
        /*
         * It's enough to flush this one mapping.
@@ -404,6 +404,7 @@ static inline void __set_pte(pte_t *dst,
 
 static inline int make_readonly(unsigned long paddr)
 {
+       extern char __vsyscall_0;
        int readonly = 0;
 
        /* Make new page tables read-only. */
@@ -420,25 +421,45 @@ static inline int make_readonly(unsigned
        /*
         * No need for writable mapping of kernel image. This also ensures that
         * page and descriptor tables embedded inside don't have writable
-        * mappings. 
+        * mappings. Exclude the vsyscall area here, allowing alternative
+        * instruction patching to work.
         */
-       if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
+       if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end))
+           && !(paddr >= __pa_symbol(&__vsyscall_0)
+                && paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE))
                readonly = 1;
 
        return readonly;
 }
 
+#ifndef CONFIG_XEN
 /* Must run before zap_low_mappings */
 __init void *early_ioremap(unsigned long addr, unsigned long size)
 {
-       return ioremap(addr, size);
+       unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
+
+       /* actually usually some more */
+       if (size >= LARGE_PAGE_SIZE) {
+               printk("SMBIOS area too long %lu\n", size);
+               return NULL;
+       }
+       set_pmd(temp_mappings[0].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+       map += LARGE_PAGE_SIZE;
+       set_pmd(temp_mappings[1].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+       __flush_tlb();
+       return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
 }
 
 /* To avoid virtual aliases later */
 __init void early_iounmap(void *addr, unsigned long size)
 {
-       iounmap(addr);
-}
+       if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
+               printk("early_iounmap: bad address %p\n", addr);
+       set_pmd(temp_mappings[0].pmd, __pmd(0));
+       set_pmd(temp_mappings[1].pmd, __pmd(0));
+       __flush_tlb();
+}
+#endif
 
 static void __meminit
 phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
@@ -638,7 +659,7 @@ static void __init extend_init_mapping(u
 
 static void __init find_early_table_space(unsigned long end)
 {
-       unsigned long puds, pmds, ptes, tables; 
+       unsigned long puds, pmds, ptes, tables;
 
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
@@ -656,6 +677,64 @@ static void __init find_early_table_spac
        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
+}
+
+static void xen_finish_init_mapping(void)
+{
+       unsigned long i, start, end;
+
+       /* Re-vector virtual addresses pointing into the initial
+          mapping to the just-established permanent ones. */
+       xen_start_info = __va(__pa(xen_start_info));
+       xen_start_info->pt_base = (unsigned long)
+               __va(__pa(xen_start_info->pt_base));
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               phys_to_machine_mapping =
+                       __va(__pa(xen_start_info->mfn_list));
+               xen_start_info->mfn_list = (unsigned long)
+                       phys_to_machine_mapping;
+       }
+       if (xen_start_info->mod_start)
+               xen_start_info->mod_start = (unsigned long)
+                       __va(__pa(xen_start_info->mod_start));
+
+       /* Destroy the Xen-created mappings beyond the kernel image as
+        * well as the temporary mappings created above. Prevents
+        * overlap with modules area (if init mapping is very big).
+        */
+       start = PAGE_ALIGN((unsigned long)_end);
+       end   = __START_KERNEL_map + (table_end << PAGE_SHIFT);
+       for (; start < end; start += PAGE_SIZE)
+               WARN_ON(HYPERVISOR_update_va_mapping(
+                       start, __pte_ma(0), 0));
+
+       /* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */
+       table_end = ~0UL;
+
+       /*
+        * Prefetch pte's for the bt_ioremap() area. It gets used before the
+        * boot-time allocator is online, so allocate-on-demand would fail.
+        */
+       for (i = FIX_BTMAP_END; i <= FIX_BTMAP_BEGIN; i++)
+               __set_fixmap(i, 0, __pgprot(0));
+
+       /* Switch to the real shared_info page, and clear the dummy page. */
+       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+       memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+       /* Set up mapping of lowest 1MB of physical memory. */
+       for (i = 0; i < NR_FIX_ISAMAPS; i++)
+               if (is_initial_xendomain())
+                       set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
+               else
+                       __set_fixmap(FIX_ISAMAP_BEGIN - i,
+                                    virt_to_mfn(empty_zero_page)
+                                    << PAGE_SHIFT,
+                                    PAGE_KERNEL_RO);
+
+       /* Disable the 'start_pfn' allocator. */
+       table_end = start_pfn;
 }
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -663,7 +742,7 @@ static void __init find_early_table_spac
    physical memory. To access them they are temporarily mapped. */
 void __meminit init_memory_mapping(unsigned long start, unsigned long end)
 { 
-       unsigned long next; 
+       unsigned long next;
 
        Dprintk("init_memory_mapping\n");
 
@@ -702,31 +781,7 @@ void __meminit init_memory_mapping(unsig
 
        if (!after_bootmem) {
                BUG_ON(start_pfn != table_end);
-
-               /* Re-vector virtual addresses pointing into the initial
-                  mapping to the just-established permanent ones. */
-               xen_start_info = __va(__pa(xen_start_info));
-               xen_start_info->pt_base = (unsigned long)
-                       __va(__pa(xen_start_info->pt_base));
-               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       phys_to_machine_mapping =
-                               __va(__pa(xen_start_info->mfn_list));
-                       xen_start_info->mfn_list = (unsigned long)
-                               phys_to_machine_mapping;
-               }
-               if (xen_start_info->mod_start)
-                       xen_start_info->mod_start = (unsigned long)
-                               __va(__pa(xen_start_info->mod_start));
-
-               /* Destroy the Xen-created mappings beyond the kernel image as
-                * well as the temporary mappings created above. Prevents
-                * overlap with modules area (if init mapping is very big).
-                */
-               start = PAGE_ALIGN((unsigned long)_end);
-               end   = __START_KERNEL_map + (table_end << PAGE_SHIFT);
-               for (; start < end; start += PAGE_SIZE)
-                       WARN_ON(HYPERVISOR_update_va_mapping(
-                               start, __pte_ma(0), 0));
+               xen_finish_init_mapping();
        }
 
        __flush_tlb_all();
@@ -805,7 +860,6 @@ void __init paging_init(void)
 void __init paging_init(void)
 {
        unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
-       int i;
 
        memory_present(0, 0, end_pfn);
        sparse_init();
@@ -813,22 +867,7 @@ void __init paging_init(void)
        free_area_init_node(0, NODE_DATA(0), zones,
                            __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
 
-       /* Switch to the real shared_info page, and clear the
-        * dummy page. */
-       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
-       memset(empty_zero_page, 0, sizeof(empty_zero_page));
-
        init_mm.context.pinned = 1;
-
-       /* Setup mapping of lower 1st MB */
-       for (i = 0; i < NR_FIX_ISAMAPS; i++)
-               if (is_initial_xendomain())
-                       set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
-               else
-                       __set_fixmap(FIX_ISAMAP_BEGIN - i,
-                                    virt_to_mfn(empty_zero_page) << PAGE_SHIFT,
-                                    PAGE_KERNEL_RO);
 }
 #endif
 
@@ -1028,11 +1067,6 @@ void __init mem_init(void)
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
-#ifdef __DO_LATER__
-       /*
-        * Some pages can be pinned, but some are not. Unpinning such pages 
-        * triggers BUG(). 
-        */
        unsigned long addr;
 
        if (begin >= end)
@@ -1044,25 +1078,27 @@ void free_init_pages(char *what, unsigne
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                       POISON_FREE_INITMEM, PAGE_SIZE); 
-               make_page_writable(
-                       __va(__pa(addr)), XENFEAT_writable_page_tables);
-               /*
-                * Make pages from __PAGE_OFFSET address as well
-                */
-               make_page_writable(
-                       (void *)addr, XENFEAT_writable_page_tables);
+               if (addr >= __START_KERNEL_map) {
+                       /* make_readonly() reports all kernel addresses. */
+                       __make_page_writable(__va(__pa(addr)));
+                       if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
+                               pgd_t *pgd = pgd_offset_k(addr);
+                               pud_t *pud = pud_offset(pgd, addr);
+                               pmd_t *pmd = pmd_offset(pud, addr);
+                               pte_t *pte = pte_offset_kernel(pmd, addr);
+
+                               xen_l1_entry_update(pte, __pte(0)); /* fallback */
+                       }
+               }
                free_page(addr);
                totalram_pages++;
        }
-#endif
 }
 
 void free_initmem(void)
 {
-#ifdef __DO_LATER__
        memset(__initdata_begin, POISON_FREE_INITDATA,
                __initdata_end - __initdata_begin);
-#endif
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
@@ -1125,7 +1161,7 @@ int kern_addr_valid(unsigned long addr)
        if (pgd_none(*pgd))
                return 0;
 
-       pud = pud_offset_k(pgd, addr);
+       pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0; 
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c        Thu Mar 08 14:39:52 2007 -0600
@@ -24,10 +24,13 @@ static inline void mm_walk_set_prot(void
 {
        struct page *page = virt_to_page(pt);
        unsigned long pfn = page_to_pfn(page);
-
-       BUG_ON(HYPERVISOR_update_va_mapping(
-                      (unsigned long)__va(pfn << PAGE_SHIFT),
-                      pfn_pte(pfn, flags), 0));
+       int rc;
+
+       rc = HYPERVISOR_update_va_mapping(
+               (unsigned long)__va(pfn << PAGE_SHIFT),
+               pfn_pte(pfn, flags), 0);
+       if (rc)
+               BUG();
 }
 
 static void mm_walk(struct mm_struct *mm, pgprot_t flags)
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/char/mem.c   Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c   Thu Mar 08 14:39:52 2007 -0600
@@ -787,7 +787,7 @@ static const struct file_operations mem_
        .open           = open_mem,
 };
 #else
-extern struct file_operations mem_fops;
+extern const struct file_operations mem_fops;
 #endif
 
 static const struct file_operations kmem_fops = {
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Thu Mar 08 14:39:52 2007 -0600
@@ -113,14 +113,13 @@ void __exit tpmif_exit(void);
 
 
 static inline int
-tx_buffer_copy(struct tx_buffer *txb, const u8 * src, int len,
+tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
                int isuserbuffer)
 {
        int copied = len;
 
-       if (len > txb->size) {
+       if (len > txb->size)
                copied = txb->size;
-       }
        if (isuserbuffer) {
                if (copy_from_user(txb->data, src, copied))
                        return -EFAULT;
@@ -133,18 +132,20 @@ tx_buffer_copy(struct tx_buffer *txb, co
 
 static inline struct tx_buffer *tx_buffer_alloc(void)
 {
-       struct tx_buffer *txb = kzalloc(sizeof (struct tx_buffer),
-                                       GFP_KERNEL);
-
-       if (txb) {
-               txb->len = 0;
-               txb->size = PAGE_SIZE;
-               txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
-               if (txb->data == NULL) {
-                       kfree(txb);
-                       txb = NULL;
-               }
-       }
+       struct tx_buffer *txb;
+
+       txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
+       if (!txb)
+               return NULL;
+
+       txb->len = 0;
+       txb->size = PAGE_SIZE;
+       txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
+       if (txb->data == NULL) {
+               kfree(txb);
+               txb = NULL;
+       }
+
        return txb;
 }
 
@@ -160,37 +161,41 @@ static inline void tx_buffer_free(struct
 /**************************************************************
  Utility function for the tpm_private structure
 **************************************************************/
-static inline void tpm_private_init(struct tpm_private *tp)
+static void tpm_private_init(struct tpm_private *tp)
 {
        spin_lock_init(&tp->tx_lock);
        init_waitqueue_head(&tp->wait_q);
        atomic_set(&tp->refcnt, 1);
 }
 
-static inline void tpm_private_put(void)
-{
-       if ( atomic_dec_and_test(&my_priv->refcnt)) {
-               tpmif_free_tx_buffers(my_priv);
-               kfree(my_priv);
-               my_priv = NULL;
-       }
+static void tpm_private_put(void)
+{
+       if (!atomic_dec_and_test(&my_priv->refcnt))
+               return;
+
+       tpmif_free_tx_buffers(my_priv);
+       kfree(my_priv);
+       my_priv = NULL;
 }
 
 static struct tpm_private *tpm_private_get(void)
 {
        int err;
-       if (!my_priv) {
-               my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
-               if (my_priv) {
-                       tpm_private_init(my_priv);
-                       err = tpmif_allocate_tx_buffers(my_priv);
-                       if (err < 0) {
-                               tpm_private_put();
-                       }
-               }
-       } else {
+
+       if (my_priv) {
                atomic_inc(&my_priv->refcnt);
-       }
+               return my_priv;
+       }
+
+       my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
+       if (!my_priv)
+               return NULL;
+
+       tpm_private_init(my_priv);
+       err = tpmif_allocate_tx_buffers(my_priv);
+       if (err < 0)
+               tpm_private_put();
+
        return my_priv;
 }
 
@@ -379,10 +384,8 @@ static int tpmfront_probe(struct xenbus_
                return -ENOMEM;
 
        tp->chip = init_vtpm(&dev->dev, &tvd, tp);
-
-       if (IS_ERR(tp->chip)) {
+       if (IS_ERR(tp->chip))
                return PTR_ERR(tp->chip);
-       }
 
        err = xenbus_scanf(XBT_NIL, dev->nodename,
                           "handle", "%i", &handle);
@@ -401,6 +404,7 @@ static int tpmfront_probe(struct xenbus_
                tpm_private_put();
                return err;
        }
+
        return 0;
 }
 
@@ -417,30 +421,34 @@ static int tpmfront_suspend(struct xenbu
 {
        struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
        u32 ctr;
-       /* lock, so no app can send */
+
+       /* Take the lock, preventing any application from sending. */
        mutex_lock(&suspend_lock);
        tp->is_suspended = 1;
 
-       for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 300; ctr++) {
+       for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
                if ((ctr % 10) == 0)
                        printk("TPM-FE [INFO]: Waiting for outstanding "
                               "request.\n");
-               /*
-                * Wait for a request to be responded to.
-                */
+               /* Wait for a request to be responded to. */
                interruptible_sleep_on_timeout(&tp->wait_q, 100);
        }
-       xenbus_switch_state(dev, XenbusStateClosing);
-
-       if (atomic_read(&tp->tx_busy)) {
-               /*
-                * A temporary work-around.
-                */
-               printk("TPM-FE [WARNING]: Resetting busy flag.");
-               atomic_set(&tp->tx_busy, 0);
-       }
-
-       return 0;
+
+       return 0;
+}
+
+static int tpmfront_suspend_finish(struct tpm_private *tp)
+{
+       tp->is_suspended = 0;
+       /* Allow applications to send again. */
+       mutex_unlock(&suspend_lock);
+       return 0;
+}
+
+static int tpmfront_suspend_cancel(struct xenbus_device *dev)
+{
+       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
+       return tpmfront_suspend_finish(tp);
 }
 
 static int tpmfront_resume(struct xenbus_device *dev)
@@ -484,6 +492,7 @@ static struct xenbus_driver tpmfront = {
        .resume = tpmfront_resume,
        .otherend_changed = backend_changed,
        .suspend = tpmfront_suspend,
+       .suspend_cancel = tpmfront_suspend_cancel,
 };
 
 static void __init init_tpm_xenbus(void)
@@ -514,9 +523,8 @@ static void tpmif_free_tx_buffers(struct
 {
        unsigned int i;
 
-       for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
+       for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
                tx_buffer_free(tp->tx_buffers[i]);
-       }
 }
 
 static void tpmif_rx_action(unsigned long priv)
@@ -536,9 +544,8 @@ static void tpmif_rx_action(unsigned lon
        received = tx->size;
 
        buffer = kmalloc(received, GFP_ATOMIC);
-       if (NULL == buffer) {
+       if (!buffer)
                goto exit;
-       }
 
        for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
                struct tx_buffer *txb = tp->tx_buffers[i];
@@ -547,9 +554,8 @@ static void tpmif_rx_action(unsigned lon
 
                tx = &tp->tx->ring[i].req;
                tocopy = tx->size;
-               if (tocopy > PAGE_SIZE) {
+               if (tocopy > PAGE_SIZE)
                        tocopy = PAGE_SIZE;
-               }
 
                memcpy(&buffer[offset], txb->data, tocopy);
 
@@ -607,12 +613,13 @@ static int tpm_xmit(struct tpm_private *
                struct tx_buffer *txb = tp->tx_buffers[i];
                int copied;
 
-               if (NULL == txb) {
+               if (!txb) {
                        DPRINTK("txb (i=%d) is NULL. buffers initilized?\n"
                                "Not transmitting anything!\n", i);
                        spin_unlock_irq(&tp->tx_lock);
                        return -EFAULT;
                }
+
                copied = tx_buffer_copy(txb, &buf[offset], count,
                                        isuserbuffer);
                if (copied < 0) {
@@ -624,25 +631,26 @@ static int tpm_xmit(struct tpm_private *
                offset += copied;
 
                tx = &tp->tx->ring[i].req;
-
                tx->addr = virt_to_machine(txb->data);
                tx->size = txb->len;
 
-               DPRINTK("First 4 characters sent by TPM-FE are 0x%02x 0x%02x 0x%02x 0x%02x\n",
+               DPRINTK("First 4 characters sent by TPM-FE are "
+                       "0x%02x 0x%02x 0x%02x 0x%02x\n",
                        txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
 
-               /* get the granttable reference for this page */
+               /* Get the granttable reference for this page. */
                tx->ref = gnttab_claim_grant_reference(&gref_head);
-
-               if (-ENOSPC == tx->ref) {
+               if (tx->ref == -ENOSPC) {
                        spin_unlock_irq(&tp->tx_lock);
-                       DPRINTK(" Grant table claim reference failed in func:%s line:%d file:%s\n", __FUNCTION__, __LINE__, __FILE__);
+                       DPRINTK("Grant table claim reference failed in "
+                               "func:%s line:%d file:%s\n",
+                               __FUNCTION__, __LINE__, __FILE__);
                        return -ENOSPC;
                }
-               gnttab_grant_foreign_access_ref( tx->ref,
-                                                tp->backend_id,
-                                                virt_to_mfn(txb->data),
-                                                0 /*RW*/);
+               gnttab_grant_foreign_access_ref(tx->ref,
+                                               tp->backend_id,
+                                               virt_to_mfn(txb->data),
+                                               0 /*RW*/);
                wmb();
        }
 
@@ -660,15 +668,10 @@ static int tpm_xmit(struct tpm_private *
 
 static void tpmif_notify_upperlayer(struct tpm_private *tp)
 {
-       /*
-        * Notify upper layer about the state of the connection
-        * to the BE.
-        */
-       if (tp->is_connected) {
-               vtpm_vd_status(tp->chip, TPM_VD_STATUS_CONNECTED);
-       } else {
-               vtpm_vd_status(tp->chip, TPM_VD_STATUS_DISCONNECTED);
-       }
+       /* Notify upper layer about the state of the connection to the BE. */
+       vtpm_vd_status(tp->chip, (tp->is_connected
+                                 ? TPM_VD_STATUS_CONNECTED
+                                 : TPM_VD_STATUS_DISCONNECTED));
 }
 
 
@@ -679,20 +682,16 @@ static void tpmif_set_connected_state(st
         * should disconnect - assumption is that we will resume
         * The mutex keeps apps from sending.
         */
-       if (is_connected == 0 && tp->is_suspended == 1) {
+       if (is_connected == 0 && tp->is_suspended == 1)
                return;
-       }
 
        /*
         * Unlock the mutex if we are connected again
         * after being suspended - now resuming.
         * This also removes the suspend state.
         */
-       if (is_connected == 1 && tp->is_suspended == 1) {
-               tp->is_suspended = 0;
-               /* unlock, so apps can resume sending */
-               mutex_unlock(&suspend_lock);
-       }
+       if (is_connected == 1 && tp->is_suspended == 1)
+               tpmfront_suspend_finish(tp);
 
        if (is_connected != tp->is_connected) {
                tp->is_connected = is_connected;
@@ -710,33 +709,24 @@ static void tpmif_set_connected_state(st
 
 static int __init tpmif_init(void)
 {
-       long rc = 0;
        struct tpm_private *tp;
 
        if (is_initial_xendomain())
                return -EPERM;
 
        tp = tpm_private_get();
-       if (!tp) {
-               rc = -ENOMEM;
-               goto failexit;
-       }
+       if (!tp)
+               return -ENOMEM;
 
        IPRINTK("Initialising the vTPM driver.\n");
-       if ( gnttab_alloc_grant_references ( TPMIF_TX_RING_SIZE,
-                                            &gref_head ) < 0) {
-               rc = -EFAULT;
-               goto gnttab_alloc_failed;
+       if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
+                                         &gref_head) < 0) {
+               tpm_private_put();
+               return -EFAULT;
        }
 
        init_tpm_xenbus();
        return 0;
-
-gnttab_alloc_failed:
-       tpm_private_put();
-failexit:
-
-       return (int)rc;
 }
 
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Thu Mar 08 14:39:52 2007 -0600
@@ -335,7 +335,7 @@ static int blktap_ioctl(struct inode *in
                         unsigned int cmd, unsigned long arg);
 static unsigned int blktap_poll(struct file *file, poll_table *wait);
 
-static struct file_operations blktap_fops = {
+static const struct file_operations blktap_fops = {
        .owner   = THIS_MODULE,
        .poll    = blktap_poll,
        .ioctl   = blktap_ioctl,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/xen/char/mem.c       Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/char/mem.c       Thu Mar 08 14:39:52 2007 -0600
@@ -194,7 +194,7 @@ static int open_mem(struct inode * inode
        return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
-struct file_operations mem_fops = {
+const struct file_operations mem_fops = {
        .llseek         = memory_lseek,
        .read           = read_mem,
        .write          = write_mem,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c    Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c    Thu Mar 08 14:39:52 2007 -0600
@@ -59,27 +59,11 @@ EXPORT_SYMBOL(machine_halt);
 EXPORT_SYMBOL(machine_halt);
 EXPORT_SYMBOL(machine_power_off);
 
-/* Ensure we run on the idle task page tables so that we will
-   switch page tables before running user space. This is needed
-   on architectures with separate kernel and user page tables
-   because the user page table pointer is not saved/restored. */
-static void switch_idle_mm(void)
-{
-       struct mm_struct *mm = current->active_mm;
-
-       if (mm == &init_mm)
-               return;
-
-       atomic_inc(&init_mm.mm_count);
-       switch_mm(mm, &init_mm, current);
-       current->active_mm = &init_mm;
-       mmdrop(mm);
-}
-
 static void pre_suspend(void)
 {
        HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
-       clear_fixmap(FIX_SHARED_INFO);
+       HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
+                                    __pte_ma(0), 0);
 
        xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
        xen_start_info->console.domU.mfn =
@@ -89,6 +73,7 @@ static void post_suspend(int suspend_can
 static void post_suspend(int suspend_cancelled)
 {
        int i, j, k, fpp;
+       unsigned long shinfo_mfn;
        extern unsigned long max_pfn;
        extern unsigned long *pfn_to_mfn_frame_list_list;
        extern unsigned long *pfn_to_mfn_frame_list[];
@@ -99,11 +84,14 @@ static void post_suspend(int suspend_can
                xen_start_info->console.domU.mfn =
                        pfn_to_mfn(xen_start_info->console.domU.mfn);
        } else {
+#ifdef CONFIG_SMP
                cpu_initialized_map = cpumask_of_cpu(0);
-       }
-       
-       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-
+#endif
+       }
+
+       shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
+       HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
+                                    pfn_pte_ma(shinfo_mfn, PAGE_KERNEL), 0);
        HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
 
        memset(empty_zero_page, 0, PAGE_SIZE);
@@ -172,10 +160,25 @@ static int take_machine_down(void *p_fas
 
        post_suspend(suspend_cancelled);
        gnttab_resume();
-       if (!suspend_cancelled)
+       if (!suspend_cancelled) {
                irq_resume();
+#ifdef __x86_64__
+               /*
+                * Older versions of Xen do not save/restore the user %cr3.
+                * We do it here just in case, but there's no need if we are
+                * in fast-suspend mode as that implies a new enough Xen.
+                */
+               if (!fast_suspend) {
+                       struct mmuext_op op;
+                       op.cmd = MMUEXT_NEW_USER_BASEPTR;
+                       op.arg1.mfn = pfn_to_mfn(__pa(__user_pgd(
+                               current->active_mm->pgd)) >> PAGE_SHIFT);
+                       if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+                               BUG();
+               }
+#endif
+       }
        time_resume();
-       switch_idle_mm();
        local_irq_enable();
 
        if (fast_suspend && !suspend_cancelled) {
@@ -210,6 +213,10 @@ int __xen_suspend(int fast_suspend)
        }
 #endif
 
+       /* If we are definitely UP then 'slow mode' is actually faster. */
+       if (num_possible_cpus() == 1)
+               fast_suspend = 0;
+
        if (fast_suspend) {
                xenbus_suspend();
                err = stop_machine_run(take_machine_down, &fast_suspend, 0);
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Thu Mar 08 14:39:52 2007 -0600
@@ -406,7 +406,7 @@ static int evtchn_release(struct inode *
        return 0;
 }
 
-static struct file_operations evtchn_fops = {
+static const struct file_operations evtchn_fops = {
        .owner   = THIS_MODULE,
        .read    = evtchn_read,
        .write   = evtchn_write,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Mar 08 14:39:52 2007 -0600
@@ -99,8 +99,20 @@ typedef struct netif_st {
        struct net_device *dev;
        struct net_device_stats stats;
 
+       unsigned int carrier;
+
        wait_queue_head_t waiting_to_free;
 } netif_t;
+
+/*
+ * Implement our own carrier flag: the network stack's version causes delays
+ * when the carrier is re-enabled (in particular, dev_activate() may not
+ * immediately be called, which can cause packet loss; also the etherbridge
+ * can be rather lazy in activating its port).
+ */
+#define netback_carrier_on(netif)      ((netif)->carrier = 1)
+#define netback_carrier_off(netif)     ((netif)->carrier = 0)
+#define netback_carrier_ok(netif)      ((netif)->carrier)
 
 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
@@ -120,7 +132,8 @@ int netif_map(netif_t *netif, unsigned l
 
 void netif_xenbus_init(void);
 
-#define netif_schedulable(dev) (netif_running(dev) && netif_carrier_ok(dev))
+#define netif_schedulable(netif)                               \
+       (netif_running((netif)->dev) && netback_carrier_ok(netif))
 
 void netif_schedule_work(netif_t *netif);
 void netif_deschedule_work(netif_t *netif);
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Thu Mar 08 14:39:52 2007 -0600
@@ -66,16 +66,19 @@ static int net_open(struct net_device *d
 static int net_open(struct net_device *dev)
 {
        netif_t *netif = netdev_priv(dev);
-       if (netif_carrier_ok(dev))
+       if (netback_carrier_ok(netif)) {
                __netif_up(netif);
+               netif_start_queue(dev);
+       }
        return 0;
 }
 
 static int net_close(struct net_device *dev)
 {
        netif_t *netif = netdev_priv(dev);
-       if (netif_carrier_ok(dev))
+       if (netback_carrier_ok(netif))
                __netif_down(netif);
+       netif_stop_queue(dev);
        return 0;
 }
 
@@ -138,8 +141,6 @@ netif_t *netif_alloc(domid_t domid, unsi
                return ERR_PTR(-ENOMEM);
        }
 
-       netif_carrier_off(dev);
-
        netif = netdev_priv(dev);
        memset(netif, 0, sizeof(*netif));
        netif->domid  = domid;
@@ -147,6 +148,8 @@ netif_t *netif_alloc(domid_t domid, unsi
        atomic_set(&netif->refcnt, 1);
        init_waitqueue_head(&netif->waiting_to_free);
        netif->dev = dev;
+
+       netback_carrier_off(netif);
 
        netif->credit_bytes = netif->remaining_credit = ~0UL;
        netif->credit_usec  = 0UL;
@@ -285,7 +288,7 @@ int netif_map(netif_t *netif, unsigned l
        netif_get(netif);
 
        rtnl_lock();
-       netif_carrier_on(netif->dev);
+       netback_carrier_on(netif);
        if (netif_running(netif->dev))
                __netif_up(netif);
        rtnl_unlock();
@@ -302,9 +305,10 @@ err_rx:
 
 void netif_disconnect(netif_t *netif)
 {
-       if (netif_carrier_ok(netif->dev)) {
+       if (netback_carrier_ok(netif)) {
                rtnl_lock();
-               netif_carrier_off(netif->dev);
+               netback_carrier_off(netif);
+               netif_carrier_off(netif->dev); /* discard queued packets */
                if (netif_running(netif->dev))
                        __netif_down(netif);
                rtnl_unlock();
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Thu Mar 08 14:39:52 2007 -0600
@@ -38,7 +38,10 @@
 #include <xen/balloon.h>
 #include <xen/interface/memory.h>
 
-/*#define NETBE_DEBUG_INTERRUPT*/
+/*define NETBE_DEBUG_INTERRUPT*/
+
+/* extra field used in struct page */
+#define netif_page_index(pg) (*(long *)&(pg)->mapping)
 
 struct netbk_rx_meta {
        skb_frag_t frag;
@@ -231,7 +234,7 @@ static void tx_queue_callback(unsigned l
 static void tx_queue_callback(unsigned long data)
 {
        netif_t *netif = (netif_t *)data;
-       if (netif_schedulable(netif->dev))
+       if (netif_schedulable(netif))
                netif_wake_queue(netif->dev);
 }
 
@@ -242,7 +245,7 @@ int netif_be_start_xmit(struct sk_buff *
        BUG_ON(skb->dev != dev);
 
        /* Drop the packet if the target domain has no receive buffers. */
-       if (unlikely(!netif_schedulable(dev) || netbk_queue_full(netif)))
+       if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
                goto drop;
 
        /*
@@ -352,7 +355,7 @@ static u16 netbk_gop_frag(netif_t *netif
                copy_gop->flags = GNTCOPY_dest_gref;
                if (PageForeign(page)) {
                        struct pending_tx_info *src_pend =
-                               &pending_tx_info[page->index];
+                               &pending_tx_info[netif_page_index(page)];
                        copy_gop->source.domid = src_pend->netif->domid;
                        copy_gop->source.u.ref = src_pend->req.gref;
                        copy_gop->flags |= GNTCOPY_source_gref;
@@ -681,7 +684,7 @@ static void net_rx_action(unsigned long 
                }
 
                if (netif_queue_stopped(netif->dev) &&
-                   netif_schedulable(netif->dev) &&
+                   netif_schedulable(netif) &&
                    !netbk_queue_full(netif))
                        netif_wake_queue(netif->dev);
 
@@ -739,7 +742,7 @@ static void add_to_net_schedule_list_tai
 
        spin_lock_irq(&net_schedule_list_lock);
        if (!__on_net_schedule_list(netif) &&
-           likely(netif_schedulable(netif->dev))) {
+           likely(netif_schedulable(netif))) {
                list_add_tail(&netif->list, &net_schedule_list);
                netif_get(netif);
        }
@@ -1327,7 +1330,7 @@ static void netif_page_release(struct pa
        /* Ready for next use. */
        init_page_count(page);
 
-       netif_idx_release(page->index);
+       netif_idx_release(netif_page_index(page));
 }
 
 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
@@ -1337,7 +1340,7 @@ irqreturn_t netif_be_int(int irq, void *
        add_to_net_schedule_list_tail(netif);
        maybe_schedule_tx_action();
 
-       if (netif_schedulable(netif->dev) && !netbk_queue_full(netif))
+       if (netif_schedulable(netif) && !netbk_queue_full(netif))
                netif_wake_queue(netif->dev);
 
        return IRQ_HANDLED;
@@ -1457,7 +1460,7 @@ static int __init netback_init(void)
        for (i = 0; i < MAX_PENDING_REQS; i++) {
                page = mmap_pages[i];
                SetPageForeign(page, netif_page_release);
-               page->index = i;
+               netif_page_index(page) = i;
        }
 
        pending_cons = 0;
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Mar 08 14:39:52 2007 -0600
@@ -338,9 +338,7 @@ static void connect(struct backend_info 
 
        xenbus_switch_state(dev, XenbusStateConnected);
 
-       /* May not get a kick from the frontend, so start the tx_queue now. */
-       if (!netbk_can_queue(be->netif->dev))
-               netif_wake_queue(be->netif->dev);
+       netif_wake_queue(be->netif->dev);
 }
 
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu Mar 08 14:39:52 2007 -0600
@@ -154,6 +154,7 @@ struct netfront_info {
 
        unsigned int irq;
        unsigned int copying_receiver;
+       unsigned int carrier;
 
        /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
@@ -191,6 +192,15 @@ struct netfront_rx_info {
        struct netif_rx_response rx;
        struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 };
+
+/*
+ * Implement our own carrier flag: the network stack's version causes delays
+ * when the carrier is re-enabled (in particular, dev_activate() may not
+ * immediately be called, which can cause packet loss).
+ */
+#define netfront_carrier_on(netif)     ((netif)->carrier = 1)
+#define netfront_carrier_off(netif)    ((netif)->carrier = 0)
+#define netfront_carrier_ok(netif)     ((netif)->carrier)
 
 /*
  * Access macros for acquiring freeing slots in tx_skbs[].
@@ -590,6 +600,22 @@ static int send_fake_arp(struct net_devi
        return dev_queue_xmit(skb);
 }
 
+static inline int netfront_tx_slot_available(struct netfront_info *np)
+{
+       return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
+               (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
+}
+
+static inline void network_maybe_wake_tx(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+
+       if (unlikely(netif_queue_stopped(dev)) &&
+           netfront_tx_slot_available(np) &&
+           likely(netif_running(dev)))
+               netif_wake_queue(dev);
+}
+
 static int network_open(struct net_device *dev)
 {
        struct netfront_info *np = netdev_priv(dev);
@@ -597,7 +623,7 @@ static int network_open(struct net_devic
        memset(&np->stats, 0, sizeof(np->stats));
 
        spin_lock(&np->rx_lock);
-       if (netif_carrier_ok(dev)) {
+       if (netfront_carrier_ok(np)) {
                network_alloc_rx_buffers(dev);
                np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
                if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
@@ -605,25 +631,9 @@ static int network_open(struct net_devic
        }
        spin_unlock(&np->rx_lock);
 
-       netif_start_queue(dev);
+       network_maybe_wake_tx(dev);
 
        return 0;
-}
-
-static inline int netfront_tx_slot_available(struct netfront_info *np)
-{
-       return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
-               (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
-}
-
-static inline void network_maybe_wake_tx(struct net_device *dev)
-{
-       struct netfront_info *np = netdev_priv(dev);
-
-       if (unlikely(netif_queue_stopped(dev)) &&
-           netfront_tx_slot_available(np) &&
-           likely(netif_running(dev)))
-               netif_wake_queue(dev);
 }
 
 static void network_tx_buf_gc(struct net_device *dev)
@@ -633,7 +643,7 @@ static void network_tx_buf_gc(struct net
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
 
-       BUG_ON(!netif_carrier_ok(dev));
+       BUG_ON(!netfront_carrier_ok(np));
 
        do {
                prod = np->tx.sring->rsp_prod;
@@ -703,7 +713,7 @@ static void network_alloc_rx_buffers(str
        int nr_flips;
        netif_rx_request_t *req;
 
-       if (unlikely(!netif_carrier_ok(dev)))
+       if (unlikely(!netfront_carrier_ok(np)))
                return;
 
        /*
@@ -934,7 +944,7 @@ static int network_start_xmit(struct sk_
 
        spin_lock_irq(&np->tx_lock);
 
-       if (unlikely(!netif_carrier_ok(dev) ||
+       if (unlikely(!netfront_carrier_ok(np) ||
                     (frags > 1 && !xennet_can_sg(dev)) ||
                     netif_needs_gso(dev, skb))) {
                spin_unlock_irq(&np->tx_lock);
@@ -1024,7 +1034,7 @@ static irqreturn_t netif_int(int irq, vo
 
        spin_lock_irqsave(&np->tx_lock, flags);
 
-       if (likely(netif_carrier_ok(dev))) {
+       if (likely(netfront_carrier_ok(np))) {
                network_tx_buf_gc(dev);
                /* Under tx_lock: protects access to rx shared-ring indexes. */
                if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
@@ -1299,7 +1309,7 @@ static int netif_poll(struct net_device 
 
        spin_lock(&np->rx_lock);
 
-       if (unlikely(!netif_carrier_ok(dev))) {
+       if (unlikely(!netfront_carrier_ok(np))) {
                spin_unlock(&np->rx_lock);
                return 0;
        }
@@ -1317,7 +1327,7 @@ static int netif_poll(struct net_device 
        work_done = 0;
        while ((i != rp) && (work_done < budget)) {
                memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
-               memset(extras, 0, sizeof(extras));
+               memset(extras, 0, sizeof(rinfo.extras));
 
                err = xennet_get_responses(np, &rinfo, rp, &tmpq,
                                           &pages_flipped);
@@ -1744,7 +1754,7 @@ static int network_connect(struct net_de
         * domain a kick because we've probably just requeued some
         * packets.
         */
-       netif_carrier_on(dev);
+       netfront_carrier_on(np);
        notify_remote_via_irq(np->irq);
        network_tx_buf_gc(dev);
        network_alloc_rx_buffers(dev);
@@ -1989,7 +1999,7 @@ static struct net_device * __devinit cre
 
        np->netdev = netdev;
 
-       netif_carrier_off(netdev);
+       netfront_carrier_off(np);
 
        return netdev;
 
@@ -2023,7 +2033,7 @@ static void netif_disconnect_backend(str
        /* Stop old i/f to prevent errors whilst we rebuild the state. */
        spin_lock_irq(&info->tx_lock);
        spin_lock(&info->rx_lock);
-       netif_carrier_off(info->netdev);
+       netfront_carrier_off(info);
        spin_unlock(&info->rx_lock);
        spin_unlock_irq(&info->tx_lock);
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c        Thu Mar 08 14:39:52 2007 -0600
@@ -239,17 +239,12 @@ static void free_root_bus_devs(struct pc
 {
        struct pci_dev *dev;
 
-       down_write(&pci_bus_sem);
        while (!list_empty(&bus->devices)) {
-               dev = container_of(bus->devices.next, struct pci_dev, bus_list);
-               up_write(&pci_bus_sem);
-
+               dev = container_of(bus->devices.next, struct pci_dev,
+                                  bus_list);
                dev_dbg(&dev->dev, "removing device\n");
                pci_remove_bus_device(dev);
-
-               down_write(&pci_bus_sem);
-       }
-       up_write(&pci_bus_sem);
+       }
 }
 
 void pcifront_free_roots(struct pcifront_device *pdev)
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Thu Mar 08 14:39:52 2007 -0600
@@ -248,7 +248,7 @@ static int privcmd_enforce_singleshot_ma
 }
 #endif
 
-static struct file_operations privcmd_file_ops = {
+static const struct file_operations privcmd_file_ops = {
        .ioctl = privcmd_ioctl,
        .mmap  = privcmd_mmap,
 };
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c        Thu Mar 08 14:39:52 2007 -0600
@@ -629,7 +629,7 @@ static unsigned int vtpm_op_poll(struct 
        return flags;
 }
 
-static struct file_operations vtpm_ops = {
+static const struct file_operations vtpm_ops = {
        .owner = THIS_MODULE,
        .llseek = no_llseek,
        .open = vtpm_op_open,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Thu Mar 08 14:39:52 2007 -0600
@@ -173,17 +173,22 @@ static ssize_t xenbus_dev_write(struct f
        void *reply;
        char *path, *token;
        struct watch_adapter *watch, *tmp_watch;
-       int err;
-
-       if ((len + u->len) > sizeof(u->u.buffer))
-               return -EINVAL;
-
-       if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0)
-               return -EFAULT;
+       int err, rc = len;
+
+       if ((len + u->len) > sizeof(u->u.buffer)) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) {
+               rc = -EFAULT;
+               goto out;
+       }
 
        u->len += len;
-       if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
-               return len;
+       if ((u->len < sizeof(u->u.msg)) ||
+           (u->len < (sizeof(u->u.msg) + u->u.msg.len)))
+               return rc;
 
        msg_type = u->u.msg.type;
 
@@ -201,14 +206,17 @@ static ssize_t xenbus_dev_write(struct f
        case XS_SET_PERMS:
                if (msg_type == XS_TRANSACTION_START) {
                        trans = kmalloc(sizeof(*trans), GFP_KERNEL);
-                       if (!trans)
-                               return -ENOMEM;
+                       if (!trans) {
+                               rc = -ENOMEM;
+                               goto out;
+                       }
                }
 
                reply = xenbus_dev_request_and_reply(&u->u.msg);
                if (IS_ERR(reply)) {
                        kfree(trans);
-                       return PTR_ERR(reply);
+                       rc = PTR_ERR(reply);
+                       goto out;
                }
 
                if (msg_type == XS_TRANSACTION_START) {
@@ -231,8 +239,10 @@ static ssize_t xenbus_dev_write(struct f
        case XS_UNWATCH:
                path = u->u.buffer + sizeof(u->u.msg);
                token = memchr(path, 0, u->u.msg.len);
-               if (token == NULL)
-                       return -EILSEQ;
+               if (token == NULL) {
+                       rc = -EILSEQ;
+                       goto out;
+               }
                token++;
 
                if (msg_type == XS_WATCH) {
@@ -251,7 +261,8 @@ static ssize_t xenbus_dev_write(struct f
                        err = register_xenbus_watch(&watch->watch);
                        if (err) {
                                free_watch_adapter(watch);
-                               return err;
+                               rc = err;
+                               goto out;
                        }
                        
                        list_add(&watch->list, &u->watches);
@@ -265,7 +276,6 @@ static ssize_t xenbus_dev_write(struct f
                                                  &u->watches, list) {
                                if (!strcmp(watch->token, token) &&
                                    !strcmp(watch->watch.node, path))
-                                       break;
                                {
                                        unregister_xenbus_watch(&watch->watch);
                                        list_del(&watch->list);
@@ -278,11 +288,13 @@ static ssize_t xenbus_dev_write(struct f
                break;
 
        default:
-               return -EINVAL;
-       }
-
+               rc = -EINVAL;
+               break;
+       }
+
+ out:
        u->len = 0;
-       return len;
+       return rc;
 }
 
 static int xenbus_dev_open(struct inode *inode, struct file *filp)
@@ -342,7 +354,7 @@ static unsigned int xenbus_dev_poll(stru
        return 0;
 }
 
-static struct file_operations xenbus_dev_file_ops = {
+static const struct file_operations xenbus_dev_file_ops = {
        .read = xenbus_dev_read,
        .write = xenbus_dev_write,
        .open = xenbus_dev_open,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Thu Mar 08 14:39:52 2007 -0600
@@ -20,6 +20,14 @@
 #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
 
 #ifdef __KERNEL__
+
+/*
+ * Need to repeat this here in order to not include pgtable.h (which in turn
+ * depends on definitions made here), but to be able to use the symbolic
+ * below. The preprocessor will warn if the two definitions aren't identical.
+ */
+#define _PAGE_PRESENT  0x001
+
 #ifndef __ASSEMBLY__
 
 #include <linux/string.h>
@@ -28,13 +36,6 @@
 #include <asm/bug.h>
 #include <xen/interface/xen.h>
 #include <xen/features.h>
-
-/*
- * Need to repeat this here in order to not include pgtable.h (which in turn
- * depends on definitions made here), but to be able to use the symbolic
- * below. The preprocessor will warn if the two definitions aren't identical.
- */
-#define _PAGE_PRESENT  0x001
 
 #define arch_free_page(_page,_order)           \
 ({     int foreign = PageForeign(_page);       \
@@ -225,8 +226,6 @@ extern int page_is_ram(unsigned long pag
        ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
                 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#define __HAVE_ARCH_GATE_AREA 1
-
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h       Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h       Thu Mar 08 14:39:52 2007 -0600
@@ -137,7 +137,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
 })
 
 #define write_cr4(x) \
-       __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
+       __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
 
 /*
  * Clear and set 'TS' bit respectively
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h     Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h     Thu Mar 08 14:39:52 2007 -0600
@@ -53,6 +53,11 @@ enum fixed_addresses {
 #define NR_FIX_ISAMAPS 256
        FIX_ISAMAP_END,
        FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
+       __end_of_permanent_fixed_addresses,
+       /* temporary boot-time mappings, used before ioremap() is functional */
+#define NR_FIX_BTMAPS  16
+       FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+       FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
        __end_of_fixed_addresses
 };
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h Thu Mar 08 14:39:52 2007 -0600
@@ -150,8 +150,10 @@ static inline void __iomem * ioremap (un
        return __ioremap(offset, size, 0);
 }
 
-extern void *early_ioremap(unsigned long addr, unsigned long size);
-extern void early_iounmap(void *addr, unsigned long size);
+extern void *bt_ioremap(unsigned long addr, unsigned long size);
+extern void bt_iounmap(void *addr, unsigned long size);
+#define early_ioremap bt_ioremap
+#define early_iounmap bt_iounmap
 
 /*
  * This one maps high address device memory and turns off caching for that area.
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    Thu Mar 08 14:39:52 2007 -0600
@@ -403,19 +403,6 @@ static inline int pmd_large(pmd_t pte) {
 /* to find an entry in a page-table-directory. */
 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
-static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
-{ 
-       return pud + pud_index(address);
-} 
-
-/* Find correct pud via the hidden fourth level page level: */
-
-/* This accesses the reference page table of the boot cpu. 
-   Other CPUs get synced lazily via the page fault handler. */
-static inline pud_t *pud_offset_k(pgd_t *pgd, unsigned long address)
-{
-       return pud_offset(pgd_offset_k(address), address);
-}
 
 /* PMD  - Level 2 access */
 #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h  Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h  Thu Mar 08 14:39:52 2007 -0600
@@ -246,11 +246,13 @@ DECLARE_PER_CPU(struct tss_struct,init_t
 
 
 extern struct cpuinfo_x86 boot_cpu_data;
+#ifndef CONFIG_X86_NO_TSS
 /* Save the original ist values for checking stack pointers during debugging */
 struct orig_ist {
        unsigned long ist[7];
 };
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
+#endif
 
 #ifdef CONFIG_X86_VSMP
 #define ARCH_MIN_TASKALIGN     (1 << INTERNODE_CACHE_SHIFT)
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/linux/page-flags.h
--- a/linux-2.6-xen-sparse/include/linux/page-flags.h   Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/linux/page-flags.h   Thu Mar 08 14:39:52 2007 -0600
@@ -252,14 +252,14 @@
 #define PageForeign(page)      test_bit(PG_foreign, &(page)->flags)
 #define SetPageForeign(page, dtor) do {                \
        set_bit(PG_foreign, &(page)->flags);    \
-       (page)->mapping = (void *)dtor;         \
+       (page)->index = (long)(dtor);           \
 } while (0)
 #define ClearPageForeign(page) do {            \
        clear_bit(PG_foreign, &(page)->flags);  \
-       (page)->mapping = NULL;                 \
+       (page)->index = 0;                      \
 } while (0)
 #define PageForeignDestructor(page)            \
-       ( (void (*) (struct page *)) (page)->mapping )(page)
+       ( (void (*) (struct page *)) (page)->index )(page)
 
 struct page;   /* forward declaration */
 
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/xen/cpu_hotplug.h
--- a/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h    Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h    Thu Mar 08 14:39:52 2007 -0600
@@ -4,7 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/cpumask.h>
 
-#if defined(CONFIG_X86)
+#if defined(CONFIG_X86) && defined(CONFIG_SMP)
 extern cpumask_t cpu_initialized_map;
 #define cpu_set_initialized(cpu) cpu_set(cpu, cpu_initialized_map)
 #else
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/mm/Kconfig
--- a/linux-2.6-xen-sparse/mm/Kconfig   Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,157 +0,0 @@
-config SELECT_MEMORY_MODEL
-       def_bool y
-       depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL
-
-choice
-       prompt "Memory model"
-       depends on SELECT_MEMORY_MODEL
-       default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
-       default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
-       default FLATMEM_MANUAL
-
-config FLATMEM_MANUAL
-       bool "Flat Memory"
-       depends on !(ARCH_DISCONTIGMEM_ENABLE || ARCH_SPARSEMEM_ENABLE) || ARCH_FLATMEM_ENABLE
-       help
-         This option allows you to change some of the ways that
-         Linux manages its memory internally.  Most users will
-         only have one option here: FLATMEM.  This is normal
-         and a correct option.
-
-         Some users of more advanced features like NUMA and
-         memory hotplug may have different options here.
-         DISCONTIGMEM is an more mature, better tested system,
-         but is incompatible with memory hotplug and may suffer
-         decreased performance over SPARSEMEM.  If unsure between
-         "Sparse Memory" and "Discontiguous Memory", choose
-         "Discontiguous Memory".
-
-         If unsure, choose this option (Flat Memory) over any other.
-
-config DISCONTIGMEM_MANUAL
-       bool "Discontiguous Memory"
-       depends on ARCH_DISCONTIGMEM_ENABLE
-       help
-         This option provides enhanced support for discontiguous
-         memory systems, over FLATMEM.  These systems have holes
-         in their physical address spaces, and this option provides
-         more efficient handling of these holes.  However, the vast
-         majority of hardware has quite flat address spaces, and
-         can have degraded performance from extra overhead that
-         this option imposes.
-
-         Many NUMA configurations will have this as the only option.
-
-         If unsure, choose "Flat Memory" over this option.
-
-config SPARSEMEM_MANUAL
-       bool "Sparse Memory"
-       depends on ARCH_SPARSEMEM_ENABLE
-       help
-         This will be the only option for some systems, including
-         memory hotplug systems.  This is normal.
-
-         For many other systems, this will be an alternative to
-         "Discontiguous Memory".  This option provides some potential
-         performance benefits, along with decreased code complexity,
-         but it is newer, and more experimental.
-
-         If unsure, choose "Discontiguous Memory" or "Flat Memory"
-         over this option.
-
-endchoice
-
-config DISCONTIGMEM
-       def_bool y
-       depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
-
-config SPARSEMEM
-       def_bool y
-       depends on SPARSEMEM_MANUAL
-
-config FLATMEM
-       def_bool y
-       depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
-
-config FLAT_NODE_MEM_MAP
-       def_bool y
-       depends on !SPARSEMEM
-
-#
-# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
-# to represent different areas of memory.  This variable allows
-# those dependencies to exist individually.
-#
-config NEED_MULTIPLE_NODES
-       def_bool y
-       depends on DISCONTIGMEM || NUMA
-
-config HAVE_MEMORY_PRESENT
-       def_bool y
-       depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
-
-#
-# SPARSEMEM_EXTREME (which is the default) does some bootmem
-# allocations when memory_present() is called.  If this can not
-# be done on your architecture, select this option.  However,
-# statically allocating the mem_section[] array can potentially
-# consume vast quantities of .bss, so be careful.
-#
-# This option will also potentially produce smaller runtime code
-# with gcc 3.4 and later.
-#
-config SPARSEMEM_STATIC
-       def_bool n
-
-#
-# Architectecture platforms which require a two level mem_section in SPARSEMEM
-# must select this option. This is usually for architecture platforms with
-# an extremely sparse physical address space.
-#
-config SPARSEMEM_EXTREME
-       def_bool y
-       depends on SPARSEMEM && !SPARSEMEM_STATIC
-
-# eventually, we can have this option just 'select SPARSEMEM'
-config MEMORY_HOTPLUG
-       bool "Allow for memory hot-add"
-       depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
-       depends on (IA64 || X86 || PPC64)
-
-comment "Memory hotplug is currently incompatible with Software Suspend"
-       depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
-
-# Heavily threaded applications may benefit from splitting the mm-wide
-# page_table_lock, so that faults on different parts of the user address
-# space can be handled with less contention: split it at this NR_CPUS.
-# Default to 4 for wider testing, though 8 might be more appropriate.
-# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
-# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
-# XEN on x86 architecture uses the mapping field on pagetable pages to store a
-# pointer to the destructor. This conflicts with pte_lock_deinit().
-#
-config SPLIT_PTLOCK_CPUS
-       int
-       default "4096" if ARM && !CPU_CACHE_VIPT
-       default "4096" if PARISC && !PA20
-       default "4096" if X86_XEN || X86_64_XEN
-       default "4"
-
-#
-# support for page migration
-#
-config MIGRATION
-       bool "Page migration"
-       def_bool y
-       depends on NUMA
-       help
-         Allows the migration of the physical location of pages of processes
-         while the virtual addresses are not changed. This is useful for
-         example on NUMA systems to put pages nearer to the processors accessing
-         the page.
-
-config RESOURCES_64BIT
-       bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL)
-       default 64BIT
-       help
-         This option allows memory and IO resources to be 64 bit.
diff -r 8f0b5295bb1b -r dcec453681bc patches/linux-2.6.18/blktap-aio-16_03_06.patch
--- a/patches/linux-2.6.18/blktap-aio-16_03_06.patch    Mon Mar 05 12:49:12 2007 -0600
+++ b/patches/linux-2.6.18/blktap-aio-16_03_06.patch    Thu Mar 08 14:39:52 2007 -0600
@@ -106,7 +106,7 @@ diff -pruN ../orig-linux-2.6.18/fs/aio.c
 +      return pollflags;
 +}
 +
-+static struct file_operations aioq_fops = {
++static const struct file_operations aioq_fops = {
 +      .release        = aio_queue_fd_close,
 +      .poll           = aio_queue_fd_poll
 +};
@@ -201,7 +201,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
                   int maxevents, long timeout);
  static int eventpollfs_delete_dentry(struct dentry *dentry);
 -static struct inode *ep_eventpoll_inode(void);
-+static struct inode *ep_eventpoll_inode(struct file_operations *fops);
++static struct inode *ep_eventpoll_inode(const struct file_operations *fops);
  static int eventpollfs_get_sb(struct file_system_type *fs_type,
                              int flags, const char *dev_name,
                              void *data, struct vfsmount *mnt);
@@ -221,7 +221,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
 -static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
 -                  struct eventpoll *ep)
 +int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-+                  struct eventpoll *ep, struct file_operations *fops)
++                  struct eventpoll *ep, const struct file_operations *fops)
  {
        struct qstr this;
        char name[32];
@@ -248,7 +248,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
  
  
 -static struct inode *ep_eventpoll_inode(void)
-+static struct inode *ep_eventpoll_inode(struct file_operations *fops)
++static struct inode *ep_eventpoll_inode(const struct file_operations *fops)
  {
        int error = -ENOMEM;
        struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
@@ -288,7 +288,7 @@ diff -pruN ../orig-linux-2.6.18/include/
 + */
 +struct eventpoll;
 +int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-+             struct eventpoll *ep, struct file_operations *fops);
++             struct eventpoll *ep, const struct file_operations *fops);
  #else
  
  static inline void eventpoll_init_file(struct file *file) {}
diff -r 8f0b5295bb1b -r dcec453681bc tools/Makefile
--- a/tools/Makefile    Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/Makefile    Thu Mar 08 14:39:52 2007 -0600
@@ -24,9 +24,8 @@ SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen
 
 # These don't cross-compile
 ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
-SUBDIRS-y += python
-SUBDIRS-y += pygrub
-SUBDIRS-y += ptsname
+SUBDIRS-$(PYTHON_TOOLS) += python
+SUBDIRS-$(PYTHON_TOOLS) += pygrub
 endif
 
 .PHONY: all
@@ -42,8 +41,8 @@ install: check
                $(MAKE) -C $$subdir $@; \
        done
        $(MAKE) ioemuinstall
-       $(INSTALL_DIR) -p $(DESTDIR)/var/xen/dump
-       $(INSTALL_DIR) -p $(DESTDIR)/var/log/xen
+       $(INSTALL_DIR) $(DESTDIR)/var/xen/dump
+       $(INSTALL_DIR) $(DESTDIR)/var/log/xen
 
 .PHONY: clean
 clean: check_clean
diff -r 8f0b5295bb1b -r dcec453681bc tools/blktap/lib/Makefile
--- a/tools/blktap/lib/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/blktap/lib/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -40,8 +40,8 @@ libblktap: libblktap.a
 libblktap: libblktap.a
 
 install: all
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/include
+       $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
+       $(INSTALL_DIR) $(DESTDIR)/usr/include
        $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
        ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libblktap.so.$(MAJOR)
        ln -sf libblktap.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libblktap.so
diff -r 8f0b5295bb1b -r dcec453681bc tools/console/Makefile
--- a/tools/console/Makefile    Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/console/Makefile    Thu Mar 08 14:39:52 2007 -0600
@@ -30,7 +30,7 @@ xenconsole: $(patsubst %.c,%.o,$(wildcar
 
 .PHONY: install
 install: $(BIN)
-       $(INSTALL_DIR) -p $(DESTDIR)/$(DAEMON_INSTALL_DIR)
+       $(INSTALL_DIR) $(DESTDIR)/$(DAEMON_INSTALL_DIR)
        $(INSTALL_PROG) xenconsoled $(DESTDIR)/$(DAEMON_INSTALL_DIR)
-       $(INSTALL_DIR) -p $(DESTDIR)/$(CLIENT_INSTALL_DIR)
+       $(INSTALL_DIR) $(DESTDIR)/$(CLIENT_INSTALL_DIR)
        $(INSTALL_PROG) xenconsole $(DESTDIR)/$(CLIENT_INSTALL_DIR)
diff -r 8f0b5295bb1b -r dcec453681bc tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/firmware/rombios/rombios.c  Thu Mar 08 14:39:52 2007 -0600
@@ -890,7 +890,7 @@ static void           int15_function();
 static void           int15_function();
 static void           int16_function();
 static void           int17_function();
-static void           int19_function();
+static void           int18_function();
 static void           int1a_function();
 static void           int70_function();
 static void           int74_function();
@@ -1837,6 +1837,38 @@ keyboard_panic(status)
 }
 
 //--------------------------------------------------------------------------
+// machine_reset
+//--------------------------------------------------------------------------
+  void
+machine_reset()
+{
+  /* Frob the keyboard reset line to reset the processor */
+  outb(0x64, 0x60); /* Map the flags register at data port (0x60) */
+  outb(0x60, 0x14); /* Set the flags to system|disable */
+  outb(0x64, 0xfe); /* Pulse output 0 (system reset) low */
+  BX_PANIC("Couldn't reset the machine\n");
+}
+
+//--------------------------------------------------------------------------
+// clobber_entry_point
+//    Because PV drivers in HVM guests detach some of the emulated devices, 
+//    it is not safe to do a soft reboot by just dropping to real mode and
+//    jumping at ffff:0000. -- the boot drives might have disappeared!
+//    This rather foul function overwrites(!) the BIOS entry point 
+//    to point at machine-reset, which will cause the Xen tools to
+//    rebuild the whole machine from scratch.
+//--------------------------------------------------------------------------
+  void 
+clobber_entry_point() 
+{
+    /* The instruction at the entry point is one byte (0xea) for the
+     * jump opcode, then two bytes of address, then two of segment. 
+     * Overwrite the address bytes.*/
+    write_word(0xffff, 0x0001, machine_reset); 
+}
+
+
+//--------------------------------------------------------------------------
 // shutdown_status_panic
 //   called when the shutdown statsu is not implemented, displays the status
 //--------------------------------------------------------------------------
@@ -7626,7 +7658,7 @@ int17_function(regs, ds, iret_addr)
 }
 
 void
-int19_function(seq_nr)
+int18_function(seq_nr)
 Bit16u seq_nr;
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
@@ -7702,8 +7734,8 @@ ASM_START
     push cx
     push dx
 
-    mov  dl, _int19_function.bootdrv + 2[bp]
-    mov  ax, _int19_function.bootseg + 2[bp]
+    mov  dl, _int18_function.bootdrv + 2[bp]
+    mov  ax, _int18_function.bootseg + 2[bp]
     mov  es, ax         ;; segment
     mov  bx, #0x0000    ;; offset
     mov  ah, #0x02      ;; function 2, read diskette sector
@@ -7714,7 +7746,7 @@ ASM_START
     int  #0x13          ;; read sector
     jnc  int19_load_done
     mov  ax, #0x0001
-    mov  _int19_function.status + 2[bp], ax
+    mov  _int18_function.status + 2[bp], ax
 
 int19_load_done:
     pop  dx
@@ -7789,13 +7821,13 @@ ASM_START
     ;; Build an iret stack frame that will take us to the boot vector.
     ;; iret pops ip, then cs, then flags, so push them in the opposite order.
     pushf
-    mov  ax, _int19_function.bootseg + 0[bp] 
+    mov  ax, _int18_function.bootseg + 0[bp] 
     push ax
-    mov  ax, _int19_function.bootip + 0[bp] 
+    mov  ax, _int18_function.bootip + 0[bp] 
     push ax
     ;; Set the magic number in ax and the boot drive in dl.
     mov  ax, #0xaa55
-    mov  dl, _int19_function.bootdrv + 0[bp]
+    mov  dl, _int18_function.bootdrv + 0[bp]
     ;; Zero some of the other registers.
     xor  bx, bx
     mov  ds, bx
@@ -8272,6 +8304,8 @@ int18_handler: ;; Boot Failure recovery:
   mov  ss, ax
 
   ;; Get the boot sequence number out of the IPL memory
+  ;; The first time we do this it will have been set to -1 so 
+  ;; we will start from device 0.
   mov  bx, #IPL_SEG 
   mov  ds, bx                     ;; Set segment
   mov  bx, IPL_SEQUENCE_OFFSET    ;; BX is now the sequence number
@@ -8279,43 +8313,33 @@ int18_handler: ;; Boot Failure recovery:
   mov  IPL_SEQUENCE_OFFSET, bx    ;; Write it back
   mov  ds, ax                     ;; and reset the segment to zero. 
 
-  ;; Carry on in the INT 19h handler, using the new sequence number
+  ;; Call the C code for the next boot device
   push bx
-
-  jmp  int19_next_boot
+  call _int18_function
+
+  ;; Boot failed: invoke the boot recovery function...
+  int  #0x18
 
 ;----------
 ;- INT19h -
 ;----------
 int19_relocated: ;; Boot function, relocated
-
-  ;; int19 was beginning to be really complex, so now it
-  ;; just calls a C function that does the work
-
-  push bp
-  mov  bp, sp
-  
-  ;; Reset SS and SP
+  ;;
+  ;; *** Warning: INT 19h resets the whole machine *** 
+  ;;
+  ;; Because PV drivers in HVM guests detach some of the emulated devices, 
+  ;; it is not safe to do a soft reboot by just dropping to real mode and
+  ;; invoking INT 19h -- the boot drives might have disappeared!
+  ;; If the user asks for a soft reboot, the only thing we can do is 
+  ;; reset the whole machine.  When it comes back up, the normal BIOS 
+  ;; boot sequence will start, which is more or less the required behaviour.
+  ;; 
+  ;; Reset SP and SS
   mov  ax, #0xfffe
   mov  sp, ax
   xor  ax, ax
   mov  ss, ax
-
-  ;; Start from the first boot device (0, in AX)
-  mov  bx, #IPL_SEG 
-  mov  ds, bx                     ;; Set segment to write to the IPL memory
-  mov  IPL_SEQUENCE_OFFSET, ax    ;; Save the sequence number 
-  mov  ds, ax                     ;; and reset the segment.
-
-  push ax
-
-int19_next_boot:
-
-  ;; Call the C code for the next boot device
-  call _int19_function
-
-  ;; Boot failed: invoke the boot recovery function
-  int  #0x18
+  call _machine_reset
 
 ;----------
 ;- INT1Ch -
@@ -9609,6 +9633,8 @@ normal_post:
 
   call _log_bios_start
 
+  call _clobber_entry_point
+
   ;; set all interrupts to default handler
   mov  bx, #0x0000    ;; offset index
   mov  cx, #0x0100    ;; counter (256 interrupts)
@@ -9857,8 +9883,10 @@ post_default_ints:
   call _tcpa_calling_int19h          /* specs: 8.2.3 step 1 */
   call _tcpa_add_event_separators    /* specs: 8.2.3 step 2 */
 #endif
-  int  #0x19
-  //JMP_EP(0x0064) ; INT 19h location
+
+  ;; Start the boot sequence.   See the comments in int19_relocated 
+  ;; for why we use INT 18h instead of INT 19h here.
+  int  #0x18
 
 #if BX_TCGBIOS
   call _tcpa_returned_int19h         /* specs: 8.2.3 step 3/7 */
diff -r 8f0b5295bb1b -r dcec453681bc tools/guest-headers/Makefile
--- a/tools/guest-headers/Makefile      Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/guest-headers/Makefile      Thu Mar 08 14:39:52 2007 -0600
@@ -13,7 +13,7 @@ check:
 
 install-Linux:
        mkdir -p $(DESTDIR)/usr/include/xen/linux
-       install -m0644 $(linuxsparsetree)/include/xen/public/*.h 
$(DESTDIR)/usr/include/xen/linux
+       $(INSTALL_DATA) $(linuxsparsetree)/include/xen/public/*.h 
$(DESTDIR)/usr/include/xen/linux
 
 install-SunOS:
 
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/Makefile
--- a/tools/ioemu/Makefile      Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/Makefile      Thu Mar 08 14:39:52 2007 -0600
@@ -65,10 +65,10 @@ common  de-ch  es     fo  fr-ca  hu     
 
 install-doc: $(DOCS)
        mkdir -p "$(DESTDIR)$(docdir)"
-       $(INSTALL) -m 644 qemu-doc.html  qemu-tech.html "$(DESTDIR)$(docdir)"
+       $(INSTALL_DATA) qemu-doc.html  qemu-tech.html "$(DESTDIR)$(docdir)"
 ifndef CONFIG_WIN32
        mkdir -p "$(DESTDIR)$(mandir)/man1"
-       $(INSTALL) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
+       $(INSTALL_DATA) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
 endif
 
 install: all $(if $(BUILD_DOCS),install-doc)
@@ -77,12 +77,12 @@ install: all $(if $(BUILD_DOCS),install-
 #      mkdir -p "$(DESTDIR)$(datadir)"
 #      for x in bios.bin vgabios.bin vgabios-cirrus.bin ppc_rom.bin \
 #                      video.x openbios-sparc32 linux_boot.bin; do \
-#              $(INSTALL) -m 644 $(SRC_PATH)/pc-bios/$$x 
"$(DESTDIR)$(datadir)"; \
+#              $(INSTALL_DATA) $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; 
\
 #      done
 ifndef CONFIG_WIN32
        mkdir -p "$(DESTDIR)$(datadir)/keymaps"
        for x in $(KEYMAPS); do \
-               $(INSTALL) -m 644 $(SRC_PATH)/keymaps/$$x 
"$(DESTDIR)$(datadir)/keymaps"; \
+               $(INSTALL_DATA) $(SRC_PATH)/keymaps/$$x 
"$(DESTDIR)$(datadir)/keymaps"; \
        done
 endif
        for d in $(TARGET_DIRS); do \
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/hw/ide.c
--- a/tools/ioemu/hw/ide.c      Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/hw/ide.c      Thu Mar 08 14:39:52 2007 -0600
@@ -2602,6 +2602,120 @@ void pci_cmd646_ide_init(PCIBus *bus, Bl
 #endif /* DMA_MULTI_THREAD */
 }
 
+static void pci_ide_save(QEMUFile* f, void *opaque)
+{
+    PCIIDEState *d = opaque;
+    int i;
+    /* Write the IDE state to f; pci_ide_load() reads it back in this order. */
+    for(i = 0; i < 2; i++) {
+        BMDMAState *bm = &d->bmdma[i];
+        qemu_put_8s(f, &bm->cmd);
+        qemu_put_8s(f, &bm->status);
+        qemu_put_be32s(f, &bm->addr);
+        /* XXX: if a transfer is pending, we do not save it yet */
+    }
+
+    /* per IDE interface data */
+    for(i = 0; i < 2; i++) {
+        IDEState *s = &d->ide_if[i * 2]; /* first drive of interface i */
+        uint8_t drive1_selected;
+        qemu_put_8s(f, &s->cmd);
+        drive1_selected = (s->cur_drive != s); /* 1 if the current drive is not the first one */
+        qemu_put_8s(f, &drive1_selected);
+    }
+
+    /* per IDE drive data */
+    for(i = 0; i < 4; i++) {
+        IDEState *s = &d->ide_if[i];
+        qemu_put_be32s(f, &s->mult_sectors);
+        qemu_put_be32s(f, &s->identify_set);
+        if (s->identify_set) { /* the 512-byte identify block is only written when set */
+            qemu_put_buffer(f, (const uint8_t *)s->identify_data, 512);
+        }
+        qemu_put_8s(f, &s->write_cache);
+        qemu_put_8s(f, &s->feature);
+        qemu_put_8s(f, &s->error);
+        qemu_put_be32s(f, &s->nsector);
+        qemu_put_8s(f, &s->sector);
+        qemu_put_8s(f, &s->lcyl);
+        qemu_put_8s(f, &s->hcyl);
+        qemu_put_8s(f, &s->hob_feature);
+        qemu_put_8s(f, &s->hob_nsector);
+        qemu_put_8s(f, &s->hob_sector);
+        qemu_put_8s(f, &s->hob_lcyl);
+        qemu_put_8s(f, &s->hob_hcyl);
+        qemu_put_8s(f, &s->select);
+        qemu_put_8s(f, &s->status);
+        qemu_put_8s(f, &s->lba48);
+
+        qemu_put_8s(f, &s->sense_key);
+        qemu_put_8s(f, &s->asc);
+        /* XXX: if a transfer is pending, we do not save it yet */
+    }
+}
+
+static int pci_ide_load(QEMUFile* f, void *opaque, int version_id)
+{
+    PCIIDEState *d = opaque;
+    int i;
+    /* Returns 0 on success, -EINVAL for any version_id other than 1. */
+    if (version_id != 1)
+        return -EINVAL;
+    /* Read the fields back in the order pci_ide_save() wrote them. */
+    for(i = 0; i < 2; i++) {
+        BMDMAState *bm = &d->bmdma[i];
+        qemu_get_8s(f, &bm->cmd);
+        qemu_get_8s(f, &bm->status);
+        qemu_get_be32s(f, &bm->addr);
+        /* XXX: a pending transfer is not saved, so none is restored here */
+    }
+
+    /* per IDE interface data */
+    for(i = 0; i < 2; i++) {
+        IDEState *s = &d->ide_if[i * 2]; /* first drive of interface i */
+        uint8_t drive1_selected;
+        qemu_get_8s(f, &s->cmd);
+        qemu_get_8s(f, &drive1_selected);
+        s->cur_drive = &d->ide_if[i * 2 + (drive1_selected != 0)]; /* any non-zero byte selects the second drive */
+    }
+
+    /* per IDE drive data */
+    for(i = 0; i < 4; i++) {
+        IDEState *s = &d->ide_if[i];
+        qemu_get_be32s(f, &s->mult_sectors);
+        qemu_get_be32s(f, &s->identify_set);
+        if (s->identify_set) { /* the 512-byte identify block is only present when set */
+            qemu_get_buffer(f, (uint8_t *)s->identify_data, 512);
+        }
+        qemu_get_8s(f, &s->write_cache);
+        qemu_get_8s(f, &s->feature);
+        qemu_get_8s(f, &s->error);
+        qemu_get_be32s(f, &s->nsector);
+        qemu_get_8s(f, &s->sector);
+        qemu_get_8s(f, &s->lcyl);
+        qemu_get_8s(f, &s->hcyl);
+        qemu_get_8s(f, &s->hob_feature);
+        qemu_get_8s(f, &s->hob_nsector);
+        qemu_get_8s(f, &s->hob_sector);
+        qemu_get_8s(f, &s->hob_lcyl);
+        qemu_get_8s(f, &s->hob_hcyl);
+        qemu_get_8s(f, &s->select);
+        qemu_get_8s(f, &s->status);
+        qemu_get_8s(f, &s->lba48);
+
+        qemu_get_8s(f, &s->sense_key);
+        qemu_get_8s(f, &s->asc);
+        /* XXX: pending transfers are not saved, so an in-flight one is lost */
+        if (s->status & (DRQ_STAT|BUSY_STAT)) {
+            /* Tell the guest that its transfer has gone away */
+            ide_abort_command(s);
+            ide_set_irq(s);
+        }
+    }
+    return 0;
+}
+
+
 /* hd_table must contain 4 block drivers */
 /* NOTE: for the PIIX3, the IRQs and IOports are hardcoded */
 void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn)
@@ -2643,6 +2757,7 @@ void pci_piix3_ide_init(PCIBus *bus, Blo
     buffered_pio_init();
 
     register_savevm("ide_pci", 0, 1, generic_pci_save, generic_pci_load, d);
+    register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
 
 #ifdef DMA_MULTI_THREAD    
     dma_create_thread();
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/target-i386-dm/qemu-ifup
--- a/tools/ioemu/target-i386-dm/qemu-ifup      Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/target-i386-dm/qemu-ifup      Thu Mar 08 14:39:52 2007 -0600
@@ -3,8 +3,7 @@
 #. /etc/rc.d/init.d/functions
 #ulimit -c unlimited
 
-echo -c 'config qemu network with xen bridge for '
-echo $*
+echo 'config qemu network with xen bridge for ' $*
 
 ifconfig $1 0.0.0.0 up
 brctl addif $2 $1
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/vl.c  Thu Mar 08 14:39:52 2007 -0600
@@ -3250,6 +3250,14 @@ static int net_tap_init(VLANState *vlan,
         pid = fork();
         if (pid >= 0) {
             if (pid == 0) {
+                int open_max = sysconf(_SC_OPEN_MAX), i;
+                for (i = 0; i < open_max; i++)
+                    if (i != STDIN_FILENO &&
+                        i != STDOUT_FILENO &&
+                        i != STDERR_FILENO &&
+                        i != fd)
+                        close(i);
+
                 parg = args;
                 *parg++ = (char *)setup_script;
                 *parg++ = ifname;
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/vnc.c
--- a/tools/ioemu/vnc.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/vnc.c Thu Mar 08 14:39:52 2007 -0600
@@ -1445,7 +1445,7 @@ int vnc_display_init(DisplayState *ds, i
 
 int vnc_start_viewer(int port)
 {
-    int pid;
+    int pid, i, open_max;
     char s[16];
 
     sprintf(s, ":%d", port);
@@ -1456,6 +1456,12 @@ int vnc_start_viewer(int port)
        exit(1);
 
     case 0:    /* child */
+       open_max = sysconf(_SC_OPEN_MAX);
+       for (i = 0; i < open_max; i++)
+           if (i != STDIN_FILENO &&
+               i != STDOUT_FILENO &&
+               i != STDERR_FILENO)
+               close(i);
        execlp("vncviewer", "vncviewer", s, NULL);
        fprintf(stderr, "vncviewer execlp failed\n");
        exit(1);
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core.c     Thu Mar 08 14:39:52 2007 -0600
@@ -153,7 +153,7 @@ struct xc_core_section_headers {
     uint16_t    num;
     uint16_t    num_max;
 
-    Elf_Shdr   *shdrs;
+    Elf64_Shdr  *shdrs;
 };
 #define SHDR_INIT       16
 #define SHDR_INC        4
@@ -184,14 +184,14 @@ xc_core_shdr_free(struct xc_core_section
     free(sheaders);
 }
 
-Elf_Shdr*
+Elf64_Shdr*
 xc_core_shdr_get(struct xc_core_section_headers *sheaders)
 {
-    Elf_Shdr *shdr;
+    Elf64_Shdr *shdr;
 
     if ( sheaders->num == sheaders->num_max )
     {
-        Elf_Shdr *shdrs;
+        Elf64_Shdr *shdrs;
         if ( sheaders->num_max + SHDR_INC < sheaders->num_max )
         {
             errno = E2BIG;
@@ -212,7 +212,7 @@ xc_core_shdr_get(struct xc_core_section_
 }
 
 int
-xc_core_shdr_set(Elf_Shdr *shdr,
+xc_core_shdr_set(Elf64_Shdr *shdr,
                  struct xc_core_strtab *strtab,
                  const char *name, uint32_t type,
                  uint64_t offset, uint64_t size,
@@ -317,15 +317,15 @@ xc_domain_dumpcore_via_callback(int xc_h
 
     uint64_t *pfn_array = NULL;
 
-    Elf_Ehdr ehdr;
-    unsigned long filesz;
-    unsigned long offset;
-    unsigned long fixup;
+    Elf64_Ehdr ehdr;
+    uint64_t filesz;
+    uint64_t offset;
+    uint64_t fixup;
 
     struct xc_core_strtab *strtab = NULL;
     uint16_t strtab_idx;
     struct xc_core_section_headers *sheaders = NULL;
-    Elf_Shdr *shdr;
+    Elf64_Shdr *shdr;
 
     /* elf notes */
     struct elfnote elfnote;
@@ -460,7 +460,7 @@ xc_domain_dumpcore_via_callback(int xc_h
     ehdr.e_ident[EI_MAG1] = ELFMAG1;
     ehdr.e_ident[EI_MAG2] = ELFMAG2;
     ehdr.e_ident[EI_MAG3] = ELFMAG3;
-    ehdr.e_ident[EI_CLASS] = ELFCLASS;
+    ehdr.e_ident[EI_CLASS] = ELFCLASS64;
     ehdr.e_ident[EI_DATA] = ELF_ARCH_DATA;
     ehdr.e_ident[EI_VERSION] = EV_CURRENT;
     ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV;
@@ -474,9 +474,9 @@ xc_domain_dumpcore_via_callback(int xc_h
     ehdr.e_shoff = sizeof(ehdr);
     ehdr.e_flags = ELF_CORE_EFLAGS;
     ehdr.e_ehsize = sizeof(ehdr);
-    ehdr.e_phentsize = sizeof(Elf_Phdr);
+    ehdr.e_phentsize = sizeof(Elf64_Phdr);
     ehdr.e_phnum = 0;
-    ehdr.e_shentsize = sizeof(Elf_Shdr);
+    ehdr.e_shentsize = sizeof(Elf64_Shdr);
     /* ehdr.e_shnum and ehdr.e_shstrndx aren't known here yet. fill it later*/
 
     /* create section header */
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core.h
--- a/tools/libxc/xc_core.h     Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core.h     Thu Mar 08 14:39:52 2007 -0600
@@ -116,10 +116,10 @@ struct xc_core_strtab;
 struct xc_core_strtab;
 struct xc_core_section_headers;
 
-Elf_Shdr*
+Elf64_Shdr*
 xc_core_shdr_get(struct xc_core_section_headers *sheaders);
 int
-xc_core_shdr_set(Elf_Shdr *shdr,
+xc_core_shdr_set(Elf64_Shdr *shdr,
                  struct xc_core_strtab *strtab,
                  const char *name, uint32_t type,
                  uint64_t offset, uint64_t size,
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core_ia64.c
--- a/tools/libxc/xc_core_ia64.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core_ia64.c        Thu Mar 08 14:39:52 2007 -0600
@@ -266,10 +266,10 @@ xc_core_arch_context_get_shdr(struct xc_
 xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 
                               struct xc_core_section_headers *sheaders,
                               struct xc_core_strtab *strtab,
-                              unsigned long *filesz, unsigned long offset)
+                              uint64_t *filesz, uint64_t offset)
 {
     int sts = -1;
-    Elf_Shdr *shdr;
+    Elf64_Shdr *shdr;
 
     /* mmapped priv regs */
     shdr = xc_core_shdr_get(sheaders);
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core_ia64.h
--- a/tools/libxc/xc_core_ia64.h        Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core_ia64.h        Thu Mar 08 14:39:52 2007 -0600
@@ -42,7 +42,7 @@ xc_core_arch_context_get_shdr(struct xc_
 xc_core_arch_context_get_shdr(struct xc_core_arch_context* arch_ctxt, 
                               struct xc_core_section_headers *sheaders,
                               struct xc_core_strtab *strtab,
-                              unsigned long *filesz, unsigned long offset);
+                              uint64_t *filesz, uint64_t offset);
 int
 xc_core_arch_context_dump(struct xc_core_arch_context* arch_ctxt,
                           void* args, dumpcore_rtn_t dump_rtn);
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core_x86.h
--- a/tools/libxc/xc_core_x86.h Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core_x86.h Thu Mar 08 14:39:52 2007 -0600
@@ -45,7 +45,7 @@ xc_core_arch_context_get_shdr(struct xc_
 xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 
                               struct xc_core_section_headers *sheaders,
                               struct xc_core_strtab *strtab,
-                              unsigned long *filesz, unsigned long offset)
+                              uint64_t *filesz, uint64_t offset)
 {
     *filesz = 0;
     return 0;
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_dom_core.c
--- a/tools/libxc/xc_dom_core.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_dom_core.c Thu Mar 08 14:39:52 2007 -0600
@@ -721,9 +721,6 @@ int xc_dom_build_image(struct xc_dom_ima
     }
     page_size = XC_DOM_PAGE_SIZE(dom);
 
-    /* 4MB align virtual base address */
-    dom->parms.virt_base &= ~(((uint64_t)1<<22)-1);
-
     /* load kernel */
     if ( xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
                               dom->kernel_seg.vstart,
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_linux_restore.c    Thu Mar 08 14:39:52 2007 -0600
@@ -19,7 +19,7 @@ static unsigned long max_mfn;
 /* virtual starting address of the hypervisor */
 static unsigned long hvirt_start;
 
-/* #levels of page tables used by the currrent guest */
+/* #levels of page tables used by the current guest */
 static unsigned int pt_levels;
 
 /* total number of pages used by the current guest */
@@ -857,6 +857,28 @@ int xc_linux_restore(int xc_handle, int 
 
         ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
 
+        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
+        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
+        {
+            pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
+
+            if (pfn >= max_pfn) {
+                ERROR("User PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
+                      pfn, max_pfn, pfn_type[pfn]);
+                goto out;
+            }
+
+            if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
+                 ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
+                ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+                      pfn, max_pfn, pfn_type[pfn],
+                      (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+                goto out;
+            }
+
+            ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]);
+        }
+
         domctl.cmd = XEN_DOMCTL_setvcpucontext;
         domctl.domain = (domid_t)dom;
         domctl.u.vcpucontext.vcpu = i;
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_linux_save.c       Thu Mar 08 14:39:52 2007 -0600
@@ -34,7 +34,7 @@ static unsigned long max_mfn;
 /* virtual starting address of the hypervisor */
 static unsigned long hvirt_start;
 
-/* #levels of page tables used by the currrent guest */
+/* #levels of page tables used by the current guest */
 static unsigned int pt_levels;
 
 /* total number of pages used by the current guest */
@@ -491,7 +491,7 @@ static int canonicalize_pagetable(unsign
     ** reserved hypervisor mappings. This depends on the current
     ** page table type as well as the number of paging levels.
     */
-    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
+    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
 
     if (pt_levels == 2 && type == XEN_DOMCTL_PFINFO_L2TAB)
         xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
@@ -1279,6 +1279,18 @@ int xc_linux_save(int xc_handle, int io_
         ctxt.ctrlreg[3] = 
             xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
 
+        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
+        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
+        {
+            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) {
+                ERROR("PT base is not in range of pseudophys map");
+                goto out;
+            }
+            /* Least-significant bit means 'valid PFN'. */
+            ctxt.ctrlreg[1] = 1 |
+                xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
+        }
+
         if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
             ERROR("Error when writing to state file (1) (errno %d)", errno);
             goto out;
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_ptrace_core.c      Thu Mar 08 14:39:52 2007 -0600
@@ -192,7 +192,7 @@ struct elf_core
 struct elf_core
 {
     int         domfd;
-    Elf_Ehdr    ehdr;
+    Elf64_Ehdr  ehdr;
 
     char*       shdr;
     
@@ -241,6 +241,8 @@ elf_core_init(struct elf_core* ecore, in
     
     /* check elf header */
     if (!IS_ELF(ecore->ehdr) || ecore->ehdr.e_type != ET_CORE)
+        goto out;
+    if (ecore->ehdr.e_ident[EI_CLASS] != ELFCLASS64)
         goto out;
     /* check elf header more: EI_DATA, EI_VERSION, e_machine... */
 
@@ -294,7 +296,7 @@ elf_core_search_note(struct elf_core* ec
 }
 
 static int
-elf_core_alloc_read_sec(struct elf_core* ecore, const Elf_Shdr* shdr,
+elf_core_alloc_read_sec(struct elf_core* ecore, const Elf64_Shdr* shdr,
                         char** buf)
 {
     int ret;
@@ -309,19 +311,19 @@ elf_core_alloc_read_sec(struct elf_core*
     return ret;
 }
 
-static Elf_Shdr*
+static Elf64_Shdr*
 elf_core_shdr_by_index(struct elf_core* ecore, uint16_t index)
 {
     if (index >= ecore->ehdr.e_shnum)
         return NULL;
-    return (Elf_Shdr*)(ecore->shdr + ecore->ehdr.e_shentsize * index);
+    return (Elf64_Shdr*)(ecore->shdr + ecore->ehdr.e_shentsize * index);
 }
 
 static int
 elf_core_alloc_read_sec_by_index(struct elf_core* ecore, uint16_t index,
                                  char** buf, uint64_t* size)
 {
-    Elf_Shdr* shdr = elf_core_shdr_by_index(ecore, index);
+    Elf64_Shdr* shdr = elf_core_shdr_by_index(ecore, index);
     if (shdr == NULL)
         return -1;
     if (size != NULL)
@@ -329,14 +331,14 @@ elf_core_alloc_read_sec_by_index(struct 
     return elf_core_alloc_read_sec(ecore, shdr, buf);
 }
 
-static Elf_Shdr*
+static Elf64_Shdr*
 elf_core_shdr_by_name(struct elf_core* ecore, const char* name)
 {
     const char* s;
     for (s = ecore->shdr;
          s < ecore->shdr + ecore->ehdr.e_shentsize * ecore->ehdr.e_shnum;
          s += ecore->ehdr.e_shentsize) {
-        Elf_Shdr* shdr = (Elf_Shdr*)s;
+        Elf64_Shdr* shdr = (Elf64_Shdr*)s;
 
         if (strncmp(ecore->shstrtab + shdr->sh_name, name, strlen(name)) == 0)
             return shdr;
@@ -348,7 +350,7 @@ static int
 static int
 elf_core_read_sec_by_name(struct elf_core* ecore, const char* name, char* buf)
 {
-    Elf_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
+    Elf64_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
     return pread_exact(ecore->domfd, buf, shdr->sh_size, shdr->sh_offset);
     
 }
@@ -357,7 +359,7 @@ elf_core_alloc_read_sec_by_name(struct e
 elf_core_alloc_read_sec_by_name(struct elf_core* ecore, const char* name,
                                 char** buf, uint64_t* size)
 {
-    Elf_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
+    Elf64_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
     if (shdr == NULL)
         return -1;
     if (size != NULL)
@@ -508,8 +510,8 @@ xc_waitdomain_core_elf(
     struct xen_dumpcore_elfnote_xen_version *xen_version;
     struct xen_dumpcore_elfnote_format_version *format_version;
 
-    Elf_Shdr* table_shdr;
-    Elf_Shdr* pages_shdr;
+    Elf64_Shdr* table_shdr;
+    Elf64_Shdr* pages_shdr;
 
     if (elf_core_init(&ecore, domfd) < 0)
         goto out;
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxen/Makefile
--- a/tools/libxen/Makefile     Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxen/Makefile     Thu Mar 08 14:39:52 2007 -0600
@@ -57,8 +57,8 @@ test/test_hvm_bindings: test/test_hvm_bi
 
 .PHONY: install
 install: all
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/include/xen/api
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+       $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/api
+       $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_PROG) libxenapi.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
        ln -sf libxenapi.so.$(MAJOR).$(MINOR) 
$(DESTDIR)/usr/$(LIBDIR)/libxenapi.so.$(MAJOR)
        ln -sf libxenapi.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenapi.so
diff -r 8f0b5295bb1b -r dcec453681bc tools/ptsname/Makefile
--- a/tools/ptsname/Makefile    Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-.PHONY: all
-all: build
-.PHONY: build
-build:
-       CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build
-
-.PHONY: install
-ifndef XEN_PYTHON_NATIVE_INSTALL
-install: all
-       CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install 
--home="$(DESTDIR)/usr" --prefix=""
-else
-install: all
-       CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install 
--root="$(DESTDIR)"
-endif
-
-.PHONY: clean
-clean:
-       rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out
diff -r 8f0b5295bb1b -r dcec453681bc tools/ptsname/ptsname.c
--- a/tools/ptsname/ptsname.c   Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-/******************************************************************************
- * ptsname.c
- * 
- * A python extension to expose the POSIX ptsname() function.
- * 
- * Copyright (C) 2007 XenSource Ltd
- */
-
-#include <Python.h>
-#include <stdlib.h>
-
-/* Needed for Python versions earlier than 2.3. */
-#ifndef PyMODINIT_FUNC
-#define PyMODINIT_FUNC DL_EXPORT(void)
-#endif
-
-static PyObject *do_ptsname(PyObject *self, PyObject *args)
-{
-    int fd;
-    char *path;
-
-    if (!PyArg_ParseTuple(args, "i", &fd))
-        return NULL;
-
-    path = ptsname(fd);
-
-    if (!path)
-    {
-        PyErr_SetFromErrno(PyExc_IOError);
-        return NULL;
-    } 
-
-    return PyString_FromString(path);
-}
-
-static PyMethodDef ptsname_methods[] = { 
-    { "ptsname", do_ptsname, METH_VARARGS }, 
-    { NULL }
-};
-
-PyMODINIT_FUNC initptsname(void)
-{
-    Py_InitModule("ptsname", ptsname_methods);
-}
diff -r 8f0b5295bb1b -r dcec453681bc tools/ptsname/setup.py
--- a/tools/ptsname/setup.py    Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-from distutils.core import setup, Extension
-
-extra_compile_args  = [ "-fno-strict-aliasing", "-Werror" ]
-
-setup(name         = 'ptsname',
-      version      = '1.0',
-      description  = 'POSIX ptsname() function',
-      author       = 'Tim Deegan',
-      author_email = 'Tim.Deegan@xxxxxxxxxxxxx',
-      license      = 'GPL',
-      ext_modules  = [ Extension("ptsname", [ "ptsname.c" ]) ])
diff -r 8f0b5295bb1b -r dcec453681bc tools/pygrub/Makefile
--- a/tools/pygrub/Makefile     Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/pygrub/Makefile     Thu Mar 08 14:39:52 2007 -0600
@@ -10,13 +10,14 @@ build:
 
 .PHONY: install
 ifndef XEN_PYTHON_NATIVE_INSTALL
+install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import 
auxbin; print auxbin.libpath()")
 install: all
-       CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install 
--home="$(DESTDIR)/usr" --prefix=""
-       $(INSTALL_DIR) -p $(DESTDIR)/var/run/xend/boot
+       CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install 
--home="$(DESTDIR)/usr" --prefix="" --install-lib="$(DESTDIR)$(LIBPATH)/python"
+       $(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot
 else
 install: all
        CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install 
--root="$(DESTDIR)"
-       $(INSTALL_DIR) -p $(DESTDIR)/var/run/xend/boot
+       $(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot
 endif
 
 .PHONY: clean
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/Makefile
--- a/tools/python/Makefile     Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/Makefile     Thu Mar 08 14:39:52 2007 -0600
@@ -18,8 +18,9 @@ build:
 
 .PHONY: install
 ifndef XEN_PYTHON_NATIVE_INSTALL
+install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print 
auxbin.libpath()")
 install: install-messages
-       CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install 
--home="$(DESTDIR)/usr" --prefix="" --force
+       CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install 
--home="$(DESTDIR)/usr" --prefix="" --force 
--install-lib="$(DESTDIR)$(LIBPATH)/python"
 else
 install: install-messages
        CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install 
--root="$(DESTDIR)" --force
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/ptsname/ptsname.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/ptsname/ptsname.c    Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,44 @@
+/******************************************************************************
+ * ptsname.c
+ * 
+ * A python extension to expose the POSIX ptsname() function.
+ * 
+ * Copyright (C) 2007 XenSource Ltd
+ */
+
+#include <Python.h>
+#include <stdlib.h>
+
+/* Needed for Python versions earlier than 2.3. */
+#ifndef PyMODINIT_FUNC
+#define PyMODINIT_FUNC DL_EXPORT(void)
+#endif
+
+static PyObject *do_ptsname(PyObject *self, PyObject *args)
+{
+    int fd;
+    char *path;
+    /* Python-callable wrapper: takes one int (fd), returns ptsname(fd) as a string. */
+    if (!PyArg_ParseTuple(args, "i", &fd))
+        return NULL;
+
+    path = ptsname(fd);
+    /* A NULL result is reported to Python as an IOError built from errno. */
+    if (!path)
+    {
+        PyErr_SetFromErrno(PyExc_IOError);
+        return NULL;
+    } 
+
+    return PyString_FromString(path);
+}
+
+static PyMethodDef ptsname_methods[] = { 
+    { "ptsname", do_ptsname, METH_VARARGS }, /* Python name "ptsname" -> do_ptsname */
+    { NULL } /* terminating entry of the method table */
+};
+
+PyMODINIT_FUNC initptsname(void)
+{
+    Py_InitModule("ptsname", ptsname_methods); /* create module "ptsname" with the method table above */
+}
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/setup.py
--- a/tools/python/setup.py     Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/setup.py     Thu Mar 08 14:39:52 2007 -0600
@@ -44,7 +44,14 @@ acm = Extension("acm",
                libraries          = libraries,
                sources            = [ "xen/lowlevel/acm/acm.c" ])
 
-modules = [ xc, xs, acm ]
+ptsname = Extension("ptsname",
+               extra_compile_args = extra_compile_args,
+               include_dirs       = include_dirs + [ "ptsname" ],
+               library_dirs       = library_dirs,
+               libraries          = libraries,
+               sources            = [ "ptsname/ptsname.c" ])
+
+modules = [ xc, xs, acm, ptsname ]
 if os.uname()[0] == 'SunOS':
     modules.append(scf)
 
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/xen/xend/XendBootloader.py
--- a/tools/python/xen/xend/XendBootloader.py   Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/xen/xend/XendBootloader.py   Thu Mar 08 14:39:52 2007 -0600
@@ -21,7 +21,8 @@ from XendLogging import log
 from XendLogging import log
 from XendError import VmError
 
-import pty, ptsname, termios, fcntl
+import pty, termios, fcntl
+from xen.lowlevel import ptsname
 
 def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '',
                ramdisk = '', kernel_args = ''):
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Mar 08 14:39:52 2007 -0600
@@ -781,7 +781,6 @@ class XendDomainInfo:
             'name':               self.info['name_label'],
             'console/limit':      str(xoptions.get_console_limit() * 1024),
             'memory/target':      str(self.info['memory_static_min'] * 1024),
-            'control/platform-feature-multiprocessor-suspend': str(1)
             }
 
         def f(n, v):
@@ -795,6 +794,9 @@ class XendDomainInfo:
         f('console/ring-ref', self.console_mfn)
         f('store/port',       self.store_port)
         f('store/ring-ref',   self.store_mfn)
+
+        if arch.type == "x86":
+            f('control/platform-feature-multiprocessor-suspend', True)
 
         # elfnotes
         for n, v in self.info.get_notes().iteritems():
@@ -1503,7 +1505,7 @@ class XendDomainInfo:
             self.info['start_time'] = time.time()
 
             self._stateSet(DOM_STATE_RUNNING)
-        except RuntimeError, exn:
+        except (RuntimeError, VmError), exn:
             log.exception("XendDomainInfo.initDomain: exception occurred")
             self.image.cleanupBootloading()
             raise VmError(str(exn))
@@ -2090,26 +2092,26 @@ class XendDomainInfo:
         return self.info.get('tools_version', {})
     
     def get_on_shutdown(self):
-        after_shutdown = self.info.get('action_after_shutdown')
+        after_shutdown = self.info.get('actions_after_shutdown')
         if not after_shutdown or after_shutdown not in XEN_API_ON_NORMAL_EXIT:
             return XEN_API_ON_NORMAL_EXIT[-1]
         return after_shutdown
 
     def get_on_reboot(self):
-        after_reboot = self.info.get('action_after_reboot')
+        after_reboot = self.info.get('actions_after_reboot')
         if not after_reboot or after_reboot not in XEN_API_ON_NORMAL_EXIT:
             return XEN_API_ON_NORMAL_EXIT[-1]
         return after_reboot
 
     def get_on_suspend(self):
         # TODO: not supported        
-        after_suspend = self.info.get('action_after_suspend') 
+        after_suspend = self.info.get('actions_after_suspend') 
         if not after_suspend or after_suspend not in XEN_API_ON_NORMAL_EXIT:
             return XEN_API_ON_NORMAL_EXIT[-1]
         return after_suspend        
 
     def get_on_crash(self):
-        after_crash = self.info.get('action_after_crash')
+        after_crash = self.info.get('actions_after_crash')
         if not after_crash or after_crash not in XEN_API_ON_CRASH_BEHAVIOUR:
             return XEN_API_ON_CRASH_BEHAVIOUR[0]
         return after_crash
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/xen/xend/XendNode.py Thu Mar 08 14:39:52 2007 -0600
@@ -22,7 +22,7 @@ import xen.lowlevel.xc
 
 from xen.util import Brctl
 
-from xen.xend import uuid
+from xen.xend import uuid, arch
 from xen.xend.XendError import *
 from xen.xend.XendOptions import instance as xendoptions
 from xen.xend.XendQCoWStorageRepo import XendQCoWStorageRepo
@@ -97,17 +97,38 @@ class XendNode:
         for u in self.cpus.keys():
             log.error(self.cpus[u])
             number = self.cpus[u]['number']
+            # We can run off the end of the cpuinfo list if domain0 does not
+            # have #vcpus == #pcpus. In that case we just replicate one that's
+            # in the hash table.
+            if not cpuinfo.has_key(number):
+                number = cpuinfo.keys()[0]
             log.error(number)
             log.error(cpuinfo)
-            self.cpus[u].update(
-                { 'host'     : self.uuid,
-                  'features' : cpu_features,
-                  'speed'    : int(float(cpuinfo[number]['cpu MHz'])),
-                  'vendor'   : cpuinfo[number]['vendor_id'],
-                  'modelname': cpuinfo[number]['model name'],
-                  'stepping' : cpuinfo[number]['stepping'],
-                  'flags'    : cpuinfo[number]['flags'],
-                })
+            if arch.type == "x86":
+                self.cpus[u].update(
+                    { 'host'     : self.uuid,
+                      'features' : cpu_features,
+                      'speed'    : int(float(cpuinfo[number]['cpu MHz'])),
+                      'vendor'   : cpuinfo[number]['vendor_id'],
+                      'modelname': cpuinfo[number]['model name'],
+                      'stepping' : cpuinfo[number]['stepping'],
+                      'flags'    : cpuinfo[number]['flags'],
+                    })
+            elif arch.type == "ia64":
+                self.cpus[u].update(
+                    { 'host'     : self.uuid,
+                      'features' : cpu_features,
+                      'speed'    : int(float(cpuinfo[number]['cpu MHz'])),
+                      'vendor'   : cpuinfo[number]['vendor'],
+                      'modelname': cpuinfo[number]['family'],
+                      'stepping' : cpuinfo[number]['model'],
+                      'flags'    : cpuinfo[number]['features'],
+                    })
+            else:
+                self.cpus[u].update(
+                    { 'host'     : self.uuid,
+                      'features' : cpu_features,
+                    })
 
         self.pifs = {}
         self.pif_metrics = {}
diff -r 8f0b5295bb1b -r dcec453681bc tools/security/Makefile
--- a/tools/security/Makefile   Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/security/Makefile   Thu Mar 08 14:39:52 2007 -0600
@@ -54,26 +54,29 @@ all: build
 all: build
 
 .PHONY: install
+ifndef XEN_PYTHON_NATIVE_INSTALL
+install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()")
+endif
 install: all $(ACM_CONFIG_FILE)
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
-       $(INSTALL_PROG) -p $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin
-       $(INSTALL_PROG) -p $(ACM_EZPOLICY) $(DESTDIR)/usr/sbin
-       $(INSTALL_DIR) -p $(DESTDIR)$(ACM_CONFIG_DIR)
-       $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)
-       $(INSTALL_DATA) -p policies/$(ACM_SCHEMA) $(DESTDIR)$(ACM_POLICY_DIR)
-       $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)/example
+       $(INSTALL_DIR) $(DESTDIR)/usr/sbin
+       $(INSTALL_PROG) $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin
+       $(INSTALL_PROG) $(ACM_EZPOLICY) $(DESTDIR)/usr/sbin
+       $(INSTALL_DIR) $(DESTDIR)$(ACM_CONFIG_DIR)
+       $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)
+       $(INSTALL_DATA) policies/$(ACM_SCHEMA) $(DESTDIR)$(ACM_POLICY_DIR)
+       $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)/example
        for i in $(ACM_EXAMPLES); do \
-               $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
-               $(INSTALL_DATA) -p policies/example/$$i/client_v1-$(ACM_POLICY_SUFFIX) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
+               $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
+               $(INSTALL_DATA) policies/example/$$i/client_v1-$(ACM_POLICY_SUFFIX) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
        done
-       $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SCRIPT_DIR)
-       $(INSTALL_PROG) -p $(ACM_SCRIPTS) $(DESTDIR)$(ACM_SCRIPT_DIR)
-       $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
-       $(INSTALL_DATA) -p $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
-       $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SECGEN_CGIDIR)
-       $(INSTALL_PROG) -p $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
+       $(INSTALL_DIR) $(DESTDIR)$(ACM_SCRIPT_DIR)
+       $(INSTALL_PROG) $(ACM_SCRIPTS) $(DESTDIR)$(ACM_SCRIPT_DIR)
+       $(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
+       $(INSTALL_DATA) $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
+       $(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
+       $(INSTALL_PROG) $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
 ifndef XEN_PYTHON_NATIVE_INSTALL
-       python python/setup.py install --home="$(DESTDIR)/usr"
+       python python/setup.py install --home="$(DESTDIR)/usr" --install-lib="$(DESTDIR)$(LIBPATH)/python"
 else
        python python/setup.py install --root="$(DESTDIR)"
 endif
diff -r 8f0b5295bb1b -r dcec453681bc tools/vnet/libxutil/Makefile
--- a/tools/vnet/libxutil/Makefile      Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/vnet/libxutil/Makefile      Thu Mar 08 14:39:52 2007 -0600
@@ -71,7 +71,7 @@ check-for-zlib:
 
 .PHONY: install
 install: build
-       [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+       [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_PROG) libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_DATA) libxutil.a $(DESTDIR)/usr/$(LIBDIR)
        ln -sf libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so.$(MAJOR)
diff -r 8f0b5295bb1b -r dcec453681bc tools/xenfb/Makefile
--- a/tools/xenfb/Makefile      Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/xenfb/Makefile      Thu Mar 08 14:39:52 2007 -0600
@@ -1,12 +1,9 @@ XEN_ROOT=../..
 XEN_ROOT=../..
 include $(XEN_ROOT)/tools/Rules.mk
 
-CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE) -I$(XEN_ROOT)/linux-2.6-xen-sparse/include -I$(XEN_ROOT)/tools/ioemu
+CFLAGS  += -I$(XEN_LIBXC) -I$(XEN_XENSTORE)
+CFLAGS  += -I$(XEN_ROOT)/linux-2.6-xen-sparse/include -I$(XEN_ROOT)/tools/ioemu
 LDFLAGS += -L$(XEN_LIBXC) -L$(XEN_XENSTORE)
-
-INSTALL         = install
-INSTALL_PROG    = $(INSTALL) -m0755
-INSTALL_DIR     = $(INSTALL) -d -m0755
 
 .PHONY: all
 all: build
@@ -16,7 +13,7 @@ build:
        $(MAKE) vncfb sdlfb
 
 install: all
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)/xen/bin
+       $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
        $(INSTALL_PROG) vncfb $(DESTDIR)/usr/$(LIBDIR)/xen/bin/xen-vncfb
        $(INSTALL_PROG) sdlfb $(DESTDIR)/usr/$(LIBDIR)/xen/bin/xen-sdlfb
 
diff -r 8f0b5295bb1b -r dcec453681bc tools/xenfb/xenfb.c
--- a/tools/xenfb/xenfb.c       Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/xenfb/xenfb.c       Thu Mar 08 14:39:52 2007 -0600
@@ -245,11 +245,10 @@ static int xenfb_wait_for_state(struct x
        unsigned state, dummy;
        char **vec;
 
+       awaited |= 1 << XenbusStateUnknown;
+
        for (;;) {
                state = xenfb_read_state(xsh, dir);
-               if (state < 0)
-                       return -1;
-
                if ((1 << state) & awaited)
                        return state;
 
diff -r 8f0b5295bb1b -r dcec453681bc tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/xenstore/Makefile   Thu Mar 08 14:39:52 2007 -0600
@@ -168,16 +168,16 @@ tarball: clean
 
 .PHONY: install
 install: all
-       $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
-       $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/bin
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/include
+       $(INSTALL_DIR) $(DESTDIR)/var/run/xenstored
+       $(INSTALL_DIR) $(DESTDIR)/var/lib/xenstored
+       $(INSTALL_DIR) $(DESTDIR)/usr/bin
+       $(INSTALL_DIR) $(DESTDIR)/usr/sbin
+       $(INSTALL_DIR) $(DESTDIR)/usr/include
        $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
        $(INSTALL_PROG) $(CLIENTS) $(DESTDIR)/usr/bin
        $(INSTALL_PROG) xenstore-control $(DESTDIR)/usr/bin
        $(INSTALL_PROG) xenstore-ls $(DESTDIR)/usr/bin
-       $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+       $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
        $(INSTALL_PROG) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
        ln -sf libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenstore.so.$(MAJOR)
        ln -sf libxenstore.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenstore.so
diff -r 8f0b5295bb1b -r dcec453681bc xen/Rules.mk
--- a/xen/Rules.mk      Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/Rules.mk      Thu Mar 08 14:39:52 2007 -0600
@@ -41,8 +41,8 @@ include $(BASEDIR)/arch/$(TARGET_ARCH)/R
 include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
 
 # Do not depend on auto-generated header files.
-HDRS := $(subst $(BASEDIR)/include/asm-$(TARGET_ARCH)/asm-offsets.h,,$(HDRS))
-HDRS := $(subst $(BASEDIR)/include/xen/compile.h,,$(HDRS))
+AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS))
+HDRS  := $(filter-out %/asm-offsets.h,$(AHDRS))
 
 # Note that link order matters!
 ALL_OBJS-y               += $(BASEDIR)/common/built_in.o
@@ -110,12 +110,12 @@ _clean_%/: FORCE
 %.o: %.c $(HDRS) Makefile
        $(CC) $(CFLAGS) -c $< -o $@
 
-%.o: %.S $(HDRS) Makefile
+%.o: %.S $(AHDRS) Makefile
        $(CC) $(AFLAGS) -c $< -o $@
 
 %.i: %.c $(HDRS) Makefile
        $(CPP) $(CFLAGS) $< -o $@
 
 # -std=gnu{89,99} gets confused by # as an end-of-line comment marker
-%.s: %.S $(HDRS) Makefile
+%.s: %.S $(AHDRS) Makefile
        $(CPP) $(AFLAGS) $< -o $@
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/domain.c     Thu Mar 08 14:39:52 2007 -0600
@@ -641,6 +641,31 @@ int arch_set_info_guest(
             }
 
             v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
+
+#ifdef __x86_64__
+            if ( c.nat->ctrlreg[1] )
+            {
+                cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1]));
+
+                if ( !mfn_valid(cr3_pfn) ||
+                     (paging_mode_refcounts(d)
+                      ? !get_page(mfn_to_page(cr3_pfn), d)
+                      : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+                                           PGT_base_page_table)) )
+                {
+                    cr3_pfn = pagetable_get_pfn(v->arch.guest_table);
+                    v->arch.guest_table = pagetable_null();
+                    if ( paging_mode_refcounts(d) )
+                        put_page(mfn_to_page(cr3_pfn));
+                    else
+                        put_page_and_type(mfn_to_page(cr3_pfn));
+                    destroy_gdt(v);
+                    return -EINVAL;
+                }
+
+                v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn);
+            }
+#endif
         }
 #ifdef CONFIG_COMPAT
         else
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/domain_build.c       Thu Mar 08 14:39:52 2007 -0600
@@ -374,9 +374,6 @@ int construct_dom0(struct domain *d,
     if ( parms.f_required[0] /* Huh? -- kraxel */ )
             panic("Domain 0 requires an unsupported hypervisor feature.\n");
 
-    /* Align load address to 4MB boundary. */
-    v_start = parms.virt_base & ~((1UL<<22)-1);
-
     /*
      * Why do we need this? The number of page-table frames depends on the 
      * size of the bootstrap address space. But the size of the address space 
@@ -384,6 +381,7 @@ int construct_dom0(struct domain *d,
      * read-only). We have a pair of simultaneous equations in two unknowns, 
      * which we solve by exhaustive search.
      */
+    v_start          = parms.virt_base;
     vkern_start      = parms.virt_kstart;
     vkern_end        = parms.virt_kend;
     vinitrd_start    = round_pgup(vkern_end);
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/domctl.c     Thu Mar 08 14:39:52 2007 -0600
@@ -470,8 +470,15 @@ void arch_get_info_guest(struct vcpu *v,
         c(user_regs.eflags |= v->arch.iopl << 12);
 
         if ( !IS_COMPAT(v->domain) )
+        {
             c.nat->ctrlreg[3] = xen_pfn_to_cr3(
                 pagetable_get_pfn(v->arch.guest_table));
+#ifdef __x86_64__
+            if ( !pagetable_is_null(v->arch.guest_table_user) )
+                c.nat->ctrlreg[1] = xen_pfn_to_cr3(
+                    pagetable_get_pfn(v->arch.guest_table_user));
+#endif
+        }
 #ifdef CONFIG_COMPAT
         else
         {
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Thu Mar 08 14:39:52 2007 -0600
@@ -161,7 +161,8 @@ int hvm_domain_initialise(struct domain 
     spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
 
-    rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external);
+    /* paging support will be determined inside paging.c */
+    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
     if ( rc != 0 )
         return rc;
 
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c    Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/hvm/svm/emulate.c    Thu Mar 08 14:39:52 2007 -0600
@@ -24,8 +24,10 @@
 #include <asm/msr.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
+#include <asm/hvm/svm/svm.h>
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/hvm/svm/emulate.h>
+
 
 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
         int inst_len);
@@ -133,13 +135,15 @@ static inline unsigned long DECODE_GPR_V
 #define sib operand [1]
 
 
-unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb, 
-        struct cpu_user_regs *regs, const u8 prefix, int inst_len,
-        const u8 *operand, u8 *size)
+unsigned long get_effective_addr_modrm64(struct cpu_user_regs *regs, 
+                                         const u8 prefix, int inst_len,
+                                         const u8 *operand, u8 *size)
 {
     unsigned long effective_addr = (unsigned long) -1;
     u8 length, modrm_mod, modrm_rm;
     u32 disp = 0;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     HVM_DBG_LOG(DBG_LEVEL_1, "get_effective_addr_modrm64(): prefix = %x, "
             "length = %d, operand[0,1] = %x %x.\n", prefix, *size, operand [0],
@@ -198,7 +202,7 @@ unsigned long get_effective_addr_modrm64
 
 #if __x86_64__
         /* 64-bit mode */
-        if (vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA))
+        if (vmcb->cs.attr.fields.l && svm_long_mode_enabled(v))
             return vmcb->rip + inst_len + *size + disp;
 #endif
         return disp;
@@ -310,7 +314,7 @@ unsigned int decode_src_reg(u8 prefix, u
 }
 
 
-unsigned long svm_rip2pointer(struct vmcb_struct *vmcb)
+unsigned long svm_rip2pointer(struct vcpu *v)
 {
     /*
      * The following is subtle. Intuitively this code would be something like:
@@ -322,8 +326,9 @@ unsigned long svm_rip2pointer(struct vmc
      * %cs is update, but fortunately, base contain the valid base address
      * no matter what kind of addressing is used.
      */
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     unsigned long p = vmcb->cs.base + vmcb->rip;
-    if (!(vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA))
+    if (!(vmcb->cs.attr.fields.l && svm_long_mode_enabled(v)))
         return (u32)p; /* mask to 32 bits */
     /* NB. Should mask to 16 bits if in real mode or 16-bit protected mode. */
     return p;
@@ -410,10 +415,11 @@ static const u8 *opc_bytes[INSTR_MAX_COU
  * The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer
  * to enough bytes to satisfy the instruction including prefix bytes.
  */
-int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
+int __get_instruction_length_from_list(struct vcpu *v,
         enum instruction_index *list, unsigned int list_count, 
         u8 *guest_eip_buf, enum instruction_index *match)
 {
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     unsigned int inst_len = 0;
     unsigned int i;
     unsigned int j;
@@ -429,7 +435,7 @@ int __get_instruction_length_from_list(s
     }
     else
     {
-        inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), MAX_INST_LEN);
+        inst_copy_from_guest(buffer, svm_rip2pointer(v), MAX_INST_LEN);
         buf = buffer;
     }
 
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Thu Mar 08 14:39:52 2007 -0600
@@ -49,6 +49,7 @@
 #include <public/sched.h>
 #include <asm/hvm/vpt.h>
 #include <asm/hvm/trace.h>
+#include <asm/hap.h>
 
 #define SVM_EXTRA_DEBUG
 
@@ -75,6 +76,10 @@ static void *root_vmcb[NR_CPUS] __read_m
 
 /* physical address of above for host VMSAVE/VMLOAD */
 u64 root_vmcb_pa[NR_CPUS] __read_mostly;
+
+/* hardware assisted paging bits */
+extern int opt_hap_enabled;
+extern int hap_capable_system;
 
 static inline void svm_inject_exception(struct vcpu *v, int trap, 
                                         int ev, int error_code)
@@ -148,31 +153,6 @@ static void svm_store_cpu_guest_regs(
     }
 }
 
-static int svm_paging_enabled(struct vcpu *v)
-{
-    unsigned long cr0;
-
-    cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
-
-    return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
-}
-
-static int svm_pae_enabled(struct vcpu *v)
-{
-    unsigned long cr4;
-
-    if(!svm_paging_enabled(v))
-        return 0;
-
-    cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
-
-    return (cr4 & X86_CR4_PAE);
-}
-
-static int svm_long_mode_enabled(struct vcpu *v)
-{
-    return test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
-}
 
 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
 {
@@ -183,8 +163,7 @@ static inline int long_mode_do_msr_read(
     switch ((u32)regs->ecx)
     {
     case MSR_EFER:
-        msr_content = vmcb->efer;
-        msr_content &= ~EFER_SVME;
+        msr_content = v->arch.hvm_svm.cpu_shadow_efer;
         break;
 
 #ifdef __x86_64__
@@ -255,30 +234,54 @@ static inline int long_mode_do_msr_write
             goto gp_fault;
         }
 
+        /* 
+         * update the VMCB's EFER with the intended value along with
+         * that crucial EFER.SVME bit =)
+         */
+        vmcb->efer = msr_content | EFER_SVME;
+
 #ifdef __x86_64__
-        /* LME: 0 -> 1 */
-        if ( msr_content & EFER_LME &&
-             !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
+
+        /*
+         * Check for EFER.LME transitions from 0->1 or 1->0.  Do the
+         * sanity checks and then make sure that both EFER.LME and
+         * EFER.LMA are cleared. (EFER.LME can't be set in the vmcb
+         * until the guest also sets CR0.PG, since even if the guest has
+         * paging "disabled", the vmcb's CR0 always has PG set.)
+         */
+        if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
         {
+            /* EFER.LME transition from 0 to 1 */
+            
             if ( svm_paging_enabled(v) ||
-                 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
-                           &v->arch.hvm_svm.cpu_state) )
+                 !svm_cr4_pae_is_set(v) )
             {
                 gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
                          "in paging mode or PAE bit is not set\n");
                 goto gp_fault;
             }
-            set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
-        }
-
-        /* We have already recorded that we want LME, so it will be set 
-         * next time CR0 gets updated. So we clear that bit and continue.
-         */
-        if ((msr_content ^ vmcb->efer) & EFER_LME)
-            msr_content &= ~EFER_LME;  
-        /* No update for LME/LMA since it have no effect */
-#endif
-        vmcb->efer = msr_content | EFER_SVME;
+
+            vmcb->efer &= ~(EFER_LME | EFER_LMA);
+        }
+        else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) )
+        {
+            /* EFER.LME transition from 1 to 0 */
+            
+            if ( svm_paging_enabled(v) )
+            {
+                gdprintk(XENLOG_WARNING, 
+                         "Trying to clear EFER.LME while paging enabled\n");
+                goto gp_fault;
+            }
+
+            vmcb->efer &= ~(EFER_LME | EFER_LMA);
+        }
+
+#endif /* __x86_64__ */
+
+        /* update the guest EFER's shadow with the intended value */
+        v->arch.hvm_svm.cpu_shadow_efer = msr_content;
+
         break;
 
 #ifdef __x86_64__
@@ -468,22 +471,25 @@ int svm_vmcb_restore(struct vcpu *v, str
             c->cr4);
 #endif
 
-    if (!svm_paging_enabled(v)) {
+    if ( !svm_paging_enabled(v) ) 
+    {
         printk("%s: paging not enabled.", __func__);
         goto skip_cr3;
     }
 
-    if (c->cr3 == v->arch.hvm_svm.cpu_cr3) {
+    if ( c->cr3 == v->arch.hvm_svm.cpu_cr3 ) 
+    {
         /*
          * This is simple TLB flush, implying the guest has
          * removed some translation or changed page attributes.
          * We simply invalidate the shadow.
          */
         mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+        if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) 
             goto bad_cr3;
-        }
-    } else {
+    } 
+    else 
+    {
         /*
          * If different, make a shadow. Check if the PDBR is valid
          * first.
@@ -491,9 +497,9 @@ int svm_vmcb_restore(struct vcpu *v, str
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
         /* current!=vcpu as not called by arch_vmx_do_launch */
         mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
+        if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) 
             goto bad_cr3;
-        }
+
         old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
         v->arch.guest_table = pagetable_from_pfn(mfn);
         if (old_base_mfn)
@@ -631,7 +637,7 @@ static int svm_guest_x86_mode(struct vcp
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-    if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l )
+    if ( svm_long_mode_enabled(v) && vmcb->cs.attr.fields.l )
         return 8;
 
     if ( svm_realmode(v) )
@@ -681,7 +687,7 @@ static unsigned long svm_get_segment_bas
     int long_mode = 0;
 
 #ifdef __x86_64__
-    long_mode = vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA);
+    long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v);
 #endif
     switch ( seg )
     {
@@ -905,6 +911,10 @@ static void arch_svm_do_launch(struct vc
 {
     svm_do_launch(v);
 
+    if ( paging_mode_hap(v->domain) ) {
+        v->arch.hvm_svm.vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+    }
+
     if ( v->vcpu_id != 0 )
     {
         cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
@@ -1011,6 +1021,21 @@ static struct hvm_function_table svm_fun
     .event_injection_faulted = svm_event_injection_faulted
 };
 
+void svm_npt_detect(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    /* check CPUID for nested paging support */
+    cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
+    if ( edx & 0x01 ) { /* nested paging */
+        hap_capable_system = 1;
+    }
+    else if ( opt_hap_enabled ) {
+        printk(" nested paging is not supported by this CPU.\n");
+        hap_capable_system = 0; /* no nested paging, we disable flag. */
+    }
+}
+
 int start_svm(void)
 {
     u32 eax, ecx, edx;
@@ -1041,6 +1066,8 @@ int start_svm(void)
     wrmsr(MSR_EFER, eax, edx);
     printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
 
+    svm_npt_detect();
+
     /* Initialize the HSA for this core */
     phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
     phys_hsa_lo = (u32) phys_hsa;
@@ -1077,6 +1104,18 @@ void arch_svm_do_resume(struct vcpu *v)
     }
 }
 
+static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+    if (mmio_space(gpa)) {
+        handle_mmio(gpa);
+        return 1;
+    }
+
+    /* We should not reach here. Otherwise, P2M table is not correct.*/
+    return 0;
+}
+
+
 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs) 
 {
     HVM_DBG_LOG(DBG_LEVEL_VMMU, 
@@ -1114,7 +1153,7 @@ static void svm_do_general_protection_fa
         printk("Huh? We got a GP Fault with an invalid IDTR!\n");
         svm_dump_vmcb(__func__, vmcb);
         svm_dump_regs(__func__, regs);
-        svm_dump_inst(svm_rip2pointer(vmcb));
+        svm_dump_inst(svm_rip2pointer(v));
         domain_crash(v->domain);
         return;
     }
@@ -1209,7 +1248,7 @@ static void svm_vmexit_do_cpuid(struct v
     HVMTRACE_3D(CPUID, v, input,
                 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
 
-    inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
+    inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
     ASSERT(inst_len > 0);
     __update_guest_eip(vmcb, inst_len);
 }
@@ -1312,15 +1351,16 @@ static void svm_dr_access(struct vcpu *v
 }
 
 
-static void svm_get_prefix_info(
-    struct vmcb_struct *vmcb, 
-    unsigned int dir, svm_segment_register_t **seg, unsigned int *asize)
-{
+static void svm_get_prefix_info(struct vcpu *v, unsigned int dir, 
+                                svm_segment_register_t **seg, 
+                                unsigned int *asize)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     unsigned char inst[MAX_INST_LEN];
     int i;
 
     memset(inst, 0, MAX_INST_LEN);
-    if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst)) 
+    if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst)) 
         != MAX_INST_LEN) 
     {
         gdprintk(XENLOG_ERR, "get guest instruction failed\n");
@@ -1400,7 +1440,7 @@ static inline int svm_get_io_address(
 
 #ifdef __x86_64__
     /* If we're in long mode, we shouldn't check the segment presence & limit */
-    long_mode = vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA;
+    long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v);
 #endif
 
     /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit. 
@@ -1419,7 +1459,7 @@ static inline int svm_get_io_address(
         isize --;
 
     if (isize > 1) 
-        svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize);
+        svm_get_prefix_info(v, info.fields.type, &seg, &asize);
 
     if (info.fields.type == IOREQ_WRITE)
     {
@@ -1702,6 +1742,52 @@ static void svm_io_instruction(struct vc
     }
 }
 
+static int npt_set_cr0(unsigned long value) 
+{
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+  
+    ASSERT(vmcb);
+
+    /* ET is reserved and should always be 1. */
+    value |= X86_CR0_ET;
+
+    /* Check whether the guest is about to turn on long mode. 
+     * If it is, set EFER.LME and EFER.LMA.  Update the shadow EFER.LMA
+     * bit too, so svm_long_mode_enabled() will work.
+     */
+    if ( (value & X86_CR0_PG) && svm_lme_is_set(v) &&
+         (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) )
+    {
+        v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
+        vmcb->efer |= EFER_LMA | EFER_LME;
+    }
+
+    /* Whenever CR0.PG is cleared under long mode, LMA will be cleared 
+     * immediately. We emulate this process for svm_long_mode_enabled().
+     */
+    if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
+    {
+        if ( svm_long_mode_enabled(v) )
+        {
+            v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
+        }
+    }
+    /* The VMCB copy gets WP forced on; the shadow keeps the guest's value. */
+    vmcb->cr0 = value | X86_CR0_WP;
+    v->arch.hvm_svm.cpu_shadow_cr0 = value;
+
+    /* TS cleared? Then initialise FPU now and drop the NM intercept. */
+    if ( !(value & X86_CR0_TS) ) {
+        setup_fpu(v);
+        vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
+    }
+    
+    paging_update_paging_modes(v);
+    
+    return 1;
+}
+
 static int svm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
@@ -1727,7 +1813,8 @@ static int svm_set_cr0(unsigned long val
 
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
 
-    if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) 
+    if ( ((value & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG))
+         && !paging_enabled ) 
     {
         /* The guest CR3 must be pointing to the guest physical. */
         mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
@@ -1740,18 +1827,16 @@ static int svm_set_cr0(unsigned long val
         }
 
 #if defined(__x86_64__)
-        if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state) 
-            && !test_bit(SVM_CPU_STATE_PAE_ENABLED, 
-                         &v->arch.hvm_svm.cpu_state))
+        if ( svm_lme_is_set(v) && !svm_cr4_pae_is_set(v) )
         {
             HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
             svm_inject_exception(v, TRAP_gp_fault, 1, 0);
         }
 
-        if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
+        if ( svm_lme_is_set(v) )
         {
             HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
-            set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
+            v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
             vmcb->efer |= EFER_LMA | EFER_LME;
         }
 #endif  /* __x86_64__ */
@@ -1790,14 +1875,93 @@ static int svm_set_cr0(unsigned long val
     {
         if ( svm_long_mode_enabled(v) )
         {
-            vmcb->efer &= ~EFER_LMA;
-            clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
+            vmcb->efer &= ~(EFER_LME | EFER_LMA);
+            v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
         }
         /* we should take care of this kind of situation */
         paging_update_paging_modes(v);
     }
 
     return 1;
+}
+
+//
+// nested paging functions
+//
+
+static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
+{  
+    unsigned long value;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    ASSERT(vmcb);
+
+    value = get_reg(gpreg, regs, vmcb);
+
+    switch (cr) {
+    case 0:
+        return npt_set_cr0(value);
+
+    case 3:
+        vmcb->cr3 = value;
+        v->arch.hvm_svm.cpu_cr3 = value;
+        break;
+
+    case 4: /* CR4 */
+        vmcb->cr4 = value;
+        v->arch.hvm_svm.cpu_shadow_cr4 = value;
+        paging_update_paging_modes(v);
+        break;
+
+    case 8:
+        vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+        vmcb->vintr.fields.tpr = value & 0x0F;
+        break;
+
+    default:
+        gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
+        domain_crash(v->domain);
+        return 0;
+    }
+    
+    return 1;
+}
+
+static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+    unsigned long value = 0;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    vmcb = v->arch.hvm_svm.vmcb;
+    ASSERT(vmcb);
+
+    switch(cr) {
+    case 0:
+        value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr0;
+        break;
+    case 2:
+        value = vmcb->cr2;
+        break;
+    case 3:
+        value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
+        break;
+    case 4:
+        value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
+       break;
+    case 8:
+        value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+        value = (value & 0xF0) >> 4;
+        break;
+    default:
+        domain_crash(v->domain);
+        return;
+    }
+    
+    set_reg(gp, value, regs, vmcb);
 }
 
 /*
@@ -1848,12 +2012,6 @@ static void mov_from_cr(int cr, int gp, 
     set_reg(gp, value, regs, vmcb);
 
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
-}
-
-
-static inline int svm_pgbit_test(struct vcpu *v)
-{
-    return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
 }
 
 
@@ -1933,7 +2091,6 @@ static int mov_to_cr(int gpreg, int cr, 
         old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
         if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
         {
-            set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
             if ( svm_pgbit_test(v) )
             {
                 /* The guest is a 32-bit PAE guest. */
@@ -1962,15 +2119,13 @@ static int mov_to_cr(int gpreg, int cr, 
                             v->arch.hvm_svm.cpu_cr3, mfn);
 #endif
             }
-        }
-        else if (value & X86_CR4_PAE) {
-            set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
-        } else {
-            if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
-                         &v->arch.hvm_svm.cpu_state)) {
+        } 
+        else if ( !(value & X86_CR4_PAE) )
+        {
+            if ( svm_long_mode_enabled(v) )
+            {
                 svm_inject_exception(v, TRAP_gp_fault, 1, 0);
             }
-            clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
         }
 
         v->arch.hvm_svm.cpu_shadow_cr4 = value;
@@ -2024,7 +2179,7 @@ static int svm_cr_access(struct vcpu *v,
 
     ASSERT(vmcb);
 
-    inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
+    inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
 
     /* get index to first actual instruction byte - as we will need to know 
        where the prefix lives later on */
@@ -2033,12 +2188,12 @@ static int svm_cr_access(struct vcpu *v,
     if ( type == TYPE_MOV_TO_CR )
     {
         inst_len = __get_instruction_length_from_list(
-            vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
+            v, list_a, ARR_SIZE(list_a), &buffer[index], &match);
     }
     else /* type == TYPE_MOV_FROM_CR */
     {
         inst_len = __get_instruction_length_from_list(
-            vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
+            v, list_b, ARR_SIZE(list_b), &buffer[index], &match);
     }
 
     ASSERT(inst_len > 0);
@@ -2055,12 +2210,18 @@ static int svm_cr_access(struct vcpu *v,
     {
     case INSTR_MOV2CR:
         gpreg = decode_src_reg(prefix, buffer[index+2]);
-        result = mov_to_cr(gpreg, cr, regs);
+        if ( paging_mode_hap(v->domain) )
+            result = npt_mov_to_cr(gpreg, cr, regs);
+        else
+            result = mov_to_cr(gpreg, cr, regs);
         break;
 
     case INSTR_MOVCR2:
         gpreg = decode_src_reg(prefix, buffer[index+2]);
-        mov_from_cr(cr, gpreg, regs);
+        if ( paging_mode_hap(v->domain) )
+            npt_mov_from_cr(cr, gpreg, regs);
+        else
+            mov_from_cr(cr, gpreg, regs);
         break;
 
     case INSTR_CLTS:
@@ -2073,7 +2234,7 @@ static int svm_cr_access(struct vcpu *v,
 
     case INSTR_LMSW:
         if (svm_dbg_on)
-            svm_dump_inst(svm_rip2pointer(vmcb));
+            svm_dump_inst(svm_rip2pointer(v));
         
         gpreg = decode_src_reg(prefix, buffer[index+2]);
         value = get_reg(gpreg, regs, vmcb) & 0xF;
@@ -2087,12 +2248,15 @@ static int svm_cr_access(struct vcpu *v,
         if (svm_dbg_on)
             printk("CR0-LMSW CR0 - New value=%lx\n", value);
 
-        result = svm_set_cr0(value);
+        if ( paging_mode_hap(v->domain) )
+            result = npt_set_cr0(value);
+        else
+            result = svm_set_cr0(value);
         break;
 
     case INSTR_SMSW:
         if (svm_dbg_on)
-            svm_dump_inst(svm_rip2pointer(vmcb));
+            svm_dump_inst(svm_rip2pointer(v));
         value = v->arch.hvm_svm.cpu_shadow_cr0;
         gpreg = decode_src_reg(prefix, buffer[index+2]);
         set_reg(gpreg, value, regs, vmcb);
@@ -2168,7 +2332,7 @@ static inline void svm_do_msr_access(
         HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
                     ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
 
-        inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
+        inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
     }
     else
     {
@@ -2200,7 +2364,7 @@ static inline void svm_do_msr_access(
             break;
         }
 
-        inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
+        inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
     }
 
     __update_guest_eip(vmcb, inst_len);
@@ -2223,8 +2387,9 @@ static inline void svm_vmexit_do_hlt(str
 }
 
 
-static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
-{
+static void svm_vmexit_do_invd(struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     int  inst_len;
     
     /* Invalidate the cache - we can't really do that safely - maybe we should 
@@ -2237,7 +2402,7 @@ static void svm_vmexit_do_invd(struct vm
      */
     printk("INVD instruction intercepted - ignored\n");
     
-    inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
+    inst_len = __get_instruction_length(v, INSTR_INVD, NULL);
     __update_guest_eip(vmcb, inst_len);
 }    
         
@@ -2289,7 +2454,7 @@ void svm_handle_invlpg(const short invlp
      * Unknown how many bytes the invlpg instruction will take.  Use the
      * maximum instruction length here
      */
-    if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
+    if (inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length)
     {
         gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
         domain_crash(v->domain);
@@ -2298,7 +2463,7 @@ void svm_handle_invlpg(const short invlp
 
     if (invlpga)
     {
-        inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
+        inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
         ASSERT(inst_len > 0);
         __update_guest_eip(vmcb, inst_len);
 
@@ -2312,7 +2477,7 @@ void svm_handle_invlpg(const short invlp
     {
         /* What about multiple prefix codes? */
         prefix = (is_prefix(opcode[0])?opcode[0]:0);
-        inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
+        inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
         ASSERT(inst_len > 0);
 
         inst_len--;
@@ -2323,7 +2488,7 @@ void svm_handle_invlpg(const short invlp
          * displacement to get effective address and length in bytes.  Assume
          * the system in either 32- or 64-bit mode.
          */
-        g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, inst_len,
+        g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
                                              &opcode[inst_len], &length);
 
         inst_len += length;
@@ -2369,7 +2534,11 @@ static int svm_do_vmmcall_reset_to_realm
 
     vmcb->cr4 = SVM_CR4_HOST_MASK;
     v->arch.hvm_svm.cpu_shadow_cr4 = 0;
-    clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
+
+    if ( paging_mode_hap(v->domain) ) {
+        vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+    }
 
     /* This will jump to ROMBIOS */
     vmcb->rip = 0xFFF0;
@@ -2445,7 +2614,7 @@ static int svm_do_vmmcall(struct vcpu *v
     ASSERT(vmcb);
     ASSERT(regs);
 
-    inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
+    inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
     ASSERT(inst_len > 0);
 
     HVMTRACE_1D(VMMCALL, v, regs->eax);
@@ -2855,7 +3024,7 @@ asmlinkage void svm_vmexit_handler(struc
 
             svm_dump_vmcb(__func__, vmcb);
             svm_dump_regs(__func__, regs);
-            svm_dump_inst(svm_rip2pointer(vmcb));
+            svm_dump_inst(svm_rip2pointer(v));
         }
 
 #if defined(__i386__)
@@ -2957,7 +3126,7 @@ asmlinkage void svm_vmexit_handler(struc
         /* Debug info to hopefully help debug WHY the guest double-faulted. */
         svm_dump_vmcb(__func__, vmcb);
         svm_dump_regs(__func__, regs);
-        svm_dump_inst(svm_rip2pointer(vmcb));
+        svm_dump_inst(svm_rip2pointer(v));
         svm_inject_exception(v, TRAP_double_fault, 1, 0);
         break;
 
@@ -2967,7 +3136,7 @@ asmlinkage void svm_vmexit_handler(struc
         break;
 
     case VMEXIT_INVD:
-        svm_vmexit_do_invd(vmcb);
+        svm_vmexit_do_invd(v);
         break;
 
     case VMEXIT_GDTR_WRITE:
@@ -3053,6 +3222,15 @@ asmlinkage void svm_vmexit_handler(struc
     case VMEXIT_SHUTDOWN:
         hvm_triple_fault();
         break;
+
+    case VMEXIT_NPF:
+    {
+        regs->error_code = vmcb->exitinfo1;
+        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) ) {
+            domain_crash(v->domain);
+        }
+        break;
+    }
 
     default:
     exit_and_crash:
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Thu Mar 08 14:39:52 2007 -0600
@@ -200,6 +200,13 @@ static int construct_vmcb(struct vcpu *v
     vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
 
     arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
+
+    if ( paging_mode_hap(v->domain) ) {
+        vmcb->cr0 = arch_svm->cpu_shadow_cr0;
+        vmcb->np_enable = 1; /* enable nested paging */
+        vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
+        vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG;
+    }
 
     return 0;
 }
@@ -310,7 +317,8 @@ void svm_dump_vmcb(const char *from, str
     printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n", 
            (unsigned long long) vmcb->kerngsbase,
            (unsigned long long) vmcb->g_pat);
-    
+    printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
     /* print out all the selectors */
     svm_dump_sel("CS", &vmcb->cs);
     svm_dump_sel("DS", &vmcb->ds);
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm.c Thu Mar 08 14:39:52 2007 -0600
@@ -424,7 +424,10 @@ void invalidate_shadow_ldt(struct vcpu *
     }
 
     /* Dispose of the (now possibly invalid) mappings from the TLB.  */
-    queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
+    if ( v == current )
+        queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
+    else
+        flush_tlb_mask(v->domain->domain_dirty_cpumask);
 }
 
 
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/Makefile
--- a/xen/arch/x86/mm/Makefile  Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/Makefile  Thu Mar 08 14:39:52 2007 -0600
@@ -1,4 +1,5 @@ subdir-y += shadow
 subdir-y += shadow
+subdir-y += hap
 
 obj-y += paging.o
 obj-y += p2m.o
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/hap/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/Makefile      Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,2 @@
+obj-y += hap.o
+obj-y += support.o
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/hap/hap.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/hap.c Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,708 @@
+/******************************************************************************
+ * arch/x86/mm/hap/hap.c
+ *
+ * hardware assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 by XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/irq.h>
+#include <xen/domain_page.h>
+#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
+#include <asm/event.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/shared.h>
+#include <asm/hap.h>
+#include <asm/paging.h>
+#include <asm/domain.h>
+
+#include "private.h"
+
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_to_page
+#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#undef page_to_mfn
+#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+
+/************************************************/
+/*             HAP SUPPORT FUNCTIONS            */
+/************************************************/
+mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
+{
+    struct page_info *sp = NULL;
+    void *p;
+
+    ASSERT(hap_locked_by_me(d));
+
+    sp = list_entry(d->arch.paging.hap.freelists.next, struct page_info, list);
+    list_del(&sp->list);
+    d->arch.paging.hap.free_pages -= 1;
+
+    /* Now safe to clear the page for reuse */
+    p = hap_map_domain_page(page_to_mfn(sp));
+    ASSERT(p != NULL);
+    clear_page(p);
+    hap_unmap_domain_page(p);
+
+    return page_to_mfn(sp);
+}
+
+void hap_free(struct domain *d, mfn_t smfn)
+{
+    struct page_info *sp = mfn_to_page(smfn); 
+
+    ASSERT(hap_locked_by_me(d));
+
+    d->arch.paging.hap.free_pages += 1;
+    list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+}
+
+static int hap_alloc_p2m_pages(struct domain *d)
+{
+    struct page_info *pg;
+
+    ASSERT(hap_locked_by_me(d));
+
+    pg = mfn_to_page(hap_alloc(d, 0));
+    d->arch.paging.hap.p2m_pages += 1;
+    d->arch.paging.hap.total_pages -= 1;
+    
+    page_set_owner(pg, d);
+    pg->count_info = 1;
+    list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
+
+    return 1;
+}
+
+struct page_info * hap_alloc_p2m_page(struct domain *d)
+{
+    struct list_head *entry;
+    struct page_info *pg;
+    mfn_t mfn;
+    void *p;
+
+    hap_lock(d);
+    
+    if ( list_empty(&d->arch.paging.hap.p2m_freelist) && 
+         !hap_alloc_p2m_pages(d) ) {
+        hap_unlock(d);
+        return NULL;
+    }
+    entry = d->arch.paging.hap.p2m_freelist.next;
+    list_del(entry);
+    
+    hap_unlock(d);
+
+    pg = list_entry(entry, struct page_info, list);
+    mfn = page_to_mfn(pg);
+    p = hap_map_domain_page(mfn);
+    clear_page(p);
+    hap_unmap_domain_page(p);
+
+    return pg;
+}
+
+void hap_free_p2m_page(struct domain *d, struct page_info *pg)
+{
+    ASSERT(page_get_owner(pg) == d);
+    /* Should have just the one ref we gave it in hap_alloc_p2m_pages() */
+    if ( (pg->count_info & PGC_count_mask) != 1 ) {
+        HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+                  pg->count_info, pg->u.inuse.type_info);
+    }
+    /* Free should not decrement domain's total allocation, since 
+     * these pages were allocated without an owner. */
+    page_set_owner(pg, NULL); 
+    free_domheap_pages(pg, 0);
+    d->arch.paging.hap.p2m_pages--;
+}
+
+/* Return the size of the pool, rounded up to the nearest MB */
+static unsigned int
+hap_get_allocation(struct domain *d)
+{
+    unsigned int pg = d->arch.paging.hap.total_pages;
+
+    HERE_I_AM;
+    return ((pg >> (20 - PAGE_SHIFT))
+            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
+
+/* Set the pool of pages to the required number of pages.
+ * Returns 0 for success, non-zero for failure. */
+static unsigned int
+hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
+{
+    struct page_info *sp;
+
+    ASSERT(hap_locked_by_me(d));
+
+    while ( d->arch.paging.hap.total_pages != pages ) {
+        if ( d->arch.paging.hap.total_pages < pages ) {
+            /* Need to allocate more memory from domheap */
+            sp = alloc_domheap_pages(NULL, 0, 0);
+            if ( sp == NULL ) {
+                HAP_PRINTK("failed to allocate hap pages.\n");
+                return -ENOMEM;
+            }
+            d->arch.paging.hap.free_pages += 1;
+            d->arch.paging.hap.total_pages += 1;
+            list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+        }
+        else if ( d->arch.paging.hap.total_pages > pages ) {
+            /* Need to return memory to domheap */
+            ASSERT(!list_empty(&d->arch.paging.hap.freelists));
+            sp = list_entry(d->arch.paging.hap.freelists.next,
+                            struct page_info, list);
+            list_del(&sp->list);
+            d->arch.paging.hap.free_pages -= 1;
+            d->arch.paging.hap.total_pages -= 1;
+            free_domheap_pages(sp, 0);
+        }
+        
+        /* Check to see if we need to yield and try again */
+        if ( preempted && hypercall_preempt_check() ) {
+            *preempted = 1;
+            return 0;
+        }
+    }
+
+    return 0;
+}
+
+#if CONFIG_PAGING_LEVELS == 4
+void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
+{
+    struct domain *d = v->domain;
+    l4_pgentry_t *sl4e;
+
+    sl4e = hap_map_domain_page(sl4mfn);
+    ASSERT(sl4e != NULL);
+
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    sl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
+        l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))),
+                     __PAGE_HYPERVISOR);
+
+    sl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
+        l4e_from_pfn(mfn_x(gl4mfn), __PAGE_HYPERVISOR);
+
+    /* install domain-specific P2M table */
+    sl4e[l4_table_offset(RO_MPT_VIRT_START)] =
+        l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+                     __PAGE_HYPERVISOR);
+
+    hap_unmap_domain_page(sl4e);
+}
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+#if CONFIG_PAGING_LEVELS == 3
+void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn)
+{
+    struct domain *d = v->domain;
+    l2_pgentry_t *sl2e;
+
+    int i;
+
+    sl2e = hap_map_domain_page(sl2hmfn);
+    ASSERT(sl2e != NULL);
+    
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            l2e_from_pfn(
+                         mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+                         __PAGE_HYPERVISOR);
+    
+    for ( i = 0; i < HAP_L3_PAGETABLE_ENTRIES; i++ )
+        sl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+            l2e_empty();
+
+    if ( paging_mode_translate(d) )
+    {
+        /* Install the domain-specific p2m table */
+        l3_pgentry_t *p2m;
+        ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+        p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+        for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
+        {
+            sl2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
+                (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
+                ? l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))),
+                                      __PAGE_HYPERVISOR)
+                : l2e_empty();
+        }
+        hap_unmap_domain_page(p2m);
+    }
+
+    hap_unmap_domain_page(sl2e);
+}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+void hap_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
+{
+    struct domain *d = v->domain;
+    l2_pgentry_t *sl2e;
+    int i;
+
+    sl2e = hap_map_domain_page(sl2mfn);
+    ASSERT(sl2e != NULL);
+    
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            l2e_from_pfn(
+                mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+                __PAGE_HYPERVISOR);
+
+
+    sl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
+        l2e_from_pfn(mfn_x(gl2mfn), __PAGE_HYPERVISOR);
+
+    /* install domain-specific P2M table */
+    sl2e[l2_table_offset(RO_MPT_VIRT_START)] =
+        l2e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+                            __PAGE_HYPERVISOR);
+
+    hap_unmap_domain_page(sl2e);
+}
+#endif
+
+mfn_t hap_make_monitor_table(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
+
+#if CONFIG_PAGING_LEVELS == 4
+    {
+        mfn_t m4mfn;
+        m4mfn = hap_alloc(d, 0);
+        hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
+        return m4mfn;
+    }
+#elif CONFIG_PAGING_LEVELS == 3
+    {
+        mfn_t m3mfn, m2mfn; 
+        l3_pgentry_t *l3e;
+        l2_pgentry_t *l2e;
+        int i;
+
+        m3mfn = hap_alloc(d, 0);
+
+        /* Install a monitor l2 table in slot 3 of the l3 table.
+         * This is used for all Xen entries, including linear maps
+         */
+        m2mfn = hap_alloc(d, 0);
+        l3e = hap_map_domain_page(m3mfn);
+        l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
+        hap_install_xen_entries_in_l2h(v, m2mfn);
+        /* Install the monitor's own linear map */
+        l2e = hap_map_domain_page(m2mfn);
+        for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+            l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+                (l3e_get_flags(l3e[i]) & _PAGE_PRESENT) 
+                ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR) 
+                : l2e_empty();
+        hap_unmap_domain_page(l2e);
+        hap_unmap_domain_page(l3e);
+
+        HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
+        return m3mfn;
+    }
+#else
+    {
+        mfn_t m2mfn;
+        
+        m2mfn = hap_alloc(d, 0);
+        hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
+    
+        return m2mfn;
+    }
+#endif
+}
+
+void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
+{
+    struct domain *d = v->domain;
+
+#if CONFIG_PAGING_LEVELS == 4
+    /* Need to destroy the l3 monitor page in slot 0 too */
+    {
+        mfn_t m3mfn;
+        l4_pgentry_t *l4e = hap_map_domain_page(mmfn);
+        ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+        m3mfn = _mfn(l4e_get_pfn(l4e[0]));
+        hap_free(d, m3mfn);
+        hap_unmap_domain_page(l4e);
+    }
+#elif CONFIG_PAGING_LEVELS == 3
+    /* Need to destroy the l2 monitor page in slot 3 too */
+    {
+        l3_pgentry_t *l3e = hap_map_domain_page(mmfn);
+        ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+        hap_free(d, _mfn(l3e_get_pfn(l3e[3])));
+        hap_unmap_domain_page(l3e);
+    }
+#endif
+
+    /* Put the memory back in the pool */
+    hap_free(d, mmfn);
+}
+
+/************************************************/
+/*          HAP DOMAIN LEVEL FUNCTIONS          */
+/************************************************/
+void hap_domain_init(struct domain *d)
+{
+    hap_lock_init(d);
+    INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
+    INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
+}
+
+/* return 0 for success, -errno for failure */
+int hap_enable(struct domain *d, u32 mode)
+{
+    unsigned int old_pages;
+    int rv = 0;
+
+    HERE_I_AM;
+
+    domain_pause(d);
+    /* error check */
+    if ( (d == current->domain) ) {
+        rv = -EINVAL;
+        goto out;
+    }
+
+    old_pages = d->arch.paging.hap.total_pages;
+    if ( old_pages == 0 ) {
+        unsigned int r;
+        hap_lock(d);
+        r = hap_set_allocation(d, 256, NULL);
+        hap_unlock(d);
+        if ( r != 0 ) {
+            hap_set_allocation(d, 0, NULL);
+            rv = -ENOMEM;
+            goto out;
+        }
+    }
+
+    /* allocate P2m table */
+    if ( mode & PG_translate ) {
+        rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
+        if ( rv != 0 )
+            goto out;
+    }
+
+    d->arch.paging.mode = mode | PG_SH_enable;
+
+ out:
+    domain_unpause(d);
+    return rv;
+}
+
+void hap_final_teardown(struct domain *d)
+{
+    HERE_I_AM;
+
+    if ( d->arch.paging.hap.total_pages != 0 )
+        hap_teardown(d);
+
+    p2m_teardown(d);
+}
+
+void hap_teardown(struct domain *d)
+{
+    struct vcpu *v;
+    mfn_t mfn;
+    HERE_I_AM;
+
+    ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
+    ASSERT(d != current->domain);
+
+    if ( !hap_locked_by_me(d) )
+        hap_lock(d); /* Keep various asserts happy */
+
+    if ( paging_mode_enabled(d) ) {
+        /* release the monitor table held by each vcpu */
+        for_each_vcpu(d, v) {
+            if ( v->arch.paging.mode && paging_mode_external(d) ) {
+                mfn = pagetable_get_mfn(v->arch.monitor_table);
+                if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
+                    hap_destroy_monitor_table(v, mfn);
+                v->arch.monitor_table = pagetable_null();
+            }
+        }
+    }
+
+    if ( d->arch.paging.hap.total_pages != 0 ) {
+        HAP_PRINTK("teardown of domain %u starts."
+                      "  pages total = %u, free = %u, p2m=%u\n",
+                      d->domain_id,
+                      d->arch.paging.hap.total_pages,
+                      d->arch.paging.hap.free_pages,
+                      d->arch.paging.hap.p2m_pages);
+        hap_set_allocation(d, 0, NULL);
+        HAP_PRINTK("teardown done."
+                      "  pages total = %u, free = %u, p2m=%u\n",
+                      d->arch.paging.hap.total_pages,
+                      d->arch.paging.hap.free_pages,
+                      d->arch.paging.hap.p2m_pages);
+        ASSERT(d->arch.paging.hap.total_pages == 0);
+    }
+    
+    d->arch.paging.mode &= ~PG_log_dirty;
+
+    hap_unlock(d);
+}
+
+int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+               XEN_GUEST_HANDLE(void) u_domctl)
+{
+    int rc, preempted = 0;
+    /* Services sc->op for domain d; only the allocation ops are supported. */
+    HERE_I_AM;
+    /* A domain may not issue these operations against itself. */
+    if ( unlikely(d == current->domain) ) {
+        gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n");
+        return -EINVAL;
+    }
+    /* The first group of ops is rejected and also crashes domain d. */
+    switch ( sc->op ) {
+    case XEN_DOMCTL_SHADOW_OP_OFF:
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
+    case XEN_DOMCTL_SHADOW_OP_CLEAN:
+    case XEN_DOMCTL_SHADOW_OP_PEEK:
+    case XEN_DOMCTL_SHADOW_OP_ENABLE:
+        HAP_ERROR("Bad hap domctl op %u\n", sc->op);
+        domain_crash(d);
+        return -EINVAL;
+    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
+        hap_lock(d);
+        rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
+        hap_unlock(d);
+        if ( preempted )
+            /* Not finished.  Set up to re-run the call. */
+            rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h", 
+                                               u_domctl);
+        else
+            /* Finished.  Report the resulting allocation through sc->mb. */
+            sc->mb = hap_get_allocation(d);
+        return rc;
+    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+        sc->mb = hap_get_allocation(d);
+        return 0;
+    default:
+        HAP_ERROR("Bad hap domctl op %u\n", sc->op);
+        return -EINVAL;
+    }
+}
+
+void hap_vcpu_init(struct vcpu *v)
+{
+    v->arch.paging.mode = &hap_paging_real_mode;
+}
+/************************************************/
+/*          HAP PAGING MODE FUNCTIONS           */
+/************************************************/
+/* In theory, hap should not intercept guest page faults. This function can 
+ * be recycled to handle host/nested page faults, if needed.
+ */
+int hap_page_fault(struct vcpu *v, unsigned long va, 
+                   struct cpu_user_regs *regs)
+{
+    HERE_I_AM;
+    domain_crash(v->domain);
+    return 0;
+}
+
+/* Called when the guest issues an invlpg request.
+ * Return 1 if a page invalidation needs to be issued on the CPU; return 0 if
+ * it does not.
+ */
+int hap_invlpg(struct vcpu *v, unsigned long va)
+{
+    HERE_I_AM;
+    return 0;
+}
+
+void hap_update_cr3(struct vcpu *v, int do_locking)
+{
+    struct domain *d = v->domain;
+    mfn_t gmfn;
+    /* If do_locking is set the hap lock is taken here, else by the caller. */
+    HERE_I_AM;
+    /* Don't do anything on an uninitialised vcpu */
+    if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) {
+        ASSERT(v->arch.cr3 == 0);
+        return;
+    }
+
+    if ( do_locking )
+        hap_lock(v->domain);
+    /* Whichever path was taken, the lock must be held from here on. */
+    ASSERT(hap_locked_by_me(v->domain));
+    ASSERT(v->arch.paging.mode);
+    /* NOTE(review): gmfn is assigned but never read in this function. */
+    gmfn = pagetable_get_mfn(v->arch.guest_table);
+
+    make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
+    
+    hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.monitor_table));
+
+    HAP_PRINTK("d=%u v=%u guest_table=%05lx, monitor_table = %05lx\n", 
+               d->domain_id, v->vcpu_id, 
+               (unsigned long)pagetable_get_pfn(v->arch.guest_table),
+               (unsigned long)pagetable_get_pfn(v->arch.monitor_table));
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+
+    if ( do_locking )
+        hap_unlock(v->domain);
+}
+
+void hap_update_paging_modes(struct vcpu *v)
+{
+    struct domain *d;
+
+    HERE_I_AM;
+
+    d = v->domain;
+    hap_lock(d);
+
+    /* update guest paging mode. Note that we rely on hvm functions to detect
+     * guest's paging mode. So, make sure the shadow registers (CR0, CR4, EFER)
+     * reflect guest's status correctly.
+     */
+    if ( hvm_paging_enabled(v) ) {
+        if ( hvm_long_mode_enabled(v) )
+            v->arch.paging.mode = &hap_paging_long_mode;
+        else if ( hvm_pae_enabled(v) )
+            v->arch.paging.mode = &hap_paging_pae_mode;
+        else
+            v->arch.paging.mode = &hap_paging_protected_mode;
+    }
+    else {
+        v->arch.paging.mode = &hap_paging_real_mode;
+    }
+
+    v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);    
+
+    /* use p2m map */
+    v->arch.guest_table =
+        pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+
+    if ( pagetable_is_null(v->arch.monitor_table) ) {
+        mfn_t mmfn = hap_make_monitor_table(v);
+        v->arch.monitor_table = pagetable_from_mfn(mmfn);
+        make_cr3(v, mfn_x(mmfn));
+    }
+
+    flush_tlb_mask(d->domain_dirty_cpumask);
+    hap_unlock(d);
+}
+
+void 
+hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
+                    l1_pgentry_t new, unsigned int level)
+{
+    hap_lock(v->domain);
+    safe_write_pte(p, new);
+    hap_unlock(v->domain);
+}
+
+/* Entry points into this mode of the hap code. */
+struct paging_mode hap_paging_real_mode = {
+    .page_fault             = hap_page_fault, 
+    .invlpg                 = hap_invlpg,
+    .gva_to_gfn             = hap_gva_to_gfn_real_mode,
+    .update_cr3             = hap_update_cr3,
+    .update_paging_modes    = hap_update_paging_modes,
+    .write_p2m_entry        = hap_write_p2m_entry,
+    .guest_levels           = 1
+};
+
+struct paging_mode hap_paging_protected_mode = {
+    .page_fault             = hap_page_fault, 
+    .invlpg                 = hap_invlpg,
+    .gva_to_gfn             = hap_gva_to_gfn_protected_mode,
+    .update_cr3             = hap_update_cr3,
+    .update_paging_modes    = hap_update_paging_modes,
+    .write_p2m_entry        = hap_write_p2m_entry,
+    .guest_levels           = 2
+};
+
+struct paging_mode hap_paging_pae_mode = {
+    .page_fault             = hap_page_fault, 
+    .invlpg                 = hap_invlpg,
+    .gva_to_gfn             = hap_gva_to_gfn_pae_mode,
+    .update_cr3             = hap_update_cr3,
+    .update_paging_modes    = hap_update_paging_modes,
+    .write_p2m_entry        = hap_write_p2m_entry,
+    .guest_levels           = 3
+};
+
+struct paging_mode hap_paging_long_mode = {
+    .page_fault             = hap_page_fault, 
+    .invlpg                 = hap_invlpg,
+    .gva_to_gfn             = hap_gva_to_gfn_long_mode,
+    .update_cr3             = hap_update_cr3,
+    .update_paging_modes    = hap_update_paging_modes,
+    .write_p2m_entry        = hap_write_p2m_entry,
+    .guest_levels           = 4
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
+
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/hap/private.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/private.h     Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,112 @@
+/*
+ * arch/x86/mm/hap/private.h
+ *
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __HAP_PRIVATE_H__
+#define __HAP_PRIVATE_H__
+
+#include <asm/flushtlb.h>
+#include <asm/hvm/support.h>
+
+/********************************************/
+/*          GUEST TRANSLATION FUNCS         */
+/********************************************/
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva);
+/********************************************/
+/*            MISC DEFINITIONS              */
+/********************************************/
+
+/* PT_SHIFT describes the amount by which a virtual address is shifted right 
+ * to right justify the portion to be used for indexing into a page 
+ * table, given the guest memory model (i.e. number of levels) and the level 
+ * of the page table being accessed. The idea is from Virtual Iron's code.
+ */
+static const int PT_SHIFT[][5] =
+  {   /*     ------  level ------           nr_levels  */
+    /*         1     2     3     4                   */
+    {    0,    0,    0,    0,    0},   /* 0 not used */
+    {    0,    0,    0,    0,    0},   /* 1 not used */
+    {    0,   12,   22,    0,    0},   /* 2  */
+    {    0,   12,   21,   30,    0},   /* 3  */
+    {    0,   12,   21,   30,   39}    /* 4  */
+  };
+
+/* PT_ENTRIES describes the number of entries in a page table, given the 
+ * memory model (i.e. number of levels) and the level of the page table 
+ * being considered. This idea is from Virtual Iron's shadow code. */
+static const int PT_ENTRIES[][5] =
+  {   /*     ------  level ------           nr_levels  */
+    /*         1     2     3     4                   */
+    {    0,    0,    0,    0,    0},   /* 0 not used */
+    {    0,    0,    0,    0,    0},   /* 1 not used */
+    {    0, 1024, 1024,    0,    0},   /* 2  */
+    {    0,  512,  512,    4,    0},   /* 3  */
+    {    0,  512,  512,  512,  512}    /* 4  */
+  };
+
+/********************************************/
+/*       PAGING DEFINITION FOR GUEST        */
+/********************************************/
+#define PHYSICAL_PAGE_4K_SIZE (1UL << 12)
+#define PHYSICAL_PAGE_2M_SIZE (1UL << 21)
+#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
+#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) )
+#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) )
+#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )
+
+/* long mode physical address mask */
+#define PHYSICAL_ADDR_BITS_LM    52
+#define PHYSICAL_ADDR_MASK_LM    ((1UL << PHYSICAL_ADDR_BITS_LM)-1)
+#define PHYSICAL_ADDR_2M_MASK_LM (PHYSICAL_PAGE_2M_MASK & PHYSICAL_ADDR_MASK_LM)
+#define PHYSICAL_ADDR_4K_MASK_LM (PHYSICAL_PAGE_4K_MASK & PHYSICAL_ADDR_MASK_LM)
+
+#define PAGE_NX_BIT      (1ULL << 63)
+/************************************************/
+/*        PAGETABLE RELATED VARIABLES           */
+/************************************************/
+#if CONFIG_PAGING_LEVELS == 2
+#define HAP_L1_PAGETABLE_ENTRIES    1024
+#define HAP_L2_PAGETABLE_ENTRIES    1024
+#define HAP_L1_PAGETABLE_SHIFT        12
+#define HAP_L2_PAGETABLE_SHIFT        22
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3
+#define HAP_L1_PAGETABLE_ENTRIES     512
+#define HAP_L2_PAGETABLE_ENTRIES     512
+#define HAP_L3_PAGETABLE_ENTRIES       4
+#define HAP_L1_PAGETABLE_SHIFT        12
+#define HAP_L2_PAGETABLE_SHIFT        21
+#define HAP_L3_PAGETABLE_SHIFT        30
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+#define HAP_L1_PAGETABLE_ENTRIES     512
+#define HAP_L2_PAGETABLE_ENTRIES     512
+#define HAP_L3_PAGETABLE_ENTRIES     512
+#define HAP_L4_PAGETABLE_ENTRIES     512
+#define HAP_L1_PAGETABLE_SHIFT        12
+#define HAP_L2_PAGETABLE_SHIFT        21
+#define HAP_L3_PAGETABLE_SHIFT        30
+#define HAP_L4_PAGETABLE_SHIFT        39
+#endif
+
+#endif /* __HAP_PRIVATE_H__ */
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/hap/support.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/support.c     Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,334 @@
+/*
+ * arch/x86/mm/hap/support.c
+ * 
+ * guest page table walker
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <asm/hap.h>
+
+#include "private.h"
+#include "../page-guest32.h"
+
+/*******************************************/
+/*      Platform Specific Functions        */
+/*******************************************/
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for real mode guest. 
+ */
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva)
+{
+    HERE_I_AM;
+    return ((paddr_t)gva >> PAGE_SHIFT);
+}
+
+/* Translate a guest virtual address to a guest frame number by walking the
+ * two-level guest page table (32-bit entries); INVALID_GFN if the walk fails.
+ */
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva)
+{
+    unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+    int mode = 2; /* two-level guest */
+    int lev, index;
+    paddr_t gpa = 0;
+    unsigned long gpfn, mfn;
+    int success = 1;
+    l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */
+    l1_pgentry_32_t *l1e;
+
+    HERE_I_AM;
+
+    gpfn = (gcr3 >> PAGE_SHIFT);
+    for ( lev = mode; lev >= 1; lev-- ) {
+        mfn = get_mfn_from_gpfn( gpfn );
+        if ( mfn == INVALID_MFN ) {
+            HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+                       lev);
+            success = 0;
+            break;
+        }
+        index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+        if ( lev == 2 ) {
+            l2e = map_domain_page( mfn );
+            HAP_PRINTK("l2 page table entry is %x at index = %d\n",
+                       l2e[index].l2, index);
+            if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+                success = 0;
+            }
+            /* Only a present entry may be interpreted as a PSE mapping. */
+            if ( success && (l2e_get_flags_32(l2e[index]) & _PAGE_PSE) ) {
+                HAP_PRINTK("guest page table is PSE\n");
+                if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /*[13:20] */
+                    printk("guest physical memory size is too large!\n");
+                    domain_crash(v->domain);
+                }
+                gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) +
+                    (gva & ~PHYSICAL_PAGE_4M_MASK);
+                unmap_domain_page(l2e);
+                break; /* last level page table, return from here */
+            }
+            else {
+                gpfn = l2e_get_pfn( l2e[index] );
+            }
+            unmap_domain_page(l2e);
+        }
+
+        if ( lev == 1 ) {
+            l1e = map_domain_page( mfn );
+            HAP_PRINTK("l1 page table entry is %x at index = %d\n",
+                       l1e[index].l1, index);
+            if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+                success = 0;
+            }
+            gpfn = l1e_get_pfn( l1e[index] );
+            gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+                (gva & ~PHYSICAL_PAGE_4K_MASK);
+            unmap_domain_page(l1e);
+        }
+
+        if ( !success ) /* error happened, jump out */
+            break;
+    }
+
+    HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+    if ( !success ) /* error happened */
+        return INVALID_GFN;
+    else
+        return ((paddr_t)gpa >> PAGE_SHIFT);
+}
+
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for PAE mode guest. 
+ */
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS >= 3
+    unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+    int mode = 3; /* three-level guest */
+    int lev, index;
+    paddr_t gpa = 0;
+    unsigned long gpfn, mfn;
+    int success = 1;
+    l1_pgentry_t *l1e;
+    l2_pgentry_t *l2e;
+    l3_pgentry_t *l3e;
+    
+    HERE_I_AM;
+
+    gpfn = (gcr3 >> PAGE_SHIFT);
+    for ( lev = mode; lev >= 1; lev-- ) {
+        mfn = get_mfn_from_gpfn( gpfn );
+        if ( mfn == INVALID_MFN ) {
+            HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, 
+                       lev);
+            success = 0;
+            break;
+        }
+        index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+        if ( lev == 3 ) {
+            l3e = map_domain_page( mfn );
+            index += ( ((gcr3 >> 5 ) & 127 ) * 4 );
+            if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+                success = 0;
+            }
+            gpfn = l3e_get_pfn( l3e[index] );
+            unmap_domain_page(l3e);
+        }
+
+        if ( lev == 2 ) {
+            l2e = map_domain_page( mfn );
+            if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+                success = 0;
+            }
+
+            if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+                HAP_PRINTK("guest page table is PSE\n");
+                gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) + 
+                    (gva & ~PHYSICAL_PAGE_2M_MASK);
+                unmap_domain_page(l2e);
+                break; /* last level page table, jump out from here */
+            }
+            else { 
+                gpfn = l2e_get_pfn(l2e[index]);
+            }
+            unmap_domain_page(l2e);
+        }
+
+        if ( lev == 1 ) {
+            l1e = map_domain_page( mfn );
+            if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+                success = 0;
+            }
+            gpfn = l1e_get_pfn( l1e[index] );
+            gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) + 
+                (gva & ~PHYSICAL_PAGE_4K_MASK);
+            unmap_domain_page(l1e);
+        }
+
+        if ( success != 1 ) /* error happened, jump out */
+            break;
+    }
+
+    gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+    HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+    if ( !success )
+        return INVALID_GFN;
+    else
+        return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+    HERE_I_AM;
+    printk("guest paging level (3) is greater than host paging level!\n");
+    domain_crash(v->domain);
+    return INVALID_GFN;
+#endif
+}
+
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for long mode guest. 
+ */
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS == 4
+    unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+    int mode = 4; /* four-level guest */
+    int lev, index;
+    paddr_t gpa = 0;
+    unsigned long gpfn, mfn;
+    int success = 1;
+    l4_pgentry_t *l4e;
+    l3_pgentry_t *l3e;
+    l2_pgentry_t *l2e;
+    l1_pgentry_t *l1e;
+
+    HERE_I_AM;
+
+    gpfn = (gcr3 >> PAGE_SHIFT);
+    for ( lev = mode; lev >= 1; lev-- ) {
+        mfn = get_mfn_from_gpfn( gpfn );
+        if ( mfn == INVALID_MFN ) {
+            HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, 
+                       lev);
+            success = 0;
+            break;
+        }
+        index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+        if ( lev == 4 ) {
+            l4e = map_domain_page( mfn );
+            if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 4 entry not present at index = %d\n", index);
+                success = 0;
+            }
+            gpfn = l4e_get_pfn( l4e[index] );
+            unmap_domain_page(l4e);
+        }
+
+        if ( lev == 3 ) {
+            l3e = map_domain_page( mfn );
+            if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+                success = 0;
+            }
+            gpfn = l3e_get_pfn( l3e[index] );
+            unmap_domain_page(l3e);
+        }
+
+        if ( lev == 2 ) {
+            l2e = map_domain_page( mfn );
+            if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+                success = 0;
+            }
+
+            if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+                HAP_PRINTK("guest page table is PSE\n");
+                gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_ADDR_2M_MASK_LM) 
+                    + (gva & ~PHYSICAL_PAGE_2M_MASK);
+                unmap_domain_page(l2e);
+                break; /* last level page table, jump out from here */
+            }
+            else { 
+                gpfn = l2e_get_pfn(l2e[index]);
+            }
+            unmap_domain_page(l2e);
+        }
+
+        if ( lev == 1 ) {
+            l1e = map_domain_page( mfn );
+            if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+                HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+                success = 0;
+            }
+            gpfn = l1e_get_pfn( l1e[index] );
+            gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_ADDR_4K_MASK_LM) + 
+                (gva & ~PHYSICAL_PAGE_4K_MASK);
+            unmap_domain_page(l1e);
+        }
+
+        if ( success != 1 ) /* error happened, jump out */
+            break;
+    }
+
+    gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+    HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+    if ( !success )
+        return INVALID_GFN;
+    else
+        return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+    HERE_I_AM;
+    printk("guest paging level (4) is greater than host paging level!\n");
+    domain_crash(v->domain);
+    return INVALID_GFN;
+#endif
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/page-guest32.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/page-guest32.h    Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,100 @@
+
+#ifndef __X86_PAGE_GUEST_H__
+#define __X86_PAGE_GUEST_H__
+
+#ifndef __ASSEMBLY__
+# include <asm/types.h>
+#endif
+
+#define PAGETABLE_ORDER_32         10
+#define L1_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
+#define L2_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
+#define ROOT_PAGETABLE_ENTRIES_32  L2_PAGETABLE_ENTRIES_32
+
+
+#define L1_PAGETABLE_SHIFT_32 12
+#define L2_PAGETABLE_SHIFT_32 22
+
+/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
+
+#ifndef __ASSEMBLY__
+
+typedef u32 intpte_32_t;
+
+typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
+typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
+typedef l2_pgentry_t root_pgentry_32_t;
+#endif
+
+#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
+#define put_pte_flags_32(x) ((intpte_32_t)(x))
+
+/* Get pte access flags (unsigned int). */
+#define l1e_get_flags_32(x)           (get_pte_flags_32((x).l1))
+#define l2e_get_flags_32(x)           (get_pte_flags_32((x).l2))
+
+#define l1e_get_paddr_32(x)           \
+    ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
+#define l2e_get_paddr_32(x)           \
+    ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
+
+/* Construct an empty pte. */
+#define l1e_empty_32()                ((l1_pgentry_32_t) { 0 })
+#define l2e_empty_32()                ((l2_pgentry_32_t) { 0 })
+
+/* Construct a pte from a pfn and access flags. */
+#define l1e_from_pfn_32(pfn, flags)   \
+    ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
+#define l2e_from_pfn_32(pfn, flags)   \
+    ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
+
+/* Construct a pte from a physical address and access flags. */
+#ifndef __ASSEMBLY__
+static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
+{
+    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+    return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
+}
+static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
+{
+    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+    return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
+}
+#endif /* !__ASSEMBLY__ */
+
+
+/* Construct a pte from a page pointer and access flags. */
+#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
+#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
+
+/* Add extra flags to an existing pte. */
+#define l1e_add_flags_32(x, flags)    ((x).l1 |= put_pte_flags_32(flags))
+#define l2e_add_flags_32(x, flags)    ((x).l2 |= put_pte_flags_32(flags))
+
+/* Remove flags from an existing pte. */
+#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
+#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
+
+/* Check if a pte's page mapping or significant access flags have changed. */
+#define l1e_has_changed_32(x,y,flags) \
+    ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
+#define l2e_has_changed_32(x,y,flags) \
+    ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset_32(a)         \
+    (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
+#define l2_table_offset_32(a)         \
+    (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
+
+#endif /* __X86_PAGE_GUEST_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c  Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/paging.c  Thu Mar 08 14:39:52 2007 -0600
@@ -24,10 +24,12 @@
 #include <asm/paging.h>
 #include <asm/shadow.h>
 #include <asm/p2m.h>
+#include <asm/hap.h>
 
 /* Xen command-line option to enable hardware-assisted paging */
 int opt_hap_enabled = 0; 
 boolean_param("hap", opt_hap_enabled);
+int hap_capable_system = 0;
 
 /* Printouts */
 #define PAGING_PRINTK(_f, _a...)                                     \
@@ -46,12 +48,18 @@ void paging_domain_init(struct domain *d
 {
     p2m_init(d);
     shadow_domain_init(d);
+
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        hap_domain_init(d);
 }
 
 /* vcpu paging struct initialization goes here */
 void paging_vcpu_init(struct vcpu *v)
 {
-    shadow_vcpu_init(v);
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_vcpu(v) )
+        hap_vcpu_init(v);
+    else
+        shadow_vcpu_init(v);
 }
 
 
@@ -59,32 +67,38 @@ int paging_domctl(struct domain *d, xen_
                   XEN_GUEST_HANDLE(void) u_domctl)
 {
     /* Here, dispatch domctl to the appropriate paging code */
-    return shadow_domctl(d, sc, u_domctl);
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        return hap_domctl(d, sc, u_domctl);
+    else
+        return shadow_domctl(d, sc, u_domctl);
 }
 
 /* Call when destroying a domain */
 void paging_teardown(struct domain *d)
 {
-    shadow_teardown(d);
-    /* Call other modes' teardown code here */    
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        hap_teardown(d);
+    else
+        shadow_teardown(d);
 }
 
 /* Call once all of the references to the domain have gone away */
 void paging_final_teardown(struct domain *d)
 {
-    shadow_teardown(d);
-    /* Call other modes' final teardown code here */
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        hap_final_teardown(d);
+    else
+        shadow_final_teardown(d);
 }
 
 /* Enable an arbitrary paging-assistance mode.  Call once at domain
  * creation. */
 int paging_enable(struct domain *d, u32 mode)
 {
-    if ( mode & PG_SH_enable ) 
-        return shadow_enable(d, mode);
+    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+        return hap_enable(d, mode | PG_HAP_enable);
     else
-        /* No other modes supported yet */
-        return -EINVAL; 
+        return shadow_enable(d, mode | PG_SH_enable);
 }
 
 /* Print paging-assistance info to the console */
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/shadow/common.c   Thu Mar 08 14:39:52 2007 -0600
@@ -2912,7 +2912,16 @@ void sh_mark_dirty(struct domain *d, mfn
      * can be called from __hvm_copy during emulation).
      * If the lock isn't held, take it for the duration of the call. */
     do_locking = !shadow_locked_by_me(d);
-    if ( do_locking ) shadow_lock(d);
+    if ( do_locking ) 
+    { 
+        shadow_lock(d);
+        /* Check the mode again with the lock held */ 
+        if ( unlikely(!shadow_mode_log_dirty(d)) )
+        {
+            shadow_unlock(d);
+            return;
+        }
+    }
 
     ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
 
@@ -2968,8 +2977,16 @@ int shadow_domctl(struct domain *d,
 
     if ( unlikely(d == current->domain) )
     {
-        gdprintk(XENLOG_INFO, "Don't try to do a shadow op on yourself!\n");
+        gdprintk(XENLOG_INFO, "Dom %u tried to do a shadow op on itself.\n",
+                 d->domain_id);
         return -EINVAL;
+    }
+
+    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
+    {
+        gdprintk(XENLOG_INFO, "Ignoring shadow op on dying domain %u\n",
+                 d->domain_id);
+        return 0;
     }
 
     switch ( sc->op )
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/shadow/page-guest32.h
--- a/xen/arch/x86/mm/shadow/page-guest32.h     Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-
-#ifndef __X86_PAGE_GUEST_H__
-#define __X86_PAGE_GUEST_H__
-
-#ifndef __ASSEMBLY__
-# include <asm/types.h>
-#endif
-
-#define PAGETABLE_ORDER_32         10
-#define L1_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
-#define L2_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
-#define ROOT_PAGETABLE_ENTRIES_32  L2_PAGETABLE_ENTRIES_32
-
-
-#define L1_PAGETABLE_SHIFT_32 12
-#define L2_PAGETABLE_SHIFT_32 22
-
-/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
-
-#ifndef __ASSEMBLY__
-
-typedef u32 intpte_32_t;
-
-typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
-typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
-typedef l2_pgentry_t root_pgentry_32_t;
-#endif
-
-#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
-#define put_pte_flags_32(x) ((intpte_32_t)(x))
-
-/* Get pte access flags (unsigned int). */
-#define l1e_get_flags_32(x)           (get_pte_flags_32((x).l1))
-#define l2e_get_flags_32(x)           (get_pte_flags_32((x).l2))
-
-#define l1e_get_paddr_32(x)           \
-    ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
-#define l2e_get_paddr_32(x)           \
-    ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
-
-/* Construct an empty pte. */
-#define l1e_empty_32()                ((l1_pgentry_32_t) { 0 })
-#define l2e_empty_32()                ((l2_pgentry_32_t) { 0 })
-
-/* Construct a pte from a pfn and access flags. */
-#define l1e_from_pfn_32(pfn, flags)   \
-    ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
-#define l2e_from_pfn_32(pfn, flags)   \
-    ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
-
-/* Construct a pte from a physical address and access flags. */
-#ifndef __ASSEMBLY__
-static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
-{
-    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
-    return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
-}
-static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
-{
-    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
-    return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
-}
-#endif /* !__ASSEMBLY__ */
-
-
-/* Construct a pte from a page pointer and access flags. */
-#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
-#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
-
-/* Add extra flags to an existing pte. */
-#define l1e_add_flags_32(x, flags)    ((x).l1 |= put_pte_flags_32(flags))
-#define l2e_add_flags_32(x, flags)    ((x).l2 |= put_pte_flags_32(flags))
-
-/* Remove flags from an existing pte. */
-#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
-#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
-
-/* Check if a pte's page mapping or significant access flags have changed. */
-#define l1e_has_changed_32(x,y,flags) \
-    ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
-#define l2e_has_changed_32(x,y,flags) \
-    ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
-
-/* Given a virtual address, get an entry offset into a page table. */
-#define l1_table_offset_32(a)         \
-    (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
-#define l2_table_offset_32(a)         \
-    (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
-
-#endif /* __X86_PAGE_GUEST_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/shadow/private.h  Thu Mar 08 14:39:52 2007 -0600
@@ -539,7 +539,7 @@ static inline int sh_get_ref(struct vcpu
 
     /* We remember the first shadow entry that points to each shadow. */
     if ( entry_pa != 0 
-         && sh_type_is_pinnable(v, sp->type) 
+         && !sh_type_is_pinnable(v, sp->type) 
          && sp->up == 0 ) 
         sp->up = entry_pa;
     
@@ -559,7 +559,7 @@ static inline void sh_put_ref(struct vcp
 
     /* If this is the entry in the up-pointer, remove it */
     if ( entry_pa != 0 
-         && sh_type_is_pinnable(v, sp->type) 
+         && !sh_type_is_pinnable(v, sp->type) 
          && sp->up == entry_pa ) 
         sp->up = 0;
 
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/shadow/types.h    Thu Mar 08 14:39:52 2007 -0600
@@ -235,7 +235,7 @@ static inline shadow_l4e_t shadow_l4e_fr
 
 #if GUEST_PAGING_LEVELS == 2
 
-#include "page-guest32.h"
+#include "../page-guest32.h"
 
 #define GUEST_L1_PAGETABLE_ENTRIES     1024
 #define GUEST_L2_PAGETABLE_ENTRIES     1024
diff -r 8f0b5295bb1b -r dcec453681bc xen/common/event_channel.c
--- a/xen/common/event_channel.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/common/event_channel.c        Thu Mar 08 14:39:52 2007 -0600
@@ -560,6 +560,9 @@ void send_guest_global_virq(struct domai
 
     ASSERT(virq_is_global(virq));
 
+    if ( unlikely(d == NULL) )
+        return;
+
     v = d->vcpu[0];
     if ( unlikely(v == NULL) )
         return;
diff -r 8f0b5295bb1b -r dcec453681bc xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/common/page_alloc.c   Thu Mar 08 14:39:52 2007 -0600
@@ -49,7 +49,7 @@ string_param("badpage", opt_badpage);
  * Bit width of the DMA heap.
  */
 static unsigned int  dma_bitsize = CONFIG_DMA_BITSIZE;
-static unsigned long max_dma_mfn = (1UL << (CONFIG_DMA_BITSIZE - PAGE_SHIFT)) - 1;
+static unsigned long max_dma_mfn = (1UL<<(CONFIG_DMA_BITSIZE-PAGE_SHIFT))-1;
 static void parse_dma_bits(char *s)
 {
     unsigned int v = simple_strtol(s, NULL, 0);
@@ -339,11 +339,13 @@ static void init_heap_block(heap_by_zone
 
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
-    unsigned int zone_lo, unsigned zone_hi,
+    unsigned int zone_lo, unsigned int zone_hi,
     unsigned int cpu, unsigned int order)
 {
-    unsigned int i, j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
-    unsigned int zone, request = (1UL << order);
+    unsigned int i, j, zone;
+    unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes();
+    unsigned long request = 1UL << order;
+    cpumask_t extra_cpus_mask, mask;
     struct page_info *pg;
 
     ASSERT(node >= 0);
@@ -356,25 +358,24 @@ static struct page_info *alloc_heap_page
 
     spin_lock(&heap_lock);
 
-    /* start with requested node, but exhaust all node memory
-     * in requested zone before failing, only calc new node
-     * value if we fail to find memory in target node, this avoids
-     * needless computation on fast-path */
+    /*
+     * Start with requested node, but exhaust all node memory in requested 
+     * zone before failing, only calc new node value if we fail to find memory 
+     * in target node, this avoids needless computation on fast-path.
+     */
     for ( i = 0; i < num_nodes; i++ )
     {
-        for ( zone = zone_hi; zone >= zone_lo; --zone )
-        {
-            /* check if target node can support the allocation */
-            if ( avail[node] && (avail[node][zone] >= request) )
-            {
-                /* Find smallest order which can satisfy the request. */
-                for ( j = order; j <= MAX_ORDER; j++ )
-                {
-                    if ( !list_empty(&heap(node, zone, j)) )
-                        goto found;
-                }
-            }
-        }
+        zone = zone_hi;
+        do {
+            /* Check if target node can support the allocation. */
+            if ( !avail[node] || (avail[node][zone] < request) )
+                continue;
+
+            /* Find smallest order which can satisfy the request. */
+            for ( j = order; j <= MAX_ORDER; j++ )
+                if ( !list_empty(&heap(node, zone, j)) )
+                    goto found;
+        } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
 
         /* Pick next node, wrapping around if needed. */
         if ( ++node == num_nodes )
@@ -403,6 +404,29 @@ static struct page_info *alloc_heap_page
 
     spin_unlock(&heap_lock);
 
+    cpus_clear(mask);
+
+    for ( i = 0; i < (1 << order); i++ )
+    {
+        /* Reference count must continuously be zero for free pages. */
+        BUG_ON(pg[i].count_info != 0);
+
+        /* Add in any extra CPUs that need flushing because of this page. */
+        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
+        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
+        cpus_or(mask, mask, extra_cpus_mask);
+
+        /* Initialise fields which have other uses for free pages. */
+        pg[i].u.inuse.type_info = 0;
+        page_set_owner(&pg[i], NULL);
+    }
+
+    if ( unlikely(!cpus_empty(mask)) )
+    {
+        perfc_incrc(need_flush_tlb_flush);
+        flush_tlb_mask(mask);
+    }
+
     return pg;
 }
 
@@ -411,13 +435,28 @@ static void free_heap_pages(
     unsigned int zone, struct page_info *pg, unsigned int order)
 {
     unsigned long mask;
-    unsigned int node = phys_to_nid(page_to_maddr(pg));
+    unsigned int i, node = phys_to_nid(page_to_maddr(pg));
+    struct domain *d;
 
     ASSERT(zone < NR_ZONES);
     ASSERT(order <= MAX_ORDER);
     ASSERT(node >= 0);
     ASSERT(node < num_online_nodes());
 
+    for ( i = 0; i < (1 << order); i++ )
+    {
+        BUG_ON(pg[i].count_info != 0);
+        if ( (d = page_get_owner(&pg[i])) != NULL )
+        {
+            pg[i].tlbflush_timestamp = tlbflush_current_time();
+            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
+        }
+        else
+        {
+            cpus_clear(pg[i].u.free.cpumask);
+        }
+    }
+
     spin_lock(&heap_lock);
 
     map_free(page_to_mfn(pg), 1 << order);
@@ -426,7 +465,7 @@ static void free_heap_pages(
     /* Merge chunks as far as possible. */
     while ( order < MAX_ORDER )
     {
-        mask = 1 << order;
+        mask = 1UL << order;
 
         if ( (page_to_mfn(pg) & mask) )
         {
@@ -554,7 +593,7 @@ void end_boot_allocator(void)
 /*
  * Scrub all unallocated pages in all heap zones. This function is more
  * convoluted than appears necessary because we do not want to continuously
- * hold the lock or disable interrupts while scrubbing very large memory areas.
+ * hold the lock while scrubbing very large memory areas.
  */
 void scrub_heap_pages(void)
 {
@@ -575,7 +614,7 @@ void scrub_heap_pages(void)
         if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
             printk(".");
 
-        spin_lock_irq(&heap_lock);
+        spin_lock(&heap_lock);
 
         /* Re-check page status with lock held. */
         if ( !allocated_in_map(mfn) )
@@ -595,7 +634,7 @@ void scrub_heap_pages(void)
             }
         }
 
-        spin_unlock_irq(&heap_lock);
+        spin_unlock(&heap_lock);
     }
 
     printk("done.\n");
@@ -609,8 +648,6 @@ void scrub_heap_pages(void)
 
 void init_xenheap_pages(paddr_t ps, paddr_t pe)
 {
-    unsigned long flags;
-
     ps = round_pgup(ps);
     pe = round_pgdown(pe);
     if ( pe <= ps )
@@ -625,33 +662,21 @@ void init_xenheap_pages(paddr_t ps, padd
     if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
         pe -= PAGE_SIZE;
 
-    local_irq_save(flags);
     init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
-    local_irq_restore(flags);
 }
 
 
 void *alloc_xenheap_pages(unsigned int order)
 {
-    unsigned long flags;
     struct page_info *pg;
-    int i;
-
-    local_irq_save(flags);
+
+    ASSERT(!in_irq());
+
     pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
-    local_irq_restore(flags);
-
     if ( unlikely(pg == NULL) )
         goto no_memory;
 
     memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
-
-    for ( i = 0; i < (1 << order); i++ )
-    {
-        pg[i].count_info        = 0;
-        pg[i].u.inuse._domain   = 0;
-        pg[i].u.inuse.type_info = 0;
-    }
 
     return page_to_virt(pg);
 
@@ -663,16 +688,14 @@ void *alloc_xenheap_pages(unsigned int o
 
 void free_xenheap_pages(void *v, unsigned int order)
 {
-    unsigned long flags;
+    ASSERT(!in_irq());
 
     if ( v == NULL )
         return;
 
-    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));    
-
-    local_irq_save(flags);
+    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
+
     free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
-    local_irq_restore(flags);
 }
 
 
@@ -762,8 +785,6 @@ struct page_info *__alloc_domheap_pages(
     unsigned int memflags)
 {
     struct page_info *pg = NULL;
-    cpumask_t mask;
-    unsigned long i;
     unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
 
     ASSERT(!in_irq());
@@ -792,38 +813,10 @@ struct page_info *__alloc_domheap_pages(
             return NULL;
     }
 
-    if ( pg == NULL )
-        if ( (pg = alloc_heap_pages(MEMZONE_XEN + 1,
-                                    zone_hi,
-                                    cpu, order)) == NULL )
-            return NULL;
-
-    mask = pg->u.free.cpumask;
-    tlbflush_filter(mask, pg->tlbflush_timestamp);
-
-    pg->count_info        = 0;
-    pg->u.inuse._domain   = 0;
-    pg->u.inuse.type_info = 0;
-
-    for ( i = 1; i < (1 << order); i++ )
-    {
-        /* Add in any extra CPUs that need flushing because of this page. */
-        cpumask_t extra_cpus_mask;
-        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
-        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
-        cpus_or(mask, mask, extra_cpus_mask);
-
-        pg[i].count_info        = 0;
-        pg[i].u.inuse._domain   = 0;
-        pg[i].u.inuse.type_info = 0;
-        page_set_owner(&pg[i], NULL);
-    }
-
-    if ( unlikely(!cpus_empty(mask)) )
-    {
-        perfc_incrc(need_flush_tlb_flush);
-        flush_tlb_mask(mask);
-    }
+    if ( (pg == NULL) &&
+         ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
+                                 cpu, order)) == NULL) )
+         return NULL;
 
     if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
     {
@@ -867,10 +860,7 @@ void free_domheap_pages(struct page_info
 
         for ( i = 0; i < (1 << order); i++ )
         {
-            shadow_drop_references(d, &pg[i]);
-            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
-            pg[i].tlbflush_timestamp  = tlbflush_current_time();
-            pg[i].u.free.cpumask      = d->domain_dirty_cpumask;
+            BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
             list_del(&pg[i].list);
         }
 
@@ -892,6 +882,7 @@ void free_domheap_pages(struct page_info
              */
             for ( i = 0; i < (1 << order); i++ )
             {
+                page_set_owner(&pg[i], NULL);
                 spin_lock(&page_scrub_lock);
                 list_add(&pg[i].list, &page_scrub_list);
                 scrub_pages++;
@@ -902,8 +893,6 @@ void free_domheap_pages(struct page_info
     else
     {
         /* Freeing anonymous domain-heap pages. */
-        for ( i = 0; i < (1 << order); i++ )
-            cpus_clear(pg[i].u.free.cpumask);
         free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
         drop_dom_ref = 0;
     }
diff -r 8f0b5295bb1b -r dcec453681bc xen/common/xmalloc.c
--- a/xen/common/xmalloc.c      Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/common/xmalloc.c      Thu Mar 08 14:39:52 2007 -0600
@@ -33,6 +33,8 @@
 #include <xen/timer.h>
 #include <xen/cache.h>
 #include <xen/prefetch.h>
+#include <xen/irq.h>
+#include <xen/smp.h>
 
 /*
  * XMALLOC_DEBUG:
@@ -175,6 +177,8 @@ void *_xmalloc(size_t size, size_t align
     struct xmalloc_hdr *i;
     unsigned long flags;
 
+    ASSERT(!in_irq());
+
     /* We currently always return cacheline aligned. */
     BUG_ON(align > SMP_CACHE_BYTES);
 
@@ -212,6 +216,8 @@ void xfree(void *p)
 {
     unsigned long flags;
     struct xmalloc_hdr *i, *tmp, *hdr;
+
+    ASSERT(!in_irq());
 
     if ( p == NULL )
         return;
diff -r 8f0b5295bb1b -r dcec453681bc xen/drivers/acpi/numa.c
--- a/xen/drivers/acpi/numa.c   Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/drivers/acpi/numa.c   Thu Mar 08 14:39:52 2007 -0600
@@ -22,10 +22,6 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  */
-#if 0
-#include <linux/module.h>
-#include <linux/kernel.h>
-#endif
 #include <xen/config.h>
 #include <xen/init.h>
 #include <xen/types.h>
@@ -34,7 +30,6 @@
 #include <xen/numa.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acmacros.h>
-#include <asm/page.h> /* __va() */
 
 #define ACPI_NUMA      0x80000000
 #define _COMPONENT     ACPI_NUMA
@@ -106,7 +101,7 @@ static int __init acpi_parse_slit(unsign
        if (!phys_addr || !size)
                return -EINVAL;
 
-       slit = (struct acpi_table_slit *)__va(phys_addr);
+       slit = (struct acpi_table_slit *)__acpi_map_table(phys_addr, size);
 
        /* downcast just for %llu vs %lu for i386/ia64  */
        localities = (u32) slit->localities;
@@ -159,7 +154,7 @@ static int __init acpi_parse_srat(unsign
        if (!phys_addr || !size)
                return -EINVAL;
 
-       srat = (struct acpi_table_srat *)__va(phys_addr);
+       srat = (struct acpi_table_srat *)__acpi_map_table(phys_addr, size);
 
        return 0;
 }
diff -r 8f0b5295bb1b -r dcec453681bc xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/drivers/char/console.c        Thu Mar 08 14:39:52 2007 -0600
@@ -399,6 +399,8 @@ static void __putstr(const char *str)
         vga_putchar(c);
         putchar_console_ring(c);
     }
+
+    send_guest_global_virq(dom0, VIRQ_CON_RING);
 }
 
 static int printk_prefix_check(char *p, char **pp)
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/acm/acm_hooks.h
--- a/xen/include/acm/acm_hooks.h       Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/acm/acm_hooks.h       Thu Mar 08 14:39:52 2007 -0600
@@ -247,12 +247,12 @@ static inline int acm_pre_domctl(struct 
             if (*ssid == NULL) {
                 printk("%s: Warning. Destroying domain without ssid pointer.\n", 
                        __func__);
-                domain_rcu_lock(d);
+                rcu_unlock_domain(d);
                 return -EACCES;
             }
             d->ssid = NULL; /* make sure it's not used any more */
              /* no policy-specific hook */
-            domain_rcu_lock(d);
+            rcu_unlock_domain(d);
             ret = 0;
         }
         break;
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/asm-x86/domain.h      Thu Mar 08 14:39:52 2007 -0600
@@ -104,6 +104,21 @@ struct shadow_vcpu {
 };
 
 /************************************************/
+/*            hardware assisted paging          */
+/************************************************/
+struct hap_domain {
+    spinlock_t        lock;
+    int               locker;
+    const char       *locker_function;
+    
+    struct list_head  freelists;
+    struct list_head  p2m_freelist;
+    unsigned int      total_pages;  /* number of pages allocated */
+    unsigned int      free_pages;   /* number of pages on freelists */
+    unsigned int      p2m_pages;    /* number of pages allocated to p2m */
+};
+
+/************************************************/
 /*       p2m handling                           */
 /************************************************/
 
@@ -135,6 +150,7 @@ struct paging_domain {
     struct shadow_domain shadow;
 
     /* Other paging assistance code will have structs here */
+    struct hap_domain    hap;
 };
 
 struct paging_vcpu {
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/hap.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hap.h Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,122 @@
+/******************************************************************************
+ * include/asm-x86/hap.h
+ *
+ * hardware-assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * 
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _XEN_HAP_H
+#define _XEN_HAP_H
+
+#define HERE_I_AM                                                     \
+    debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__)
+#define HAP_PRINTK(_f, _a...)                                         \
+    debugtrace_printk("hap: %s(): " _f, __func__, ##_a)
+#define HAP_ERROR(_f, _a...)                                          \
+    printk("hap error: %s(): " _f, __func__, ##_a)
+
+/************************************************/
+/*          hap domain page mapping             */
+/************************************************/
+static inline void *
+hap_map_domain_page(mfn_t mfn)
+{
+    return map_domain_page(mfn_x(mfn));
+}
+
+static inline void
+hap_unmap_domain_page(void *p)
+{
+    unmap_domain_page(p);
+}
+
+static inline void *
+hap_map_domain_page_global(mfn_t mfn)
+{
+    return map_domain_page_global(mfn_x(mfn));
+}
+
+static inline void 
+hap_unmap_domain_page_global(void *p) 
+{
+    unmap_domain_page_global(p);
+}
+
+/************************************************/
+/*           locking for hap code               */
+/************************************************/
+#define hap_lock_init(_d)                                   \
+    do {                                                    \
+        spin_lock_init(&(_d)->arch.paging.hap.lock);        \
+        (_d)->arch.paging.hap.locker = -1;                  \
+        (_d)->arch.paging.hap.locker_function = "nobody";   \
+    } while (0)
+
+#define hap_locked_by_me(_d)                     \
+    (current->processor == (_d)->arch.paging.hap.locker)
+
+#define hap_lock(_d)                                                       \
+    do {                                                                   \
+        if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\
+        {                                                                  \
+            printk("Error: hap lock held by %s\n",                         \
+                   (_d)->arch.paging.hap.locker_function);                 \
+            BUG();                                                         \
+        }                                                                  \
+        spin_lock(&(_d)->arch.paging.hap.lock);                            \
+        ASSERT((_d)->arch.paging.hap.locker == -1);                        \
+        (_d)->arch.paging.hap.locker = current->processor;                 \
+        (_d)->arch.paging.hap.locker_function = __func__;                  \
+    } while (0)
+
+#define hap_unlock(_d)                                              \
+    do {                                                            \
+        ASSERT((_d)->arch.paging.hap.locker == current->processor); \
+        (_d)->arch.paging.hap.locker = -1;                          \
+        (_d)->arch.paging.hap.locker_function = "nobody";           \
+        spin_unlock(&(_d)->arch.paging.hap.lock);                   \
+    } while (0)
+
+/************************************************/
+/*        hap domain level functions            */
+/************************************************/
+void  hap_domain_init(struct domain *d);
+int   hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+                 XEN_GUEST_HANDLE(void) u_domctl);
+int   hap_enable(struct domain *d, u32 mode);
+void  hap_final_teardown(struct domain *d);
+void  hap_teardown(struct domain *d);
+void  hap_vcpu_init(struct vcpu *v);
+
+extern struct paging_mode hap_paging_real_mode;
+extern struct paging_mode hap_paging_protected_mode;
+extern struct paging_mode hap_paging_pae_mode;
+extern struct paging_mode hap_paging_long_mode;
+#endif /* _XEN_HAP_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h     Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/emulate.h     Thu Mar 08 14:39:52 2007 -0600
@@ -76,7 +76,7 @@ enum instruction_index {
 };
 
 
-extern unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb, 
+extern unsigned long get_effective_addr_modrm64(
         struct cpu_user_regs *regs, const u8 prefix, int inst_len,
         const u8 *operand, u8 *size);
 extern unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, 
@@ -85,17 +85,17 @@ extern OPERATING_MODE get_operating_mode
 extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb);
 extern unsigned int decode_dest_reg(u8 prefix, u8 modrm);
 extern unsigned int decode_src_reg(u8 prefix, u8 modrm);
-extern unsigned long svm_rip2pointer(struct vmcb_struct *vmcb);
-extern int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
+extern unsigned long svm_rip2pointer(struct vcpu *v);
+extern int __get_instruction_length_from_list(struct vcpu *v,
         enum instruction_index *list, unsigned int list_count, 
         u8 *guest_eip_buf, enum instruction_index *match);
 
 
-static inline int __get_instruction_length(struct vmcb_struct *vmcb, 
+static inline int __get_instruction_length(struct vcpu *v, 
         enum instruction_index instr, u8 *guest_eip_buf)
 {
     return __get_instruction_length_from_list(
-        vmcb, &instr, 1, guest_eip_buf, NULL);
+        v, &instr, 1, guest_eip_buf, NULL);
 }
 
 
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/svm.h Thu Mar 08 14:39:52 2007 -0600
@@ -34,6 +34,41 @@ extern void arch_svm_do_resume(struct vc
 
 extern u64 root_vmcb_pa[NR_CPUS];
 
+static inline int svm_long_mode_enabled(struct vcpu *v)
+{
+    u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer;
+    return guest_efer & EFER_LMA;
+}
+
+static inline int svm_lme_is_set(struct vcpu *v)
+{
+    u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer;
+    return guest_efer & EFER_LME;
+}
+
+static inline int svm_cr4_pae_is_set(struct vcpu *v)
+{
+    unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+    return guest_cr4 & X86_CR4_PAE;
+}
+
+static inline int svm_paging_enabled(struct vcpu *v)
+{
+    unsigned long guest_cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+    return (guest_cr0 & X86_CR0_PE) && (guest_cr0 & X86_CR0_PG);
+}
+
+static inline int svm_pae_enabled(struct vcpu *v)
+{
+    unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+    return svm_paging_enabled(v) && (guest_cr4 & X86_CR4_PAE);
+}
+
+static inline int svm_pgbit_test(struct vcpu *v)
+{
+    return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
+}
+
 #define SVM_REG_EAX (0) 
 #define SVM_REG_ECX (1) 
 #define SVM_REG_EDX (2) 
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Thu Mar 08 14:39:52 2007 -0600
@@ -302,14 +302,6 @@ enum VMEXIT_EXITCODE
     VMEXIT_NPF              = 1024, /* nested paging fault */
     VMEXIT_INVALID          =  -1
 };
-
-enum {
-    SVM_CPU_STATE_PG_ENABLED=0,
-    SVM_CPU_STATE_PAE_ENABLED,
-    SVM_CPU_STATE_LME_ENABLED,      
-    SVM_CPU_STATE_LMA_ENABLED,
-    SVM_CPU_STATE_ASSIST_ENABLED,
-};  
 
 /* Definitions of segment state are borrowed by the generic HVM code. */
 typedef segment_attributes_t svm_segment_attributes_t;
@@ -457,12 +449,12 @@ struct arch_svm_struct {
     int                 saved_irq_vector;
     u32                 launch_core;
     
-    unsigned long       flags;      /* VMCB flags */
-    unsigned long       cpu_shadow_cr0; /* Guest value for CR0 */
-    unsigned long       cpu_shadow_cr4; /* Guest value for CR4 */
+    unsigned long       flags;            /* VMCB flags */
+    unsigned long       cpu_shadow_cr0;   /* Guest value for CR0 */
+    unsigned long       cpu_shadow_cr4;   /* Guest value for CR4 */
+    unsigned long       cpu_shadow_efer;  /* Guest value for EFER */
     unsigned long       cpu_cr2;
     unsigned long       cpu_cr3;
-    unsigned long       cpu_state;
 };
 
 struct vmcb_struct *alloc_vmcb(void);
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/public/arch-x86/xen.h
--- a/xen/include/public/arch-x86/xen.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/public/arch-x86/xen.h Thu Mar 08 14:39:52 2007 -0600
@@ -132,6 +132,7 @@ struct vcpu_guest_context {
     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
     unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
     unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
     unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
 #ifdef __i386__
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/public/xen.h
--- a/xen/include/public/xen.h  Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/public/xen.h  Thu Mar 08 14:39:52 2007 -0600
@@ -131,6 +131,7 @@
 #define VIRQ_TBUF       4  /* G. (DOM0) Trace buffer has records available.  */
 #define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
 #define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
@@ -473,26 +474,24 @@ typedef struct shared_info shared_info_t
 #endif
 
 /*
- * Start-of-day memory layout for the initial domain (DOM0):
+ * Start-of-day memory layout:
  *  1. The domain is started within contiguous virtual-memory region.
- *  2. The contiguous region begins and ends on an aligned 4MB boundary.
- *  3. The region start corresponds to the load address of the OS image.
- *     If the load address is not 4MB aligned then the address is rounded down.
- *  4. This the order of bootstrap elements in the initial virtual region:
+ *  2. The contiguous region ends on an aligned 4MB boundary.
+ *  3. This is the order of bootstrap elements in the initial virtual region:
  *      a. relocated kernel image
  *      b. initial ram disk              [mod_start, mod_len]
  *      c. list of allocated page frames [mfn_list, nr_pages]
  *      d. start_info_t structure        [register ESI (x86)]
  *      e. bootstrap page tables         [pt_base, CR3 (x86)]
  *      f. bootstrap stack               [register ESP (x86)]
- *  5. Bootstrap elements are packed together, but each is 4kB-aligned.
- *  6. The initial ram disk may be omitted.
- *  7. The list of page frames forms a contiguous 'pseudo-physical' memory
+ *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ *  5. The initial ram disk may be omitted.
+ *  6. The list of page frames forms a contiguous 'pseudo-physical' memory
  *     layout for the domain. In particular, the bootstrap virtual-memory
  *     region is a 1:1 mapping to the first section of the pseudo-physical map.
- *  8. All bootstrap elements are mapped read-writable for the guest OS. The
+ *  7. All bootstrap elements are mapped read-writable for the guest OS. The
  *     only exception is the bootstrap page table, which is mapped read-only.
- *  9. There is guaranteed to be at least 512kB padding after the final
+ *  8. There is guaranteed to be at least 512kB padding after the final
  *     bootstrap element. If necessary, the bootstrap virtual region is
  *     extended by an extra 4MB to ensure this.
  */
_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel
 |