WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] [merge] with xen-unstable

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [merge] with xen-unstable
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 19 Jan 2007 10:45:26 -0800
Delivery-date: Fri, 19 Jan 2007 10:47:06 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx>
# Date 1169218137 0
# Node ID 3c8bb086025ee18f077582a5343da631c67fbaca
# Parent  8475a4e0425ed158923d9849a8e5a6821e8bdb34
# Parent  3157835b1d45f7175aba2b4a98cac93f527d6b10
[merge] with xen-unstable

Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx>
---
 tools/python/xen/xend/XendRoot.py                        |  322 -
 xen/arch/ia64/linux/cmdline.c                            |  121 
 xen/include/asm-ia64/linux-null/linux/ioport.h           |    1 
 xen/include/asm-ia64/linux-xen/asm/sn/sn_sal.h           |  994 ----
 xen/include/asm-ia64/linux/asm/machvec.h                 |  390 -
 xen/include/asm-ia64/linux/asm/pci.h                     |  161 
 Config.mk                                                |    5 
 buildconfigs/linux-defconfig_xen0_ia64                   |    3 
 buildconfigs/linux-defconfig_xen0_x86_32                 |    6 
 buildconfigs/linux-defconfig_xen0_x86_64                 |    6 
 buildconfigs/linux-defconfig_xenU_ia64                   |    3 
 buildconfigs/linux-defconfig_xenU_x86_32                 |    2 
 buildconfigs/linux-defconfig_xenU_x86_64                 |    2 
 buildconfigs/linux-defconfig_xen_ia64                    |    3 
 buildconfigs/linux-defconfig_xen_x86_32                  |    2 
 buildconfigs/linux-defconfig_xen_x86_64                  |    2 
 buildconfigs/mk.linux-2.6-xen                            |    4 
 extras/mini-os/Makefile                                  |    2 
 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h     |   14 
 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h     |   14 
 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c         |    4 
 linux-2.6-xen-sparse/arch/ia64/Kconfig                   |    6 
 linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c      |    1 
 linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S             |  925 ++++
 linux-2.6-xen-sparse/arch/ia64/kernel/gate.S             |   24 
 linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S         |    8 
 linux-2.6-xen-sparse/arch/ia64/kernel/patch.c            |    4 
 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c            |   10 
 linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S           |  267 -
 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c          |   88 
 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S            |   49 
 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S              |   54 
 linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h         |   11 
 linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S              |   23 
 linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S            |    7 
 linux-2.6-xen-sparse/drivers/xen/Kconfig                 |    8 
 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h        |  108 
 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h       |    3 
 linux-2.6-xen-sparse/include/asm-ia64/maddr.h            |    1 
 linux-2.6-xen-sparse/include/asm-ia64/page.h             |    1 
 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h       |   18 
 linux-2.6-xen-sparse/include/linux/skbuff.h              |    3 
 linux-2.6-xen-sparse/net/core/skbuff.c                   |   41 
 tools/ioemu/hw/cirrus_vga.c                              |   52 
 tools/ioemu/hw/ide.c                                     |    3 
 tools/ioemu/hw/pci.c                                     |    3 
 tools/ioemu/target-i386-dm/helper2.c                     |   15 
 tools/ioemu/vl.c                                         |   38 
 tools/libxc/Makefile                                     |    2 
 tools/libxc/ia64/xc_ia64_hvm_build.c                     |   44 
 tools/libxc/xc_domain.c                                  |   44 
 tools/libxc/xc_hvm_restore.c                             |  360 +
 tools/libxc/xc_hvm_save.c                                |  727 +++
 tools/libxc/xenctrl.h                                    |   24 
 tools/libxc/xenguest.h                                   |   20 
 tools/pygrub/src/pygrub                                  |    6 
 tools/python/setup.py                                    |   13 
 tools/python/xen/lowlevel/scf/scf.c                      |  156 
 tools/python/xen/lowlevel/xc/xc.c                        |   22 
 tools/python/xen/util/xmlrpclib2.py                      |    1 
 tools/python/xen/web/httpserver.py                       |   12 
 tools/python/xen/xend/Vifctl.py                          |    4 
 tools/python/xen/xend/XendCheckpoint.py                  |   77 
 tools/python/xen/xend/XendConfig.py                      |   45 
 tools/python/xen/xend/XendDomain.py                      |   12 
 tools/python/xen/xend/XendDomainInfo.py                  |   64 
 tools/python/xen/xend/XendNode.py                        |    4 
 tools/python/xen/xend/XendOptions.py                     |  373 +
 tools/python/xen/xend/XendProtocol.py                    |    6 
 tools/python/xen/xend/balloon.py                         |    6 
 tools/python/xen/xend/image.py                           |   28 
 tools/python/xen/xend/osdep.py                           |   10 
 tools/python/xen/xend/server/DevController.py            |    8 
 tools/python/xen/xend/server/SrvRoot.py                  |    2 
 tools/python/xen/xend/server/SrvServer.py                |   58 
 tools/python/xen/xend/server/XMLRPCServer.py             |   17 
 tools/python/xen/xend/server/netif.py                    |   13 
 tools/python/xen/xend/server/relocate.py                 |   14 
 tools/python/xen/xend/server/tests/test_controllers.py   |   10 
 tools/python/xen/xend/server/tpmif.py                    |    8 
 tools/python/xen/xend/server/vfbif.py                    |    4 
 tools/python/xen/xm/create.py                            |    6 
 tools/python/xen/xm/main.py                              |   13 
 tools/python/xen/xm/opts.py                              |    8 
 tools/python/xen/xm/tests/test_create.py                 |    6 
 tools/xcutils/xc_restore.c                               |   19 
 tools/xcutils/xc_save.c                                  |    5 
 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c |    9 
 xen/arch/ia64/asm-offsets.c                              |    2 
 xen/arch/ia64/asm-xsi-offsets.c                          |    2 
 xen/arch/ia64/linux-xen/Makefile                         |    3 
 xen/arch/ia64/linux-xen/README.origin                    |    1 
 xen/arch/ia64/linux-xen/cmdline.c                        |  131 
 xen/arch/ia64/linux-xen/entry.S                          |  120 
 xen/arch/ia64/linux-xen/sn/Makefile                      |    1 
 xen/arch/ia64/linux-xen/sn/kernel/Makefile               |    5 
 xen/arch/ia64/linux-xen/sn/kernel/README.origin          |   12 
 xen/arch/ia64/linux-xen/sn/kernel/io_init.c              |  783 +++
 xen/arch/ia64/linux-xen/sn/kernel/iomv.c                 |   82 
 xen/arch/ia64/linux-xen/sn/kernel/irq.c                  |  542 ++
 xen/arch/ia64/linux-xen/sn/kernel/setup.c                |  808 +++
 xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c              |  548 ++
 xen/arch/ia64/linux/Makefile                             |    6 
 xen/arch/ia64/linux/README.origin                        |    4 
 xen/arch/ia64/linux/dig/Makefile                         |    1 
 xen/arch/ia64/linux/dig/README.origin                    |    7 
 xen/arch/ia64/linux/dig/machvec.c                        |    3 
 xen/arch/ia64/linux/hp/Makefile                          |    1 
 xen/arch/ia64/linux/hp/zx1/Makefile                      |    1 
 xen/arch/ia64/linux/hp/zx1/README.origin                 |    7 
 xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c               |    3 
 xen/arch/ia64/linux/io.c                                 |  164 
 xen/arch/ia64/linux/sn/Makefile                          |    2 
 xen/arch/ia64/linux/sn/kernel/Makefile                   |    3 
 xen/arch/ia64/linux/sn/kernel/README.origin              |    9 
 xen/arch/ia64/linux/sn/kernel/machvec.c                  |   11 
 xen/arch/ia64/linux/sn/kernel/pio_phys.S                 |   71 
 xen/arch/ia64/linux/sn/kernel/ptc_deadlock.S             |   92 
 xen/arch/ia64/linux/sn/pci/Makefile                      |    1 
 xen/arch/ia64/linux/sn/pci/pcibr/Makefile                |    1 
 xen/arch/ia64/linux/sn/pci/pcibr/README.origin           |    7 
 xen/arch/ia64/linux/sn/pci/pcibr/pcibr_reg.c             |  285 +
 xen/arch/ia64/vmx/mmio.c                                 |   10 
 xen/arch/ia64/vmx/vmx_process.c                          |   23 
 xen/arch/ia64/vmx/vmx_vcpu.c                             |   45 
 xen/arch/ia64/vmx/vmx_virt.c                             |    2 
 xen/arch/ia64/xen/dom0_ops.c                             |   11 
 xen/arch/ia64/xen/dom_fw.c                               |  103 
 xen/arch/ia64/xen/domain.c                               |  149 
 xen/arch/ia64/xen/faults.c                               |   37 
 xen/arch/ia64/xen/hypercall.c                            |  172 
 xen/arch/ia64/xen/hyperprivop.S                          |  153 
 xen/arch/ia64/xen/irq.c                                  |    7 
 xen/arch/ia64/xen/ivt.S                                  |  458 +-
 xen/arch/ia64/xen/mm.c                                   |   19 
 xen/arch/ia64/xen/tlb_track.c                            |   25 
 xen/arch/ia64/xen/vcpu.c                                 |   75 
 xen/arch/ia64/xen/vhpt.c                                 |   13 
 xen/arch/ia64/xen/xenasm.S                               |   22 
 xen/arch/ia64/xen/xencomm.c                              |    4 
 xen/arch/ia64/xen/xensetup.c                             |    6 
 xen/arch/x86/hvm/hpet.c                                  |   24 
 xen/arch/x86/hvm/hvm.c                                   |   11 
 xen/arch/x86/hvm/i8254.c                                 |  153 
 xen/arch/x86/hvm/intercept.c                             |  303 +
 xen/arch/x86/hvm/rtc.c                                   |    2 
 xen/arch/x86/hvm/vioapic.c                               |  132 
 xen/arch/x86/hvm/vlapic.c                                |   74 
 xen/arch/x86/hvm/vmx/vmx.c                               |  296 +
 xen/arch/x86/hvm/vpic.c                                  |   83 
 xen/arch/x86/hvm/vpt.c                                   |   10 
 xen/arch/x86/mm/shadow/common.c                          |    7 
 xen/arch/x86/mm/shadow/multi.c                           |    2 
 xen/common/domain.c                                      |    1 
 xen/common/domctl.c                                      |   73 
 xen/include/asm-ia64/config.h                            |   29 
 xen/include/asm-ia64/domain.h                            |    4 
 xen/include/asm-ia64/hypercall.h                         |    2 
 xen/include/asm-ia64/linux-null/asm/nmi.h                |    1 
 xen/include/asm-ia64/linux-null/linux/dmapool.h          |    1 
 xen/include/asm-ia64/linux-null/linux/rwsem.h            |    1 
 xen/include/asm-ia64/linux-xen/asm/README.origin         |    7 
 xen/include/asm-ia64/linux-xen/asm/machvec.h             |  498 ++
 xen/include/asm-ia64/linux-xen/asm/machvec_dig.h         |   46 
 xen/include/asm-ia64/linux-xen/asm/machvec_hpzx1.h       |   66 
 xen/include/asm-ia64/linux-xen/asm/machvec_sn2.h         |  166 
 xen/include/asm-ia64/linux-xen/asm/page.h                |   10 
 xen/include/asm-ia64/linux-xen/asm/pci.h                 |  185 
 xen/include/asm-ia64/linux-xen/asm/sn/README.origin      |   16 
 xen/include/asm-ia64/linux-xen/asm/sn/addrs.h            |  299 +
 xen/include/asm-ia64/linux-xen/asm/sn/arch.h             |   92 
 xen/include/asm-ia64/linux-xen/asm/sn/hubdev.h           |   95 
 xen/include/asm-ia64/linux-xen/asm/sn/intr.h             |   73 
 xen/include/asm-ia64/linux-xen/asm/sn/io.h               |  281 +
 xen/include/asm-ia64/linux-xen/asm/sn/nodepda.h          |   87 
 xen/include/asm-ia64/linux-xen/asm/sn/pcibr_provider.h   |  153 
 xen/include/asm-ia64/linux-xen/asm/sn/rw_mmr.h           |   32 
 xen/include/asm-ia64/linux-xen/asm/sn/types.h            |   28 
 xen/include/asm-ia64/linux-xen/asm/system.h              |    1 
 xen/include/asm-ia64/linux-xen/asm/types.h               |    8 
 xen/include/asm-ia64/linux-xen/linux/README.origin       |    5 
 xen/include/asm-ia64/linux-xen/linux/device.h            |  489 ++
 xen/include/asm-ia64/linux-xen/linux/kobject.h           |  286 +
 xen/include/asm-ia64/linux-xen/linux/pci.h               |  820 +++
 xen/include/asm-ia64/linux/README.origin                 |   12 
 xen/include/asm-ia64/linux/asm/README.origin             |    4 
 xen/include/asm-ia64/linux/asm/machvec_init.h            |   32 
 xen/include/asm-ia64/linux/asm/sn/README.origin          |   24 
 xen/include/asm-ia64/linux/asm/sn/geo.h                  |  132 
 xen/include/asm-ia64/linux/asm/sn/klconfig.h             |  246 +
 xen/include/asm-ia64/linux/asm/sn/l1.h                   |   51 
 xen/include/asm-ia64/linux/asm/sn/leds.h                 |   33 
 xen/include/asm-ia64/linux/asm/sn/module.h               |  127 
 xen/include/asm-ia64/linux/asm/sn/pcibus_provider_defs.h |   68 
 xen/include/asm-ia64/linux/asm/sn/pcidev.h               |   83 
 xen/include/asm-ia64/linux/asm/sn/pda.h                  |   69 
 xen/include/asm-ia64/linux/asm/sn/pic.h                  |  261 +
 xen/include/asm-ia64/linux/asm/sn/shub_mmr.h             |  502 ++
 xen/include/asm-ia64/linux/asm/sn/shubio.h               | 3358 +++++++++++++++
 xen/include/asm-ia64/linux/asm/sn/simulator.h            |   20 
 xen/include/asm-ia64/linux/asm/sn/sn_cpuid.h             |  132 
 xen/include/asm-ia64/linux/asm/sn/sn_feature_sets.h      |   51 
 xen/include/asm-ia64/linux/asm/sn/sn_sal.h               | 1157 +++++
 xen/include/asm-ia64/linux/asm/sn/tiocp.h                |  257 +
 xen/include/asm-ia64/linux/asm/sn/xbow.h                 |  301 +
 xen/include/asm-ia64/linux/asm/sn/xwidgetdev.h           |   70 
 xen/include/asm-ia64/linux/completion.h                  |   57 
 xen/include/asm-ia64/linux/ioport.h                      |  136 
 xen/include/asm-ia64/linux/klist.h                       |   61 
 xen/include/asm-ia64/linux/kref.h                        |   32 
 xen/include/asm-ia64/linux/mod_devicetable.h             |  323 +
 xen/include/asm-ia64/linux/pci_ids.h                     | 2356 ++++++++++
 xen/include/asm-ia64/linux/pci_regs.h                    |  488 ++
 xen/include/asm-ia64/linux/pm.h                          |  279 +
 xen/include/asm-ia64/linux/sysfs.h                       |  206 
 xen/include/asm-ia64/multicall.h                         |   12 
 xen/include/asm-ia64/tlbflush.h                          |    2 
 xen/include/asm-ia64/vcpu.h                              |    1 
 xen/include/asm-ia64/vmx_vcpu.h                          |    2 
 xen/include/asm-ia64/xensystem.h                         |    1 
 xen/include/asm-ia64/xentypes.h                          |   19 
 xen/include/asm-x86/hvm/domain.h                         |   17 
 xen/include/asm-x86/hvm/hvm.h                            |   38 
 xen/include/asm-x86/hvm/support.h                        |  127 
 xen/include/asm-x86/hvm/vpt.h                            |    2 
 xen/include/public/arch-ia64.h                           |   64 
 xen/include/public/arch-x86/xen.h                        |   64 
 xen/include/public/domctl.h                              |   16 
 xen/include/xlat.lst                                     |    2 
 229 files changed, 23967 insertions(+), 3425 deletions(-)

diff -r 8475a4e0425e -r 3c8bb086025e Config.mk
--- a/Config.mk Thu Jan 18 15:18:07 2007 +0000
+++ b/Config.mk Fri Jan 19 14:48:57 2007 +0000
@@ -6,8 +6,11 @@ XEN_COMPILE_ARCH    ?= $(shell uname -m 
 XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
                          -e s/ppc/powerpc/ -e s/i86pc/x86_32/)
 XEN_TARGET_ARCH     ?= $(XEN_COMPILE_ARCH)
-XEN_TARGET_X86_PAE  ?= n
 XEN_OS              ?= $(shell uname -s)
+
+ifeq ($(XEN_TARGET_ARCH),x86_32)
+XEN_TARGET_X86_PAE  ?= y
+endif
 
 CONFIG_$(XEN_OS) := y
 
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_ia64
--- a/buildconfigs/linux-defconfig_xen0_ia64    Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen0_ia64    Fri Jan 19 14:48:57 2007 +0000
@@ -1512,10 +1512,7 @@ CONFIG_CRYPTO_DES=y
 # Hardware crypto devices
 #
 # CONFIG_XEN_UTIL is not set
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_XEN_BALLOON=y
-CONFIG_XEN_SKBUFF=y
 # CONFIG_XEN_DEVMEM is not set
 CONFIG_XEN_REBOOT=y
 # CONFIG_XEN_SMPBOOT is not set
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32  Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen0_x86_32  Fri Jan 19 14:48:57 2007 +0000
@@ -569,7 +569,7 @@ CONFIG_MEGARAID_NEWGEN=y
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
 CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
+CONFIG_SCSI_SATA_AHCI=y
 # CONFIG_SCSI_SATA_SVW is not set
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
@@ -734,7 +734,7 @@ CONFIG_SK98LIN=y
 CONFIG_SK98LIN=y
 # CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
+CONFIG_BNX2=y
 
 #
 # Ethernet (10000 Mbit)
@@ -1413,8 +1413,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_x86_64
--- a/buildconfigs/linux-defconfig_xen0_x86_64  Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen0_x86_64  Fri Jan 19 14:48:57 2007 +0000
@@ -517,7 +517,7 @@ CONFIG_MEGARAID_NEWGEN=y
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
 CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
+CONFIG_SCSI_SATA_AHCI=y
 # CONFIG_SCSI_SATA_SVW is not set
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
@@ -683,7 +683,7 @@ CONFIG_SK98LIN=y
 CONFIG_SK98LIN=y
 # CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
+CONFIG_BNX2=y
 
 #
 # Ethernet (10000 Mbit)
@@ -1363,8 +1363,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_ia64
--- a/buildconfigs/linux-defconfig_xenU_ia64    Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xenU_ia64    Fri Jan 19 14:48:57 2007 +0000
@@ -1386,10 +1386,7 @@ CONFIG_CRYPTO_DES=y
 # Hardware crypto devices
 #
 # CONFIG_XEN_UTIL is not set
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_XEN_BALLOON=y
-CONFIG_XEN_SKBUFF=y
 # CONFIG_XEN_DEVMEM is not set
 CONFIG_XEN_REBOOT=y
 # CONFIG_XEN_SMPBOOT is not set
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_x86_32
--- a/buildconfigs/linux-defconfig_xenU_x86_32  Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xenU_x86_32  Fri Jan 19 14:48:57 2007 +0000
@@ -922,8 +922,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_x86_64
--- a/buildconfigs/linux-defconfig_xenU_x86_64  Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xenU_x86_64  Fri Jan 19 14:48:57 2007 +0000
@@ -1218,8 +1218,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_ia64
--- a/buildconfigs/linux-defconfig_xen_ia64     Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen_ia64     Fri Jan 19 14:48:57 2007 +0000
@@ -1518,10 +1518,7 @@ CONFIG_CRYPTO_DES=y
 # Hardware crypto devices
 #
 # CONFIG_XEN_UTIL is not set
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_XEN_BALLOON=y
-CONFIG_XEN_SKBUFF=y
 # CONFIG_XEN_DEVMEM is not set
 CONFIG_XEN_REBOOT=y
 # CONFIG_XEN_SMPBOOT is not set
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32   Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen_x86_32   Fri Jan 19 14:48:57 2007 +0000
@@ -3272,8 +3272,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_x86_64
--- a/buildconfigs/linux-defconfig_xen_x86_64   Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen_x86_64   Fri Jan 19 14:48:57 2007 +0000
@@ -3103,8 +3103,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen     Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/mk.linux-2.6-xen     Fri Jan 19 14:48:57 2007 +0000
@@ -8,6 +8,10 @@ LINUX_DIR    = build-linux-$(LINUX_VER)-
 
 IMAGE_TARGET ?= vmlinuz
 INSTALL_BOOT_PATH ?= $(DESTDIR)
+
+ifeq ($(XEN_TARGET_ARCH),ia64)
+INSTALL_BOOT_PATH := $(DESTDIR)/boot
+endif
 
 LINUX_VER3  := $(LINUX_SERIES).$(word 3, $(subst ., ,$(LINUX_VER)))
 
diff -r 8475a4e0425e -r 3c8bb086025e extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Thu Jan 18 15:18:07 2007 +0000
+++ b/extras/mini-os/Makefile   Fri Jan 19 14:48:57 2007 +0000
@@ -9,7 +9,7 @@ XEN_ROOT = ../..
 XEN_ROOT = ../..
 include $(XEN_ROOT)/Config.mk
 
-XEN_INTERFACE_VERSION := 0x00030204
+XEN_INTERFACE_VERSION := 0x00030205
 export XEN_INTERFACE_VERSION
 
 # Set TARGET_ARCH
diff -r 8475a4e0425e -r 3c8bb086025e 
extras/mini-os/include/x86/x86_32/hypercall-x86_32.h
--- a/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h      Thu Jan 18 
15:18:07 2007 +0000
+++ b/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h      Fri Jan 19 
14:48:57 2007 +0000
@@ -303,6 +303,20 @@ HYPERVISOR_nmi_op(
        unsigned long arg)
 {
        return _hypercall2(int, nmi_op, op, arg);
+}
+
+static inline int
+HYPERVISOR_sysctl(
+       unsigned long op)
+{
+       return _hypercall1(int, sysctl, op);
+}
+
+static inline int
+HYPERVISOR_domctl(
+       unsigned long op)
+{
+       return _hypercall1(int, domctl, op);
 }
 
 #endif /* __HYPERCALL_X86_32_H__ */
diff -r 8475a4e0425e -r 3c8bb086025e 
extras/mini-os/include/x86/x86_64/hypercall-x86_64.h
--- a/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h      Thu Jan 18 
15:18:07 2007 +0000
+++ b/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h      Fri Jan 19 
14:48:57 2007 +0000
@@ -303,6 +303,20 @@ HYPERVISOR_nmi_op(
        unsigned long arg)
 {
        return _hypercall2(int, nmi_op, op, arg);
+}
+
+static inline int
+HYPERVISOR_sysctl(
+       unsigned long op)
+{
+       return _hypercall1(int, sysctl, op);
+}
+
+static inline int
+HYPERVISOR_domctl(
+       unsigned long op)
+{
+       return _hypercall1(int, domctl, op);
 }
 
 #endif /* __HYPERCALL_X86_64_H__ */
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c  Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c  Fri Jan 19 14:48:57 
2007 +0000
@@ -76,7 +76,9 @@ static void *syscall_page;
 
 int __init sysenter_setup(void)
 {
-       syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+       void *page = (void *)get_zeroed_page(GFP_ATOMIC);
+
+       syscall_page = page;
 
 #ifdef CONFIG_XEN
        if (boot_cpu_has(X86_FEATURE_SEP)) {
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig    Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig    Fri Jan 19 14:48:57 2007 +0000
@@ -579,12 +579,6 @@ config XEN_UTIL
 config XEN_UTIL
        default n
 
-config HAVE_ARCH_ALLOC_SKB
-       default y
-
-config HAVE_ARCH_DEV_ALLOC_SKB
-       default y
-
 config XEN_BALLOON
        default y
 
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c       Thu Jan 18 
15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c       Fri Jan 19 
14:48:57 2007 +0000
@@ -287,7 +287,6 @@ void foo(void)
        DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha);
        DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
        DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
-       DEFINE_MAPPED_REG_OFS(XSI_INCOMPL_REGFR_OFS, incomplete_regframe);
        DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
        DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
        DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S      Fri Jan 19 14:48:57 
2007 +0000
@@ -0,0 +1,925 @@
+/*
+ * This file contains the light-weight system call handlers 
(fsyscall-handlers).
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * 25-Sep-03 davidm    Implement fsys_rt_sigprocmask().
+ * 18-Feb-03 louisk    Implement fsys_gettimeofday().
+ * 28-Feb-03 davidm    Fixed several bugs in fsys_gettimeofday().  Tuned it 
some more,
+ *                     probably broke it along the way... ;-)
+ * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise 
fsys_gettimeofday to make
+ *                      it capable of using memory based clocks without 
falling back to C code.
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/percpu.h>
+#include <asm/thread_info.h>
+#include <asm/sal.h>
+#include <asm/signal.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+
+/*
+ * See Documentation/ia64/fsys.txt for details on fsyscalls.
+ *
+ * On entry to an fsyscall handler:
+ *   r10       = 0 (i.e., defaults to "successful syscall return")
+ *   r11       = saved ar.pfs (a user-level value)
+ *   r15       = system call number
+ *   r16       = "current" task pointer (in normal kernel-mode, this is in r13)
+ *   r32-r39   = system call arguments
+ *   b6                = return address (a user-level value)
+ *   ar.pfs    = previous frame-state (a user-level value)
+ *   PSR.be    = cleared to zero (i.e., little-endian byte order is in effect)
+ *   all other registers may contain values passed in from user-mode
+ *
+ * On return from an fsyscall handler:
+ *   r11       = saved ar.pfs (as passed into the fsyscall handler)
+ *   r15       = system call number (as passed into the fsyscall handler)
+ *   r32-r39   = system call arguments (as passed into the fsyscall handler)
+ *   b6                = return address (as passed into the fsyscall handler)
+ *   ar.pfs    = previous frame-state (as passed into the fsyscall handler)
+ */
+
+ENTRY(fsys_ni_syscall)
+       .prologue
+       .altrp b6
+       .body
+       mov r8=ENOSYS
+       mov r10=-1
+       FSYS_RETURN
+END(fsys_ni_syscall)
+
+ENTRY(fsys_getpid)
+       .prologue
+       .altrp b6
+       .body
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       ;;
+       ld4 r9=[r9]
+       add r8=IA64_TASK_TGID_OFFSET,r16
+       ;;
+       and r9=TIF_ALLWORK_MASK,r9
+       ld4 r8=[r8]                             // r8 = current->tgid
+       ;;
+       cmp.ne p8,p0=0,r9
+(p8)   br.spnt.many fsys_fallback_syscall
+       FSYS_RETURN
+END(fsys_getpid)
+
+ENTRY(fsys_getppid)
+       .prologue
+       .altrp b6
+       .body
+       add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+       ;;
+       ld8 r17=[r17]                           // r17 = current->group_leader
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       ;;
+
+       ld4 r9=[r9]
+       add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = 
&current->group_leader->real_parent
+       ;;
+       and r9=TIF_ALLWORK_MASK,r9
+
+1:     ld8 r18=[r17]                           // r18 = 
current->group_leader->real_parent
+       ;;
+       cmp.ne p8,p0=0,r9
+       add r8=IA64_TASK_TGID_OFFSET,r18        // r8 = 
&current->group_leader->real_parent->tgid
+       ;;
+
+       /*
+        * The .acq is needed to ensure that the read of tgid has returned its 
data before
+        * we re-check "real_parent".
+        */
+       ld4.acq r8=[r8]                         // r8 = 
current->group_leader->real_parent->tgid
+#ifdef CONFIG_SMP
+       /*
+        * Re-read current->group_leader->real_parent.
+        */
+       ld8 r19=[r17]                           // r19 = 
current->group_leader->real_parent
+(p8)   br.spnt.many fsys_fallback_syscall
+       ;;
+       cmp.ne p6,p0=r18,r19                    // did real_parent change?
+       mov r19=0                       // i must not leak kernel bits...
+(p6)   br.cond.spnt.few 1b                     // yes -> redo the read of tgid 
and the check
+       ;;
+       mov r17=0                       // i must not leak kernel bits...
+       mov r18=0                       // i must not leak kernel bits...
+#else
+       mov r17=0                       // i must not leak kernel bits...
+       mov r18=0                       // i must not leak kernel bits...
+       mov r19=0                       // i must not leak kernel bits...
+#endif
+       FSYS_RETURN
+END(fsys_getppid)
+
+ENTRY(fsys_set_tid_address)
+       .prologue
+       .altrp b6
+       .body
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       ;;
+       ld4 r9=[r9]
+       tnat.z p6,p7=r32                // check argument register for being NaT
+       ;;
+       and r9=TIF_ALLWORK_MASK,r9
+       add r8=IA64_TASK_PID_OFFSET,r16
+       add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
+       ;;
+       ld4 r8=[r8]
+       cmp.ne p8,p0=0,r9
+       mov r17=-1
+       ;;
+(p6)   st8 [r18]=r32
+(p7)   st8 [r18]=r17
+(p8)   br.spnt.many fsys_fallback_syscall
+       ;;
+       mov r17=0                       // i must not leak kernel bits...
+       mov r18=0                       // i must not leak kernel bits...
+       FSYS_RETURN
+END(fsys_set_tid_address)
+
+/*
+ * Ensure that the time interpolator structure is compatible with the asm code
+ */
+#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || 
IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
+       || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || 
IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
+#error fsys_gettimeofday incompatible with changes to struct time_interpolator
+#endif
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_DIVIDE_BY_1000 0x4000
+#define CLOCK_ADD_MONOTONIC 0x8000
+
+ENTRY(fsys_gettimeofday)
+       .prologue
+       .altrp b6
+       .body
+       mov r31 = r32
+       tnat.nz p6,p0 = r33             // guard against NaT argument
+(p6)    br.cond.spnt.few .fail_einval
+       mov r30 = CLOCK_DIVIDE_BY_1000
+       ;;
+.gettime:
+       // Register map
+       // Incoming r31 = pointer to address where to place result
+       //          r30 = flags determining how time is processed
+       // r2,r3 = temp r4-r7 preserved
+       // r8 = result nanoseconds
+       // r9 = result seconds
+       // r10 = temporary storage for clock difference
+       // r11 = preserved: saved ar.pfs
+       // r12 = preserved: memory stack
+       // r13 = preserved: thread pointer
+       // r14 = address of mask / mask
+       // r15 = preserved: system call number
+       // r16 = preserved: current task pointer
+       // r17 = wall to monotonic use
+       // r18 = time_interpolator->offset
+       // r19 = address of wall_to_monotonic
+       // r20 = pointer to struct time_interpolator / pointer to 
time_interpolator->address
+       // r21 = shift factor
+       // r22 = address of time interpolator->last_counter
+       // r23 = address of time_interpolator->last_cycle
+       // r24 = adress of time_interpolator->offset
+       // r25 = last_cycle value
+       // r26 = last_counter value
+       // r27 = pointer to xtime
+       // r28 = sequence number at the beginning of critcal section
+       // r29 = address of seqlock
+       // r30 = time processing flags / memory address
+       // r31 = pointer to result
+       // Predicates
+       // p6,p7 short term use
+       // p8 = timesource ar.itc
+       // p9 = timesource mmio64
+       // p10 = timesource mmio32
+       // p11 = timesource not to be handled by asm code
+       // p12 = memory time source ( = p9 | p10)
+       // p13 = do cmpxchg with time_interpolator_last_cycle
+       // p14 = Divide by 1000
+       // p15 = Add monotonic
+       //
+       // Note that instructions are optimized for McKinley. McKinley can 
process two
+       // bundles simultaneously and therefore we continuously try to feed the 
CPU
+       // two bundles and then a stop.
+       tnat.nz p6,p0 = r31     // branch deferred since it does not fit into 
bundle structure
+       mov pr = r30,0xc000     // Set predicates according to function
+       add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+       movl r20 = time_interpolator
+       ;;
+       ld8 r20 = [r20]         // get pointer to time_interpolator structure
+       movl r29 = xtime_lock
+       ld4 r2 = [r2]           // process work pending flags
+       movl r27 = xtime
+       ;;      // only one bundle here
+       ld8 r21 = [r20]         // first quad with control information
+       and r2 = TIF_ALLWORK_MASK,r2
+(p6)    br.cond.spnt.few .fail_einval  // deferred branch
+       ;;
+       add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
+       extr r3 = r21,32,32     // time_interpolator->nsec_per_cyc
+       extr r8 = r21,0,16      // time_interpolator->source
+       cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
+(p6)    br.cond.spnt.many fsys_fallback_syscall
+       ;;
+       cmp.eq p8,p12 = 0,r8    // Check for cpu timer
+       cmp.eq p9,p0 = 1,r8     // MMIO64 ?
+       extr r2 = r21,24,8      // time_interpolator->jitter
+       cmp.eq p10,p0 = 2,r8    // MMIO32 ?
+       cmp.ltu p11,p0 = 2,r8   // function or other clock
+(p11)  br.cond.spnt.many fsys_fallback_syscall
+       ;;
+       setf.sig f7 = r3        // Setup for scaling of counter
+(p15)  movl r19 = wall_to_monotonic
+(p12)  ld8 r30 = [r10]
+       cmp.ne p13,p0 = r2,r0   // need jitter compensation?
+       extr r21 = r21,16,8     // shift factor
+       ;;
+.time_redo:
+       .pred.rel.mutex p8,p9,p10
+       ld4.acq r28 = [r29]     // xtime_lock.sequence. Must come first for 
locking purposes
+(p8)   mov r2 = ar.itc         // CPU_TIMER. 36 clocks latency!!!
+       add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
+(p9)   ld8 r2 = [r30]          // readq(ti->address). Could also have latency 
issues..
+(p10)  ld4 r2 = [r30]          // readw(ti->address)
+(p13)  add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
+       ;;                      // could be removed by moving the last add 
upward
+       ld8 r26 = [r22]         // time_interpolator->last_counter
+(p13)  ld8 r25 = [r23]         // time interpolator->last_cycle
+       add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
+(p15)  ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
+       ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
+       add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
+       ;;
+       ld8 r18 = [r24]         // time_interpolator->offset
+       ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET    // xtime.tv_nsec
+(p13)  sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
+       ;;
+       ld8 r14 = [r14]         // time_interpolator->mask
+(p13)  cmp.gt.unc p6,p7 = r3,r0        // check if it is less than last. p6,p7 
cleared
+       sub r10 = r2,r26        // current_counter - last_counter
+       ;;
+(p6)   sub r10 = r25,r26       // time we got was less than last_cycle
+(p7)   mov ar.ccv = r25        // more than last_cycle. Prep for cmpxchg
+       ;;
+       and r10 = r10,r14       // Apply mask
+       ;;
+       setf.sig f8 = r10
+       nop.i 123
+       ;;
+(p7)   cmpxchg8.rel r3 = [r23],r2,ar.ccv
+EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have 
spare time
+       xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
+(p15)  add r9 = r9,r17         // Add wall to monotonic.secs to result secs
+       ;;
+(p15)  ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
+(p7)   cmp.ne p7,p0 = r25,r3   // if cmpxchg not successful redo
+       // simulate tbit.nz.or p7,p0 = r28,0
+       and r28 = ~1,r28        // Make sequence even to force retry if odd
+       getf.sig r2 = f8
+       mf
+       add r8 = r8,r18         // Add time interpolator offset
+       ;;
+       ld4 r10 = [r29]         // xtime_lock.sequence
+(p15)  add r8 = r8, r17        // Add monotonic.nsecs to nsecs
+       shr.u r2 = r2,r21
+       ;;              // overloaded 3 bundles!
+       // End critical section.
+       add r8 = r8,r2          // Add xtime.nsecs
+       cmp4.ne.or p7,p0 = r28,r10
+(p7)   br.cond.dpnt.few .time_redo     // sequence number changed ?
+       // Now r8=tv->tv_nsec and r9=tv->tv_sec
+       mov r10 = r0
+       movl r2 = 1000000000
+       add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
+(p14)  movl r3 = 2361183241434822607   // Prep for / 1000 hack
+       ;;
+.time_normalize:
+       mov r21 = r8
+       cmp.ge p6,p0 = r8,r2
+(p14)  shr.u r20 = r8, 3               // We can repeat this if necessary just 
wasting some time
+       ;;
+(p14)  setf.sig f8 = r20
+(p6)   sub r8 = r8,r2
+(p6)   add r9 = 1,r9                   // two nops before the branch.
+(p14)  setf.sig f7 = r3                // Chances for repeats are 1 in 10000 
for gettod
+(p6)   br.cond.dpnt.few .time_normalize
+       ;;
+       // Divided by 8 though shift. Now divide by 125
+       // The compiler was able to do that with a multiply
+       // and a shift and we do the same
+EX(.fail_efault, probe.w.fault r23, 3)         // This also costs 5 cycles
+(p14)  xmpy.hu f8 = f8, f7                     // xmpy has 5 cycles latency so 
use it...
+       ;;
+       mov r8 = r0
+(p14)  getf.sig r2 = f8
+       ;;
+(p14)  shr.u r21 = r2, 4
+       ;;
+EX(.fail_efault, st8 [r31] = r9)
+EX(.fail_efault, st8 [r23] = r21)
+       FSYS_RETURN
+.fail_einval:
+       mov r8 = EINVAL
+       mov r10 = -1
+       FSYS_RETURN
+.fail_efault:
+       mov r8 = EFAULT
+       mov r10 = -1
+       FSYS_RETURN
+END(fsys_gettimeofday)
+
+ENTRY(fsys_clock_gettime)
+       .prologue
+       .altrp b6
+       .body
+       cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
+       // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
+(p6)   br.spnt.few fsys_fallback_syscall
+       mov r31 = r33
+       shl r30 = r32,15
+       br.many .gettime
+END(fsys_clock_gettime)
+
+/*
+ * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t 
sigsetsize).
+ */
+#if _NSIG_WORDS != 1
+# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
+#endif
+ENTRY(fsys_rt_sigprocmask)
+       .prologue
+       .altrp b6
+       .body
+
+       add r2=IA64_TASK_BLOCKED_OFFSET,r16
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       cmp4.ltu p6,p0=SIG_SETMASK,r32
+
+       cmp.ne p15,p0=r0,r34                    // oset != NULL?
+       tnat.nz p8,p0=r34
+       add r31=IA64_TASK_SIGHAND_OFFSET,r16
+       ;;
+       ld8 r3=[r2]                             // read/prefetch 
current->blocked
+       ld4 r9=[r9]
+       tnat.nz.or p6,p0=r35
+
+       cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
+       tnat.nz.or p6,p0=r32
+(p6)   br.spnt.few .fail_einval                // fail with EINVAL
+       ;;
+#ifdef CONFIG_SMP
+       ld8 r31=[r31]                           // r31 <- current->sighand
+#endif
+       and r9=TIF_ALLWORK_MASK,r9
+       tnat.nz.or p8,p0=r33
+       ;;
+       cmp.ne p7,p0=0,r9
+       cmp.eq p6,p0=r0,r33                     // set == NULL?
+       add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- 
current->sighand->siglock
+(p8)   br.spnt.few .fail_efault                // fail with EFAULT
+(p7)   br.spnt.many fsys_fallback_syscall      // got pending kernel work...
+(p6)   br.dpnt.many .store_mask                // -> short-circuit to just 
reading the signal mask
+
+       /* Argh, we actually have to do some work and _update_ the signal mask: 
*/
+
+EX(.fail_efault, probe.r.fault r33, 3)         // verify user has read-access 
to *set
+EX(.fail_efault, ld8 r14=[r33])                        // r14 <- *set
+       mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
+       ;;
+
+       rsm psr.i                               // mask interrupt delivery
+       mov ar.ccv=0
+       andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
+
+#ifdef CONFIG_SMP
+       mov r17=1
+       ;;
+       cmpxchg4.acq r18=[r31],r17,ar.ccv       // try to acquire the lock
+       mov r8=EINVAL                   // default to EINVAL
+       ;;
+       ld8 r3=[r2]                     // re-read current->blocked now that we 
hold the lock
+       cmp4.ne p6,p0=r18,r0
+(p6)   br.cond.spnt.many .lock_contention
+       ;;
+#else
+       ld8 r3=[r2]                     // re-read current->blocked now that we 
hold the lock
+       mov r8=EINVAL                   // default to EINVAL
+#endif
+       add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
+       add r19=IA64_TASK_SIGNAL_OFFSET,r16
+       cmp4.eq p6,p0=SIG_BLOCK,r32
+       ;;
+       ld8 r19=[r19]                   // r19 <- current->signal
+       cmp4.eq p7,p0=SIG_UNBLOCK,r32
+       cmp4.eq p8,p0=SIG_SETMASK,r32
+       ;;
+       ld8 r18=[r18]                   // r18 <- current->pending.signal
+       .pred.rel.mutex p6,p7,p8
+(p6)   or r14=r3,r14                   // SIG_BLOCK
+(p7)   andcm r14=r3,r14                // SIG_UNBLOCK
+
+(p8)   mov r14=r14                     // SIG_SETMASK
+(p6)   mov r8=0                        // clear error code
+       // recalc_sigpending()
+       add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
+
+       add 
r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
+       ;;
+       ld4 r17=[r17]           // r17 <- current->signal->group_stop_count
+(p7)   mov r8=0                // clear error code
+
+       ld8 r19=[r19]           // r19 <- current->signal->shared_pending
+       ;;
+       cmp4.gt p6,p7=r17,r0    // p6/p7 <- (current->signal->group_stop_count 
> 0)?
+(p8)   mov r8=0                // clear error code
+
+       or r18=r18,r19          // r18 <- current->pending | 
current->signal->shared_pending
+       ;;
+       // r18 <- (current->pending | current->signal->shared_pending) & 
~current->blocked:
+       andcm r18=r18,r14
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       ;;
+
+(p7)   cmp.ne.or.andcm p6,p7=r18,r0            // p6/p7 <- signal pending
+       mov r19=0                                       // i must not leak 
kernel bits...
+(p6)   br.cond.dpnt.many .sig_pending
+       ;;
+
+1:     ld4 r17=[r9]                            // r17 <- 
current->thread_info->flags
+       ;;
+       mov ar.ccv=r17
+       and r18=~_TIF_SIGPENDING,r17            // r18 <- r17 & ~(1 << 
TIF_SIGPENDING)
+       ;;
+
+       st8 [r2]=r14                            // update current->blocked with 
new mask
+       cmpxchg4.acq r8=[r9],r18,ar.ccv         // current->thread_info->flags 
<- r18
+       ;;
+       cmp.ne p6,p0=r17,r8                     // update failed?
+(p6)   br.cond.spnt.few 1b                     // yes -> retry
+
+#ifdef CONFIG_SMP
+       st4.rel [r31]=r0                        // release the lock
+#endif
+       ssm psr.i
+       ;;
+
+       srlz.d                                  // ensure psr.i is set again
+       mov r18=0                                       // i must not leak 
kernel bits...
+
+.store_mask:
+EX(.fail_efault, (p15) probe.w.fault r34, 3)   // verify user has write-access 
to *oset
+EX(.fail_efault, (p15) st8 [r34]=r3)
+       mov r2=0                                        // i must not leak 
kernel bits...
+       mov r3=0                                        // i must not leak 
kernel bits...
+       mov r8=0                                // return 0
+       mov r9=0                                        // i must not leak 
kernel bits...
+       mov r14=0                                       // i must not leak 
kernel bits...
+       mov r17=0                                       // i must not leak 
kernel bits...
+       mov r31=0                                       // i must not leak 
kernel bits...
+       FSYS_RETURN
+
+.sig_pending:
+#ifdef CONFIG_SMP
+       st4.rel [r31]=r0                        // release the lock
+#endif
+       ssm psr.i
+       ;;
+       srlz.d
+       br.sptk.many fsys_fallback_syscall      // with signal pending, do the 
heavy-weight syscall
+
+#ifdef CONFIG_SMP
+.lock_contention:
+       /* Rather than spinning here, fall back on doing a heavy-weight 
syscall.  */
+       ssm psr.i
+       ;;
+       srlz.d
+       br.sptk.many fsys_fallback_syscall
+#endif
+END(fsys_rt_sigprocmask)
+
+ENTRY(fsys_fallback_syscall)
+       .prologue
+       .altrp b6
+       .body
+       /*
+        * We only get here from light-weight syscall handlers.  Thus, we 
already
+        * know that r15 contains a valid syscall number.  No need to re-check.
+        */
+       adds r17=-1024,r15
+       movl r14=sys_call_table
+       ;;
+#ifdef CONFIG_XEN
+       movl r18=running_on_xen;;
+       ld4 r18=[r18];;
+       // p14 = running_on_xen
+       // p15 = !running_on_xen
+       cmp.ne p14,p15=r0,r18
+       ;;    
+(p14)  movl r18=XSI_PSR_I_ADDR;;
+(p14)  ld8 r18=[r18]
+(p14)  mov r29=1;;
+(p14)  st1 [r18]=r29
+(p15)  rsm psr.i
+#else    
+       rsm psr.i
+#endif    
+       shladd r18=r17,3,r14
+       ;;
+       ld8 r18=[r18]                           // load normal (heavy-weight) 
syscall entry-point
+#ifdef CONFIG_XEN
+(p14)  mov r27=r8
+(p14)  XEN_HYPER_GET_PSR
+       ;;
+(p14)  mov r29=r8
+(p14)  mov r8=r27
+(p15)  mov r29=psr                             // read psr (12 cyc load 
latency)
+#else    
+       mov r29=psr                             // read psr (12 cyc load 
latency)
+#endif    
+       mov r27=ar.rsc
+       mov r21=ar.fpsr
+       mov r26=ar.pfs
+END(fsys_fallback_syscall)
+       /* FALL THROUGH */
+GLOBAL_ENTRY(fsys_bubble_down)
+       .prologue
+       .altrp b6
+       .body
+       /*
+        * We get here for syscalls that don't have a lightweight
+        * handler.  For those, we need to bubble down into the kernel
+        * and that requires setting up a minimal pt_regs structure,
+        * and initializing the CPU state more or less as if an
+        * interruption had occurred.  To make syscall-restarts work,
+        * we setup pt_regs such that cr_iip points to the second
+        * instruction in syscall_via_break.  Decrementing the IP
+        * hence will restart the syscall via break and not
+        * decrementing IP will return us to the caller, as usual.
+        * Note that we preserve the value of psr.pp rather than
+        * initializing it from dcr.pp.  This makes it possible to
+        * distinguish fsyscall execution from other privileged
+        * execution.
+        *
+        * On entry:
+        *      - normal fsyscall handler register usage, except
+        *        that we also have:
+        *      - r18: address of syscall entry point
+        *      - r21: ar.fpsr
+        *      - r26: ar.pfs
+        *      - r27: ar.rsc
+        *      - r29: psr
+        *
+        * We used to clear some PSR bits here but that requires slow
+        * serialization.  Fortuntely, that isn't really necessary.
+        * The rationale is as follows: we used to clear bits
+        * ~PSR_PRESERVED_BITS in PSR.L.  Since
+        * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
+        * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
+        * However,
+        *
+        * PSR.BE : already is turned off in __kernel_syscall_via_epc()
+        * PSR.AC : don't care (kernel normally turns PSR.AC on)
+        * PSR.I  : already turned off by the time fsys_bubble_down gets
+        *          invoked
+        * PSR.DFL: always 0 (kernel never turns it on)
+        * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
+        *          initiative
+        * PSR.DI : always 0 (kernel never turns it on)
+        * PSR.SI : always 0 (kernel never turns it on)
+        * PSR.DB : don't care --- kernel never enables kernel-level
+        *          breakpoints
+        * PSR.TB : must be 0 already; if it wasn't zero on entry to
+        *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
+        *          will trigger a taken branch; the taken-trap-handler then
+        *          converts the syscall into a break-based system-call.
+        */
+       /*
+        * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
+        * The rest we have to synthesize.
+        */
+#      define PSR_ONE_BITS             ((3 << IA64_PSR_CPL0_BIT)       \
+                                        | (0x1 << IA64_PSR_RI_BIT)     \
+                                        | IA64_PSR_BN | IA64_PSR_I)
+
+       invala                                  // M0|1
+       movl r14=ia64_ret_from_syscall          // X
+
+       nop.m 0
+       movl r28=__kernel_syscall_via_break     // X    create cr.iip
+       ;;
+
+       mov r2=r16                              // A    get task addr to 
addl-addressable register
+       adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
+       mov r31=pr                              // I0   save pr (2 cyc)
+       ;;
+       st1 [r16]=r0                            // M2|3 clear 
current->thread.on_ustack flag
+       addl r22=IA64_RBS_OFFSET,r2             // A    compute base of RBS
+       add r3=TI_FLAGS+IA64_TASK_SIZE,r2       // A
+       ;;
+       ld4 r3=[r3]                             // M0|1 r3 = 
current_thread_info()->flags
+       lfetch.fault.excl.nt1 [r22]             // M0|1 prefetch register 
backing-store
+       nop.i 0
+       ;;
+       mov ar.rsc=0                            // M2   set enforced lazy mode, 
pl 0, LE, loadrs=0
+       nop.m 0
+       nop.i 0
+       ;;
+       mov r23=ar.bspstore                     // M2 (12 cyc) save ar.bspstore
+       mov.m r24=ar.rnat                       // M2 (5 cyc) read ar.rnat 
(dual-issues!)
+       nop.i 0
+       ;;
+       mov ar.bspstore=r22                     // M2 (6 cyc) switch to kernel 
RBS
+       movl r8=PSR_ONE_BITS                    // X
+       ;;
+       mov r25=ar.unat                         // M2 (5 cyc) save ar.unat
+       mov r19=b6                              // I0   save b6 (2 cyc)
+       mov r20=r1                              // A    save caller's gp in r20
+       ;;
+       or r29=r8,r29                           // A    construct cr.ipsr value 
to save
+       mov b6=r18                              // I0   copy syscall 
entry-point to b6 (7 cyc)
+       addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of 
memory stack
+
+       mov r18=ar.bsp                          // M2   save (kernel) ar.bsp 
(12 cyc)
+       cmp.ne pKStk,pUStk=r0,r0                // A    set pKStk <- 0, pUStk 
<- 1
+       br.call.sptk.many b7=ia64_syscall_setup // B
+       ;;
+       mov ar.rsc=0x3                          // M2   set eager mode, pl 0, 
LE, loadrs=0
+       mov rp=r14                              // I0   set the real return addr
+       and r3=_TIF_SYSCALL_TRACEAUDIT,r3       // A
+       ;;
+#ifdef CONFIG_XEN
+       movl r14=running_on_xen;;
+       ld4 r14=[r14];;
+       // p14 = running_on_xen
+       // p15 = !running_on_xen
+       cmp.ne p14,p15=r0,r14
+       ;; 
+(p14)  movl r28=XSI_PSR_I_ADDR;;
+(p14)  ld8 r28=[r28];;
+(p14)  adds r28=-1,r28;;                       // event_pending
+(p14)  ld1 r14=[r28];;
+(p14)  cmp.ne.unc p13,p14=r14,r0;;
+(p13)  XEN_HYPER_SSM_I
+(p14)  adds r28=1,r28;;                        // event_mask
+(p14)  st1 [r28]=r0;;
+(p15)  ssm psr.i
+#else
+       ssm psr.i                               // M2   we're on kernel stacks 
now, reenable irqs
+#endif    
+       cmp.eq p8,p0=r3,r0                      // A
+(p10)  br.cond.spnt.many ia64_ret_from_syscall // B    return if bad 
call-frame or r15 is a NaT
+
+       nop.m 0
+(p8)   br.call.sptk.many b6=b6                 // B    (ignore return address)
+       br.cond.spnt ia64_trace_syscall         // B
+END(fsys_bubble_down)
+
+       .rodata
+       .align 8
+       .globl fsyscall_table
+
+       data8 fsys_bubble_down
+fsyscall_table:
+       data8 fsys_ni_syscall
+       data8 0                         // exit                 // 1025
+       data8 0                         // read
+       data8 0                         // write
+       data8 0                         // open
+       data8 0                         // close
+       data8 0                         // creat                // 1030
+       data8 0                         // link
+       data8 0                         // unlink
+       data8 0                         // execve
+       data8 0                         // chdir
+       data8 0                         // fchdir               // 1035
+       data8 0                         // utimes
+       data8 0                         // mknod
+       data8 0                         // chmod
+       data8 0                         // chown
+       data8 0                         // lseek                // 1040
+       data8 fsys_getpid               // getpid
+       data8 fsys_getppid              // getppid
+       data8 0                         // mount
+       data8 0                         // umount
+       data8 0                         // setuid               // 1045
+       data8 0                         // getuid
+       data8 0                         // geteuid
+       data8 0                         // ptrace
+       data8 0                         // access
+       data8 0                         // sync                 // 1050
+       data8 0                         // fsync
+       data8 0                         // fdatasync
+       data8 0                         // kill
+       data8 0                         // rename
+       data8 0                         // mkdir                // 1055
+       data8 0                         // rmdir
+       data8 0                         // dup
+       data8 0                         // pipe
+       data8 0                         // times
+       data8 0                         // brk                  // 1060
+       data8 0                         // setgid
+       data8 0                         // getgid
+       data8 0                         // getegid
+       data8 0                         // acct
+       data8 0                         // ioctl                // 1065
+       data8 0                         // fcntl
+       data8 0                         // umask
+       data8 0                         // chroot
+       data8 0                         // ustat
+       data8 0                         // dup2                 // 1070
+       data8 0                         // setreuid
+       data8 0                         // setregid
+       data8 0                         // getresuid
+       data8 0                         // setresuid
+       data8 0                         // getresgid            // 1075
+       data8 0                         // setresgid
+       data8 0                         // getgroups
+       data8 0                         // setgroups
+       data8 0                         // getpgid
+       data8 0                         // setpgid              // 1080
+       data8 0                         // setsid
+       data8 0                         // getsid
+       data8 0                         // sethostname
+       data8 0                         // setrlimit
+       data8 0                         // getrlimit            // 1085
+       data8 0                         // getrusage
+       data8 fsys_gettimeofday         // gettimeofday
+       data8 0                         // settimeofday
+       data8 0                         // select
+       data8 0                         // poll                 // 1090
+       data8 0                         // symlink
+       data8 0                         // readlink
+       data8 0                         // uselib
+       data8 0                         // swapon
+       data8 0                         // swapoff              // 1095
+       data8 0                         // reboot
+       data8 0                         // truncate
+       data8 0                         // ftruncate
+       data8 0                         // fchmod
+       data8 0                         // fchown               // 1100
+       data8 0                         // getpriority
+       data8 0                         // setpriority
+       data8 0                         // statfs
+       data8 0                         // fstatfs
+       data8 0                         // gettid               // 1105
+       data8 0                         // semget
+       data8 0                         // semop
+       data8 0                         // semctl
+       data8 0                         // msgget
+       data8 0                         // msgsnd               // 1110
+       data8 0                         // msgrcv
+       data8 0                         // msgctl
+       data8 0                         // shmget
+       data8 0                         // shmat
+       data8 0                         // shmdt                // 1115
+       data8 0                         // shmctl
+       data8 0                         // syslog
+       data8 0                         // setitimer
+       data8 0                         // getitimer
+       data8 0                                                 // 1120
+       data8 0
+       data8 0
+       data8 0                         // vhangup
+       data8 0                         // lchown
+       data8 0                         // remap_file_pages     // 1125
+       data8 0                         // wait4
+       data8 0                         // sysinfo
+       data8 0                         // clone
+       data8 0                         // setdomainname
+       data8 0                         // newuname             // 1130
+       data8 0                         // adjtimex
+       data8 0
+       data8 0                         // init_module
+       data8 0                         // delete_module
+       data8 0                                                 // 1135
+       data8 0
+       data8 0                         // quotactl
+       data8 0                         // bdflush
+       data8 0                         // sysfs
+       data8 0                         // personality          // 1140
+       data8 0                         // afs_syscall
+       data8 0                         // setfsuid
+       data8 0                         // setfsgid
+       data8 0                         // getdents
+       data8 0                         // flock                // 1145
+       data8 0                         // readv
+       data8 0                         // writev
+       data8 0                         // pread64
+       data8 0                         // pwrite64
+       data8 0                         // sysctl               // 1150
+       data8 0                         // mmap
+       data8 0                         // munmap
+       data8 0                         // mlock
+       data8 0                         // mlockall
+       data8 0                         // mprotect             // 1155
+       data8 0                         // mremap
+       data8 0                         // msync
+       data8 0                         // munlock
+       data8 0                         // munlockall
+       data8 0                         // sched_getparam       // 1160
+       data8 0                         // sched_setparam
+       data8 0                         // sched_getscheduler
+       data8 0                         // sched_setscheduler
+       data8 0                         // sched_yield
+       data8 0                         // sched_get_priority_max       // 1165
+       data8 0                         // sched_get_priority_min
+       data8 0                         // sched_rr_get_interval
+       data8 0                         // nanosleep
+       data8 0                         // nfsservctl
+       data8 0                         // prctl                // 1170
+       data8 0                         // getpagesize
+       data8 0                         // mmap2
+       data8 0                         // pciconfig_read
+       data8 0                         // pciconfig_write
+       data8 0                         // perfmonctl           // 1175
+       data8 0                         // sigaltstack
+       data8 0                         // rt_sigaction
+       data8 0                         // rt_sigpending
+       data8 fsys_rt_sigprocmask       // rt_sigprocmask
+       data8 0                         // rt_sigqueueinfo      // 1180
+       data8 0                         // rt_sigreturn
+       data8 0                         // rt_sigsuspend
+       data8 0                         // rt_sigtimedwait
+       data8 0                         // getcwd
+       data8 0                         // capget               // 1185
+       data8 0                         // capset
+       data8 0                         // sendfile
+       data8 0
+       data8 0
+       data8 0                         // socket               // 1190
+       data8 0                         // bind
+       data8 0                         // connect
+       data8 0                         // listen
+       data8 0                         // accept
+       data8 0                         // getsockname          // 1195
+       data8 0                         // getpeername
+       data8 0                         // socketpair
+       data8 0                         // send
+       data8 0                         // sendto
+       data8 0                         // recv                 // 1200
+       data8 0                         // recvfrom
+       data8 0                         // shutdown
+       data8 0                         // setsockopt
+       data8 0                         // getsockopt
+       data8 0                         // sendmsg              // 1205
+       data8 0                         // recvmsg
+       data8 0                         // pivot_root
+       data8 0                         // mincore
+       data8 0                         // madvise
+       data8 0                         // newstat              // 1210
+       data8 0                         // newlstat
+       data8 0                         // newfstat
+       data8 0                         // clone2
+       data8 0                         // getdents64
+       data8 0                         // getunwind            // 1215
+       data8 0                         // readahead
+       data8 0                         // setxattr
+       data8 0                         // lsetxattr
+       data8 0                         // fsetxattr
+       data8 0                         // getxattr             // 1220
+       data8 0                         // lgetxattr
+       data8 0                         // fgetxattr
+       data8 0                         // listxattr
+       data8 0                         // llistxattr
+       data8 0                         // flistxattr           // 1225
+       data8 0                         // removexattr
+       data8 0                         // lremovexattr
+       data8 0                         // fremovexattr
+       data8 0                         // tkill
+       data8 0                         // futex                // 1230
+       data8 0                         // sched_setaffinity
+       data8 0                         // sched_getaffinity
+       data8 fsys_set_tid_address      // set_tid_address
+       data8 0                         // fadvise64_64
+       data8 0                         // tgkill               // 1235
+       data8 0                         // exit_group
+       data8 0                         // lookup_dcookie
+       data8 0                         // io_setup
+       data8 0                         // io_destroy
+       data8 0                         // io_getevents         // 1240
+       data8 0                         // io_submit
+       data8 0                         // io_cancel
+       data8 0                         // epoll_create
+       data8 0                         // epoll_ctl
+       data8 0                         // epoll_wait           // 1245
+       data8 0                         // restart_syscall
+       data8 0                         // semtimedop
+       data8 0                         // timer_create
+       data8 0                         // timer_settime
+       data8 0                         // timer_gettime        // 1250
+       data8 0                         // timer_getoverrun
+       data8 0                         // timer_delete
+       data8 0                         // clock_settime
+       data8 fsys_clock_gettime        // clock_gettime
+
+       // fill in zeros for the remaining entries
+       .zero:
+       .space fsyscall_table + 8*NR_syscalls - .zero, 0
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/kernel/gate.S
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S      Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S      Fri Jan 19 14:48:57 
2007 +0000
@@ -55,18 +55,6 @@
 #define LOAD_RUNNING_ON_XEN(reg)                       \
 [1:]   movl reg=0;                                     \
        .xdata4 ".data.patch.running_on_xen", 1b-.
-
-       .section ".data.patch.brl_xen_rsm_be_i", "a"
-       .previous
-#define BRL_COND_XEN_RSM_BE_I(pr)                      \
-[1:](pr)brl.cond.sptk 0;                               \
-       .xdata4 ".data.patch.brl_xen_rsm_be_i", 1b-.
-
-       .section ".data.patch.brl_xen_get_psr", "a"
-       .previous
-#define BRL_COND_XEN_GET_PSR(pr)                       \
-[1:](pr)brl.cond.sptk 0;                               \
-       .xdata4 ".data.patch.brl_xen_get_psr", 1b-.
 
        .section ".data.patch.brl_xen_ssm_i_0", "a"
        .previous
@@ -155,9 +143,9 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
        cmp.ne isXen,isRaw=r0,r30
        ;;
 (isRaw)        rsm psr.be | psr.i
-       BRL_COND_XEN_RSM_BE_I(isXen)
-       .global .vdso_rsm_be_i_ret
-.vdso_rsm_be_i_ret:
+(isXen)        st1 [r22]=r20
+(isXen)        rum psr.be
+       ;;
 #else
        rsm psr.be | psr.i                      // M2 (5 cyc to srlz.d)
 #endif
@@ -170,9 +158,9 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
        lfetch [r18]                            // M0|1
 #ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
 (isRaw)        mov r29=psr
-       BRL_COND_XEN_GET_PSR(isXen)
-       .global .vdso_get_psr_ret
-.vdso_get_psr_ret:
+(isXen)        XEN_HYPER_GET_PSR
+       ;;
+(isXen)        mov r29=r8
 #else
        mov r29=psr                             // M2 (12 cyc)
 #endif
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S  Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S  Fri Jan 19 14:48:57 
2007 +0000
@@ -48,14 +48,6 @@ SECTIONS
                                    __start_gate_running_on_xen_patchlist = .;
                                    *(.data.patch.running_on_xen)
                                    __end_gate_running_on_xen_patchlist = .;
-
-                                   __start_gate_brl_xen_rsm_be_i_patchlist = .;
-                                   *(.data.patch.brl_xen_rsm_be_i)
-                                   __end_gate_brl_xen_rsm_be_i_patchlist = .;
-
-                                   __start_gate_brl_xen_get_psr_patchlist = .;
-                                   *(.data.patch.brl_xen_get_psr)
-                                   __end_gate_brl_xen_get_psr_patchlist = .;
 
                                    __start_gate_brl_xen_ssm_i_0_patchlist = .;
                                    *(.data.patch.brl_xen_ssm_i_0)
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c     Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c     Fri Jan 19 14:48:57 
2007 +0000
@@ -236,13 +236,9 @@ static void
 static void
 patch_brl_in_vdso(void)
 {
-       EXTERN_PATCHLIST(xen_rsm_be_i);
-       EXTERN_PATCHLIST(xen_get_psr);
        EXTERN_PATCHLIST(xen_ssm_i_0);
        EXTERN_PATCHLIST(xen_ssm_i_1);
 
-       PATCH_BRL_SYMADDR(xen_rsm_be_i);
-       PATCH_BRL_SYMADDR(xen_get_psr);
        PATCH_BRL_SYMADDR(xen_ssm_i_0);
        PATCH_BRL_SYMADDR(xen_ssm_i_1);
 }
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Fri Jan 19 14:48:57 
2007 +0000
@@ -76,10 +76,18 @@ EXPORT_SYMBOL(__per_cpu_offset);
 #endif
 
 #ifdef CONFIG_XEN
+static void
+xen_panic_hypercall(struct unw_frame_info *info, void *arg)
+{
+       current->thread.ksp = (__u64)info->sw - 16;
+       HYPERVISOR_shutdown(SHUTDOWN_crash);
+       /* we're never actually going to get here... */
+}
+
 static int
 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-       HYPERVISOR_shutdown(SHUTDOWN_crash);
+       unw_init_running(xen_panic_hypercall, NULL);
        /* we're never actually going to get here... */
        return NOTIFY_DONE;
 }
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S    Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S    Fri Jan 19 14:48:57 
2007 +0000
@@ -8,336 +8,110 @@
 #include <asm/processor.h>
 #include <asm/asmmacro.h>
 
-/* To clear vpsr.ic, vpsr.i needs to be cleared first */
-#define XEN_CLEAR_PSR_IC                               \
-       mov r14=1;                                      \
-       movl r15=XSI_PSR_I_ADDR;                        \
-       movl r2=XSI_PSR_IC;                             \
-       ;;                                              \
-       ld8 r15=[r15];                                  \
-       ld4 r3=[r2];                                    \
-       ;;                                              \
-       ld1 r16=[r15];                                  \
-       ;;                                              \
-       st1 [r15]=r14;                                  \
-       st4 [r2]=r0;                                    \
-       ;;
-
-/* First restore vpsr.ic, and then vpsr.i */
-#define XEN_RESTORE_PSR_IC                             \
-       st4 [r2]=r3;                                    \
-       st1 [r15]=r16;                                  \
-       ;;
+GLOBAL_ENTRY(xen_get_psr)
+       XEN_HYPER_GET_PSR
+       br.ret.sptk.many rp
+    ;;
+END(xen_get_psr)
 
 GLOBAL_ENTRY(xen_get_ivr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=cr.ivr;;
-(p7)   br.ret.sptk.many rp
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_IVR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_get_ivr)
 
 GLOBAL_ENTRY(xen_get_tpr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=cr.tpr;;
-(p7)   br.ret.sptk.many rp
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_TPR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_get_tpr)
 
 GLOBAL_ENTRY(xen_set_tpr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov cr.tpr=r32;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_TPR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_set_tpr)
 
 GLOBAL_ENTRY(xen_eoi)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov cr.eoi=r0;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_EOI
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_eoi)
 
 GLOBAL_ENTRY(xen_thash)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   thash r8=r32;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_THASH
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_thash)
 
 GLOBAL_ENTRY(xen_set_itm)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov cr.itm=r32;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_ITM
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_set_itm)
 
 GLOBAL_ENTRY(xen_ptcga)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   ptc.ga r32,r33;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
        mov r9=r33
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_PTC_GA
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_ptcga)
 
 GLOBAL_ENTRY(xen_get_rr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=rr[r32];;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_RR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_get_rr)
 
 GLOBAL_ENTRY(xen_set_rr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov rr[r32]=r33;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
        mov r9=r33
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_RR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_set_rr)
 
 GLOBAL_ENTRY(xen_set_kr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.ne p7,p0=r8,r0;;
-(p7)   br.cond.spnt.few 1f;
-       ;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar0=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar1=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar2=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar3=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar4=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar5=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar6=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar7=r9
-(p7)   br.ret.sptk.many rp;;
-
-1:     mov r8=r32
+       mov r8=r32
        mov r9=r33
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_KR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_set_kr)
 
 GLOBAL_ENTRY(xen_fc)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   fc r32;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_FC
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_fc)
 
 GLOBAL_ENTRY(xen_get_cpuid)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=cpuid[r32];;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_CPUID
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_get_cpuid)
 
 GLOBAL_ENTRY(xen_get_pmd)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=pmd[r32];;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_PMD
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_get_pmd)
 
 #ifdef CONFIG_IA32_SUPPORT
 GLOBAL_ENTRY(xen_get_eflag)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=ar24;;
-(p7)   br.ret.sptk.many rp
-       ;;
-       mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_EFLAG
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_get_eflag)
        
 // some bits aren't set if pl!=0, see SDM vol1 3.1.8
 GLOBAL_ENTRY(xen_set_eflag)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov ar24=r32
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_EFLAG
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_set_eflag)
 #endif
@@ -355,27 +129,6 @@ END(xen_send_ipi)
 #ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
 // Those are vdso specialized.
 // In fsys mode, call, ret can't be used.
-GLOBAL_ENTRY(xen_rsm_be_i)
-       st1 [r22]=r20
-       st4 [r23]=r0
-       XEN_HYPER_RSM_BE
-       st4 [r23]=r20
-       brl.cond.sptk   .vdso_rsm_be_i_ret
-       ;; 
-END(xen_rsm_be_i)
-
-GLOBAL_ENTRY(xen_get_psr)
-       mov r31=r8
-       mov r25=IA64_PSR_IC
-       st4 [r23]=r0
-       XEN_HYPER_GET_PSR
-       ;; 
-       st4 [r23]=r20
-       or r29=r8,r25 // vpsr.ic was cleared for hyperprivop
-       mov r8=r31
-       brl.cond.sptk   .vdso_get_psr_ret
-       ;; 
-END(xen_get_psr)
 
        // see xen_ssm_i() in privop.h
        // r22 = &vcpu->vcpu_info->evtchn_upcall_mask
@@ -395,7 +148,6 @@ END(xen_get_psr)
 (p14)  cmp.ne.unc p11,p0=r0,r25;       \
        ;;                              \
 (p11)  st1 [r22]=r20;                  \
-(p11)  st4 [r23]=r0;                   \
 (p11)  XEN_HYPER_SSM_I;
                
 GLOBAL_ENTRY(xen_ssm_i_0)
@@ -409,4 +161,11 @@ GLOBAL_ENTRY(xen_ssm_i_1)
        brl.cond.sptk   .vdso_ssm_i_1_ret
        ;; 
 END(xen_ssm_i_1)
+
+GLOBAL_ENTRY(__hypercall)
+       mov r2=r37
+       break 0x1000
+       br.ret.sptk.many b0
+       ;; 
+END(__hypercall)
 #endif
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Fri Jan 19 14:48:57 
2007 +0000
@@ -25,7 +25,10 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/efi.h>
 #include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/meminit.h>
 #include <asm/hypervisor.h>
 #include <asm/hypercall.h>
 #include <xen/interface/memory.h>
@@ -45,6 +48,8 @@ static int p2m_expose_init(void);
 #else
 #define p2m_expose_init() (-ENOSYS)
 #endif
+
+EXPORT_SYMBOL(__hypercall);
 
 //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
 // move those to lib/contiguous_bitmap?
@@ -56,13 +61,90 @@ static int p2m_expose_init(void);
  */
 unsigned long *contiguous_bitmap;
 
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+/* Following logic is stolen from create_mem_map_table() for virtual memmap */
+static int
+create_contiguous_bitmap(u64 start, u64 end, void *arg)
+{
+       unsigned long address, start_page, end_page;
+       unsigned long bitmap_start, bitmap_end;
+       unsigned char *bitmap;
+       int node;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       bitmap_start = (unsigned long)contiguous_bitmap +
+                      ((__pa(start) >> PAGE_SHIFT) >> 3);
+       bitmap_end = (unsigned long)contiguous_bitmap +
+                    (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3);
+
+       start_page = bitmap_start & PAGE_MASK;
+       end_page = PAGE_ALIGN(bitmap_end);
+       node = paddr_to_nid(__pa(start));
+
+       bitmap = alloc_bootmem_pages_node(NODE_DATA(node),
+                                         end_page - start_page);
+       BUG_ON(!bitmap);
+       memset(bitmap, 0, end_page - start_page);
+
+       for (address = start_page; address < end_page; address += PAGE_SIZE) {
+               pgd = pgd_offset_k(address);
+               if (pgd_none(*pgd))
+                       pgd_populate(&init_mm, pgd,
+                                    alloc_bootmem_pages_node(NODE_DATA(node),
+                                                             PAGE_SIZE));
+               pud = pud_offset(pgd, address);
+
+               if (pud_none(*pud))
+                       pud_populate(&init_mm, pud,
+                                    alloc_bootmem_pages_node(NODE_DATA(node),
+                                                             PAGE_SIZE));
+               pmd = pmd_offset(pud, address);
+
+               if (pmd_none(*pmd))
+                       pmd_populate_kernel(&init_mm, pmd,
+                                           alloc_bootmem_pages_node
+                                           (NODE_DATA(node), PAGE_SIZE));
+               pte = pte_offset_kernel(pmd, address);
+
+               if (pte_none(*pte))
+                       set_pte(pte,
+                               pfn_pte(__pa(bitmap + (address - start_page))
+                                       >> PAGE_SHIFT, PAGE_KERNEL));
+       }
+       return 0;
+}
+#endif
+
+static void
+__contiguous_bitmap_init(unsigned long size)
+{
+       contiguous_bitmap = alloc_bootmem_pages(size);
+       BUG_ON(!contiguous_bitmap);
+       memset(contiguous_bitmap, 0, size);
+}
+
 void
 contiguous_bitmap_init(unsigned long end_pfn)
 {
        unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
-       contiguous_bitmap = alloc_bootmem_low_pages(size);
-       BUG_ON(!contiguous_bitmap);
-       memset(contiguous_bitmap, 0, size);
+#ifndef CONFIG_VIRTUAL_MEM_MAP
+       __contiguous_bitmap_init(size);
+#else
+       unsigned long max_gap = 0;
+
+       efi_memmap_walk(find_largest_hole, (u64*)&max_gap);
+       if (max_gap < LARGE_GAP) {
+               __contiguous_bitmap_init(size);
+       } else {
+               unsigned long map_size = PAGE_ALIGN(size);
+               vmalloc_end -= map_size;
+               contiguous_bitmap = (unsigned long*)vmalloc_end;
+               efi_memmap_walk(create_contiguous_bitmap, NULL);
+       }
+#endif
 }
 
 #if 0
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Fri Jan 19 14:48:57 
2007 +0000
@@ -59,12 +59,6 @@ GLOBAL_ENTRY(ia64_switch_to)
        shr.u r26=r20,IA64_GRANULE_SHIFT
        cmp.eq p7,p6=r25,in0
        ;;
-#ifdef CONFIG_XEN
-       movl r8=XSI_PSR_IC
-       ;;
-       st4 [r8]=r0     // force psr.ic off for hyperprivop(s)
-       ;;
-#endif
        /*
         * If we've already mapped this task's page, we can skip doing it again.
         */
@@ -72,19 +66,13 @@ GLOBAL_ENTRY(ia64_switch_to)
 (p6)   br.cond.dpnt .map
        ;;
 .done:
-#ifdef CONFIG_XEN
-       // psr.ic already off
+       ld8 sp=[r21]                    // load kernel stack pointer of new task
+#ifdef CONFIG_XEN
        // update "current" application register
        mov r8=IA64_KR_CURRENT
        mov r9=in0;;
        XEN_HYPER_SET_KR
-       ld8 sp=[r21]                    // load kernel stack pointer of new task
-       movl r27=XSI_PSR_IC
-       mov r8=1
-       ;;
-       st4 [r27]=r8                    // psr.ic back on
-#else
-       ld8 sp=[r21]                    // load kernel stack pointer of new task
+#else
        mov IA64_KR(CURRENT)=in0        // update "current" application register
 #endif
        mov r8=r13                      // return pointer to previously running 
task
@@ -99,7 +87,10 @@ GLOBAL_ENTRY(ia64_switch_to)
 
 .map:
 #ifdef CONFIG_XEN
-       // psr.ic already off
+    movl r25=XSI_PSR_IC                        // clear psr.ic
+    ;;
+    st4 [r25]=r0
+    ;;
 #else
        rsm psr.ic                      // interrupts (psr.i) are already 
disabled here
 #endif
@@ -132,7 +123,13 @@ GLOBAL_ENTRY(ia64_switch_to)
 #endif
        ;;
        itr.d dtr[r25]=r23              // wire in new mapping...
-#ifndef CONFIG_XEN
+#ifdef CONFIG_XEN
+       mov r9=1
+       movl r8=XSI_PSR_IC
+       ;;
+       st4 [r8]=r9
+       ;;
+#else    
        ssm psr.ic                      // reenable the psr.ic bit
        ;;
        srlz.d
@@ -415,7 +412,16 @@ ENTRY(ia64_leave_syscall)
 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
        ;;
        ld8 r26=[r2],PT(B0)-PT(AR_PFS)  // M0|1 load ar.pfs
+#ifdef CONFIG_XEN
+(pKStk)        mov r21=r8
+(pKStk)        XEN_HYPER_GET_PSR
+       ;;
+(pKStk)        mov r22=r8
+(pKStk)        mov r8=r21
+       ;;
+#else    
 (pKStk)        mov r22=psr                     // M2   read PSR now that 
interrupts are disabled
+#endif
        nop 0
        ;;
        ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
@@ -645,7 +651,16 @@ GLOBAL_ENTRY(ia64_leave_kernel)
        adds r16=PT(CR_IPSR)+16,r12
        adds r17=PT(CR_IIP)+16,r12
 
+#ifdef CONFIG_XEN    
+(pKStk)        mov r29=r8
+(pKStk)        XEN_HYPER_GET_PSR
+       ;;
+(pKStk)        mov r22=r8
+(pKStk)        mov r8=r29
+       ;;
+#else
 (pKStk)        mov r22=psr             // M2 read PSR now that interrupts are 
disabled
+#endif
        nop.i 0
        nop.i 0
        ;;
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Fri Jan 19 14:48:57 
2007 +0000
@@ -709,11 +709,23 @@ ENTRY(page_fault)
        ;;
 #endif
 #ifdef CONFIG_XEN
-       br.cond.sptk.many       xen_page_fault
-       ;;
-done_xen_page_fault:
-#endif
+    
+#define MASK_TO_PEND_OFS    (-1)
+    
+(p15)  movl r14=XSI_PSR_I_ADDR
+       ;;
+(p15)  ld8 r14=[r14]
+       ;;
+(p15)  st1 [r14]=r0,MASK_TO_PEND_OFS           // if (p15) vpsr.i = 1
+       ;;              // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0
+(p15)  ld1 r14=[r14]   // if (vcpu->vcpu_info->evtchn_upcall_pending)
+       ;;
+(p15)  cmp.ne  p15,p0=r14,r0
+       ;;
+(p15)  XEN_HYPER_SSM_I
+#else
 (p15)  ssm psr.i                               // restore psr.i
+#endif
        movl r14=ia64_leave_kernel
        ;;
        SAVE_REST
@@ -729,25 +741,6 @@ ENTRY(dkey_miss)
 ENTRY(dkey_miss)
        DBG_FAULT(7)
        FAULT(7)
-#ifdef CONFIG_XEN
-       // Leaving this code inline above results in an IVT section overflow
-       // There is no particular reason for this code to be here...
-xen_page_fault:
-(p15)  movl r3=XSI_PSR_I_ADDR
-       ;;
-(p15)  ld8 r3=[r3]
-       ;;
-(p15)  st1 [r3]=r0,-1  // if (p15) vpsr.i = 1
-       mov r14=r0
-       ;;
-(p15)  ld1 r14=[r3]                            // if (pending_events)
-       adds r3=8,r2                            // re-set up second base pointer
-       ;;
-(p15)  cmp.ne  p15,p0=r14,r0
-       ;;
-       br.cond.sptk.many done_xen_page_fault
-       ;;
-#endif
 END(dkey_miss)
 
        .org ia64_ivt+0x2000
@@ -1170,14 +1163,13 @@ 1:
 #ifdef CONFIG_XEN
 (p15)  ld8 r16=[r16]                           // vpsr.i
        ;;
-(p15)  st1 [r16]=r0,-1         // if (p15) vpsr.i = 1
-       mov r2=r0
-       ;;
-(p15)  ld1 r2=[r16]                            // if (pending_events)
-       ;;
-       cmp.ne  p6,p0=r2,r0
-       ;;
-(p6)   ssm     psr.i                           //   do a real ssm psr.i
+(p15)  st1 [r16]=r0,MASK_TO_PEND_OFS           // if (p15) vpsr.i = 1
+       ;;              // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0
+(p15)  ld1 r2=[r16]    // if (vcpu->vcpu_info->evtchn_upcall_pending)
+       ;;
+(p15)  cmp.ne.unc p6,p0=r2,r0
+       ;;
+(p6)   XEN_HYPER_SSM_I                         //   do a real ssm psr.i
 #else
 (p15)  ssm psr.i                               // M2   restore psr.i
 #endif
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h  Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h  Fri Jan 19 14:48:57 
2007 +0000
@@ -151,16 +151,7 @@
 .mem.offset 8,0; st8.spill [r17]=r11,24;                                       
                \
         ;;                                                                     
                \
        /* xen special handling for possibly lazy cover */                      
                \
-       movl r8=XSI_INCOMPL_REGFR;                                              
                \
-       ;;                                                                      
                \
-       ld4 r30=[r8];                                                           
                \
-       ;;                                                                      
                \
-       /* set XSI_INCOMPL_REGFR 0 */                                           
                \
-       st4 [r8]=r0;                                                            
                \
-       cmp.eq  p6,p7=r30,r0;                                                   
                \
-       ;; /* not sure if this stop bit is necessary */                         
                \
-(p6)   adds r8=XSI_PRECOVER_IFS-XSI_INCOMPL_REGFR,r8;                          
                \
-(p7)   adds r8=XSI_IFS-XSI_INCOMPL_REGFR,r8;                                   
                \
+       movl r8=XSI_PRECOVER_IFS;                                               
                \
        ;;                                                                      
                \
        ld8 r30=[r8];                                                           
                \
        ;;                                                                      
                \
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S       Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S       Fri Jan 19 14:48:57 
2007 +0000
@@ -33,7 +33,16 @@ 1:   {
        mov loc4=ar.rsc                 // save RSE configuration
        ;;
        mov ar.rsc=0                    // put RSE in enforced lazy, LE mode
+#ifdef CONFIG_XEN
+       mov r9 = r8
+       XEN_HYPER_GET_PSR
+       ;;
+       mov loc3 = r8
+       mov r8 = r9
+       ;;
+#else    
        mov loc3 = psr
+#endif    
        mov loc0 = rp
        .body
        mov r30 = in2
@@ -41,16 +50,16 @@ 1:  {
 #ifdef CONFIG_XEN
        // this is low priority for paravirtualization, but is called
        // from the idle loop so confuses privop counting
-       movl r31=XSI_PSR_IC
+       movl r31=XSI_PSR_I_ADDR
        ;;
-(p6)   st4 [r31]=r0
+       ld8 r31=[r31]
+       mov r22=1
        ;;
-(p7)   adds r31=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r31
-(p7)   mov r22=1
+       st1 [r31]=r22
+       ;;  
+(p6)   movl r31=XSI_PSR_IC
        ;;
-(p7)   ld8 r31=[r31]
-       ;;
-(p7)   st1 [r31]=r22
+(p6)   st4.rel [r31]=r0
        ;;
        mov r31 = in3
        mov b7 = loc2
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S     Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S     Fri Jan 19 14:48:57 
2007 +0000
@@ -38,17 +38,16 @@ END(early_xen_setup)
 /* Stub for suspend.
    Just force the stacked registers to be written in memory.  */       
 GLOBAL_ENTRY(xencomm_arch_hypercall_suspend)
-       mov r15=r32
        ;; 
-       alloc r20=ar.pfs,0,0,0,0
+       alloc r20=ar.pfs,0,0,6,0
        mov r2=__HYPERVISOR_sched_op
        ;; 
        /* We don't want to deal with RSE.  */
        flushrs
-       mov r14=2 // SCHEDOP_shutdown
+       mov r33=r32
+       mov r32=2 // SCHEDOP_shutdown
        ;;
        break 0x1000
        ;; 
-       mov ar.pfs=r20
        br.ret.sptk.many b0
 END(xencomm_arch_hypercall_suspend)
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig  Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig  Fri Jan 19 14:48:57 2007 +0000
@@ -238,14 +238,6 @@ config XEN_COMPAT_030002
 
 endmenu
 
-config HAVE_ARCH_ALLOC_SKB
-       bool
-       default y
-
-config HAVE_ARCH_DEV_ALLOC_SKB
-       bool
-       default y
-
 config HAVE_IRQ_IGNORE_UNHANDLED
        bool
        default y
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Fri Jan 19 14:48:57 
2007 +0000
@@ -39,6 +39,9 @@
 
 #include <asm/xen/xcom_hcall.h>
 struct xencomm_handle;
+extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
+                                 unsigned long a3, unsigned long a4,
+                                 unsigned long a5, unsigned long cmd);
 
 /*
  * Assembler stubs for hyper-calls.
@@ -47,115 +50,58 @@ struct xencomm_handle;
 #define _hypercall0(type, name)                                        \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name)       \
-                             : "r2","r8",                      \
-                               "memory" );                     \
+       __res=__hypercall(0, 0, 0, 0, 0, __HYPERVISOR_##name);  \
        (type)__res;                                            \
 })
 
 #define _hypercall1(type, name, a1)                            \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1))      \
-                             : "r14","r2","r8",                \
-                               "memory" );                     \
+       __res = __hypercall((unsigned long)a1,                  \
+                            0, 0, 0, 0, __HYPERVISOR_##name);  \
        (type)__res;                                            \
 })
 
 #define _hypercall2(type, name, a1, a2)                                \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r15=%3\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1)),     \
-                               "rI" ((unsigned long)(a2))      \
-                             : "r14","r15","r2","r8",          \
-                               "memory" );                     \
+       __res = __hypercall((unsigned long)a1,                  \
+                           (unsigned long)a2,                  \
+                           0, 0, 0, __HYPERVISOR_##name);      \
        (type)__res;                                            \
 })
 
 #define _hypercall3(type, name, a1, a2, a3)                    \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r15=%3\n"                    \
-                             "mov r16=%4\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1)),     \
-                               "rI" ((unsigned long)(a2)),     \
-                               "rI" ((unsigned long)(a3))      \
-                             : "r14","r15","r16","r2","r8",    \
-                               "memory" );                     \
-       (type)__res;                                            \
+       __res = __hypercall((unsigned long)a1,                  \
+                           (unsigned long)a2,                  \
+                           (unsigned long)a3,                  \
+                           0, 0, __HYPERVISOR_##name);         \
+       (type)__res;                                            \
 })
 
 #define _hypercall4(type, name, a1, a2, a3, a4)                        \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r15=%3\n"                    \
-                             "mov r16=%4\n"                    \
-                             "mov r17=%5\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1)),     \
-                               "rI" ((unsigned long)(a2)),     \
-                               "rI" ((unsigned long)(a3)),     \
-                               "rI" ((unsigned long)(a4))      \
-                             : "r14","r15","r16","r2","r8",    \
-                               "r17","memory" );               \
-       (type)__res;                                            \
+       __res = __hypercall((unsigned long)a1,                  \
+                           (unsigned long)a2,                  \
+                           (unsigned long)a3,                  \
+                           (unsigned long)a4,                  \
+                           0, __HYPERVISOR_##name);            \
+       (type)__res;                                            \
 })
 
 #define _hypercall5(type, name, a1, a2, a3, a4, a5)            \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r15=%3\n"                    \
-                             "mov r16=%4\n"                    \
-                             "mov r17=%5\n"                    \
-                             "mov r18=%6\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1)),     \
-                               "rI" ((unsigned long)(a2)),     \
-                               "rI" ((unsigned long)(a3)),     \
-                               "rI" ((unsigned long)(a4)),     \
-                               "rI" ((unsigned long)(a5))      \
-                             : "r14","r15","r16","r2","r8",    \
-                               "r17","r18","memory" );         \
-       (type)__res;                                            \
+       __res = __hypercall((unsigned long)a1,                  \
+                           (unsigned long)a2,                  \
+                           (unsigned long)a3,                  \
+                           (unsigned long)a4,                  \
+                           (unsigned long)a5,                  \
+                           __HYPERVISOR_##name);               \
+       (type)__res;                                            \
 })
 
 
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Thu Jan 18 
15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Fri Jan 19 
14:48:57 2007 +0000
@@ -215,7 +215,8 @@ asmlinkage int xprintk(const char *fmt, 
 #endif /* CONFIG_XEN || CONFIG_VMX_GUEST */
 
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
-#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
+#define is_initial_xendomain()                                         \
+       (is_running_on_xen() ? xen_start_info->flags & SIF_INITDOMAIN : 0)
 #else
 #define is_initial_xendomain() 0
 #endif
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/include/asm-ia64/maddr.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/maddr.h     Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/maddr.h     Fri Jan 19 14:48:57 
2007 +0000
@@ -68,7 +68,6 @@ static inline unsigned long
 static inline unsigned long
 mfn_to_local_pfn(unsigned long mfn)
 {
-       extern unsigned long max_mapnr;
        unsigned long pfn = mfn_to_pfn_for_dma(mfn);
        if (!pfn_valid(pfn))
                return INVALID_P2M_ENTRY;
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/include/asm-ia64/page.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/page.h      Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h      Fri Jan 19 14:48:57 
2007 +0000
@@ -119,6 +119,7 @@ extern struct page *vmem_map;
 #endif
 
 #ifdef CONFIG_FLATMEM
+extern unsigned long max_mapnr;
 # define pfn_valid(pfn)                (((pfn) < max_mapnr) && 
ia64_pfn_valid(pfn))
 #elif defined(CONFIG_DISCONTIGMEM)
 extern unsigned long min_low_pfn;
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Thu Jan 18 
15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Fri Jan 19 
14:48:57 2007 +0000
@@ -45,12 +45,10 @@
 #define        XEN_HYPER_GET_PMD               break HYPERPRIVOP_GET_PMD
 #define        XEN_HYPER_GET_EFLAG             break HYPERPRIVOP_GET_EFLAG
 #define        XEN_HYPER_SET_EFLAG             break HYPERPRIVOP_SET_EFLAG
-#define        XEN_HYPER_RSM_BE                break HYPERPRIVOP_RSM_BE
 #define        XEN_HYPER_GET_PSR               break HYPERPRIVOP_GET_PSR
 
 #define XSI_IFS                        (XSI_BASE + XSI_IFS_OFS)
 #define XSI_PRECOVER_IFS       (XSI_BASE + XSI_PRECOVER_IFS_OFS)
-#define XSI_INCOMPL_REGFR      (XSI_BASE + XSI_INCOMPL_REGFR_OFS)
 #define XSI_IFA                        (XSI_BASE + XSI_IFA_OFS)
 #define XSI_ISR                        (XSI_BASE + XSI_ISR_OFS)
 #define XSI_IIM                        (XSI_BASE + XSI_IIM_OFS)
@@ -123,8 +121,6 @@ extern void xen_set_eflag(unsigned long)
  * that we inline it */
 #define xen_hyper_ssm_i()                                              \
 ({                                                                     \
-       xen_set_virtual_psr_i(0);                                       \
-       xen_set_virtual_psr_ic(0);                                      \
        XEN_HYPER_SSM_I;                                                \
 })
 
@@ -139,8 +135,12 @@ extern void xen_set_eflag(unsigned long)
 #define xen_ssm_i()                                                    \
 ({                                                                     \
        int old = xen_get_virtual_psr_i();                              \
-       xen_set_virtual_psr_i(1);                                       \
-       if (!old && xen_get_virtual_pend()) xen_hyper_ssm_i();          \
+       if (!old) {                                                     \
+               if (xen_get_virtual_pend())                             \
+                       xen_hyper_ssm_i();                              \
+               else                                                    \
+                       xen_set_virtual_psr_i(1);                       \
+       }                                                               \
 })
 
 #define xen_ia64_intrin_local_irq_restore(x)                           \
@@ -182,6 +182,7 @@ extern void xen_set_eflag(unsigned long)
  * be properly handled by Xen, some are frequent enough that we use
  * hyperprivops for performance. */
 
+extern unsigned long xen_get_psr(void);
 extern unsigned long xen_get_ivr(void);
 extern unsigned long xen_get_tpr(void);
 extern void xen_set_itm(unsigned long);
@@ -201,6 +202,11 @@ extern void xen_ptcga(unsigned long addr
        __u64 ia64_intri_res;                                           \
                                                                        \
        switch(regnum) {                                                \
+       case _IA64_REG_PSR:                                             \
+               ia64_intri_res = (is_running_on_xen()) ?                        
\
+                       xen_get_psr() :                                 \
+                       __ia64_getreg(regnum);                          \
+               break;                                                  \
        case _IA64_REG_CR_IVR:                                          \
                ia64_intri_res = (is_running_on_xen()) ?                        
\
                        xen_get_ivr() :                                 \
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/linux/skbuff.h
--- a/linux-2.6-xen-sparse/include/linux/skbuff.h       Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/include/linux/skbuff.h       Fri Jan 19 14:48:57 
2007 +0000
@@ -353,8 +353,7 @@ static inline struct sk_buff *alloc_skb_
 
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                            unsigned int size,
-                                           gfp_t priority,
-                                           int fclone);
+                                           gfp_t priority);
 extern void           kfree_skbmem(struct sk_buff *skb);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
                                 gfp_t priority);
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/net/core/skbuff.c
--- a/linux-2.6-xen-sparse/net/core/skbuff.c    Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/net/core/skbuff.c    Fri Jan 19 14:48:57 2007 +0000
@@ -210,18 +210,14 @@ nodata:
  */
 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                     unsigned int size,
-                                    gfp_t gfp_mask,
-                                    int fclone)
-{
-       kmem_cache_t *cache;
-       struct skb_shared_info *shinfo;
+                                    gfp_t gfp_mask)
+{
        struct sk_buff *skb;
        u8 *data;
 
-       cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
-
        /* Get the HEAD */
-       skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
+       skb = kmem_cache_alloc(skbuff_head_cache,
+                              gfp_mask & ~__GFP_DMA);
        if (!skb)
                goto out;
 
@@ -238,29 +234,18 @@ struct sk_buff *alloc_skb_from_cache(kme
        skb->data = data;
        skb->tail = data;
        skb->end  = data + size;
-       /* make sure we initialize shinfo sequentially */
-       shinfo = skb_shinfo(skb);
-       atomic_set(&shinfo->dataref, 1);
-       shinfo->nr_frags  = 0;
-       shinfo->gso_size = 0;
-       shinfo->gso_segs = 0;
-       shinfo->gso_type = 0;
-       shinfo->ip6_frag_id = 0;
-       shinfo->frag_list = NULL;
-
-       if (fclone) {
-               struct sk_buff *child = skb + 1;
-               atomic_t *fclone_ref = (atomic_t *) (child + 1);
-
-               skb->fclone = SKB_FCLONE_ORIG;
-               atomic_set(fclone_ref, 1);
-
-               child->fclone = SKB_FCLONE_UNAVAILABLE;
-       }
+
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags  = 0;
+       skb_shinfo(skb)->gso_size = 0;
+       skb_shinfo(skb)->gso_segs = 0;
+       skb_shinfo(skb)->gso_type = 0;
+       skb_shinfo(skb)->ip6_frag_id = 0;
+       skb_shinfo(skb)->frag_list = NULL;
 out:
        return skb;
 nodata:
-       kmem_cache_free(cache, skb);
+       kmem_cache_free(skbuff_head_cache, skb);
        skb = NULL;
        goto out;
 }
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/hw/cirrus_vga.c       Fri Jan 19 14:48:57 2007 +0000
@@ -3010,11 +3010,44 @@ static CPUWriteMemoryFunc *cirrus_mmio_w
     cirrus_mmio_writel,
 };
 
+void cirrus_stop_acc(CirrusVGAState *s)
+{
+    if (s->map_addr){
+        int error;
+        s->map_addr = 0;
+        error = unset_vram_mapping(s->cirrus_lfb_addr,
+                s->cirrus_lfb_end);
+        fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n");
+
+        munmap(s->vram_ptr, VGA_RAM_SIZE);
+    }
+}
+
+void cirrus_restart_acc(CirrusVGAState *s)
+{
+    if (s->cirrus_lfb_addr && s->cirrus_lfb_end) {
+        void *vram_pointer, *old_vram;
+        fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, 
lfb_end=0x%lx.\n",
+                s->cirrus_lfb_addr, s->cirrus_lfb_end);
+        vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end);
+        if (!vram_pointer){
+            fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n");
+        } else {
+            old_vram = vga_update_vram((VGAState *)s, vram_pointer,
+                    VGA_RAM_SIZE);
+            qemu_free(old_vram);
+            s->map_addr = s->cirrus_lfb_addr;
+            s->map_end = s->cirrus_lfb_end;
+        }
+    }
+}
+
 /* load/save state */
 
 static void cirrus_vga_save(QEMUFile *f, void *opaque)
 {
     CirrusVGAState *s = opaque;
+    uint8_t vga_acc;
 
     qemu_put_be32s(f, &s->latch);
     qemu_put_8s(f, &s->sr_index);
@@ -3049,11 +3082,20 @@ static void cirrus_vga_save(QEMUFile *f,
     qemu_put_be32s(f, &s->hw_cursor_y);
     /* XXX: we do not save the bitblt state - we assume we do not save
        the state when the blitter is active */
+
+    vga_acc = (!!s->map_addr);
+    qemu_put_8s(f, &vga_acc);
+    qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
+    qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+    qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
+    if (vga_acc)
+        cirrus_stop_acc(s);
 }
 
 static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
 {
     CirrusVGAState *s = opaque;
+    uint8_t vga_acc = 0;
 
     if (version_id != 1)
         return -EINVAL;
@@ -3091,6 +3133,14 @@ static int cirrus_vga_load(QEMUFile *f, 
 
     qemu_get_be32s(f, &s->hw_cursor_x);
     qemu_get_be32s(f, &s->hw_cursor_y);
+
+    qemu_get_8s(f, &vga_acc);
+    qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
+    qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+    qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
+    if (vga_acc){
+        cirrus_restart_acc(s);
+    }
 
     /* force refresh */
     s->graphic_mode = -1;
@@ -3297,6 +3347,8 @@ void pci_cirrus_vga_init(PCIBus *bus, Di
                     ds, vga_ram_base, vga_ram_offset, vga_ram_size);
     cirrus_init_common(s, device_id, 1);
 
+    register_savevm("cirrus_vga_pci", 0, 1, generic_pci_save, 
generic_pci_load, d);
+
     /* setup memory space */
     /* memory #0 LFB */
     /* memory #1 memory-mapped I/O */
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/ide.c
--- a/tools/ioemu/hw/ide.c      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/hw/ide.c      Fri Jan 19 14:48:57 2007 +0000
@@ -2512,6 +2512,9 @@ void pci_piix3_ide_init(PCIBus *bus, Blo
               pic_set_irq_new, isa_pic, 15);
     ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6);
     ide_init_ioport(&d->ide_if[2], 0x170, 0x376);
+
+    register_savevm("ide_pci", 0, 1, generic_pci_save, generic_pci_load, d);
+
 #ifdef DMA_MULTI_THREAD    
     dma_create_thread();
 #endif //DMA_MULTI_THREAD    
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/hw/pci.c      Fri Jan 19 14:48:57 2007 +0000
@@ -40,6 +40,8 @@ static int pci_irq_index;
 static int pci_irq_index;
 static PCIBus *first_bus;
 
+static void pci_update_mappings(PCIDevice *d);
+
 PCIBus *pci_register_bus(pci_set_irq_fn set_irq, void *pic, int devfn_min)
 {
     PCIBus *bus;
@@ -71,6 +73,7 @@ int generic_pci_load(QEMUFile* f, void *
         return -EINVAL;
 
     qemu_get_buffer(f, s->config, 256);
+    pci_update_mappings(s);
     return 0;
 }
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/target-i386-dm/helper2.c      Fri Jan 19 14:48:57 2007 +0000
@@ -546,6 +546,7 @@ int main_loop(void)
 {
     extern int vm_running;
     extern int shutdown_requested;
+    extern int suspend_requested;
     CPUState *env = cpu_single_env;
     int evtchn_fd = xc_evtchn_fd(xce_handle);
 
@@ -563,12 +564,24 @@ int main_loop(void)
                 qemu_system_reset();
                 reset_requested = 0;
             }
+            if (suspend_requested) {
+                fprintf(logfile, "device model received suspend signal!\n");
+                break;
+            }
         }
 
         /* Wait up to 10 msec. */
         main_loop_wait(10);
     }
-    destroy_hvm_domain();
+    if (!suspend_requested)
+        destroy_hvm_domain();
+    else {
+        char qemu_file[20];
+        sprintf(qemu_file, "/tmp/xen.qemu-dm.%d", domid);
+        if (qemu_savevm(qemu_file) < 0)
+            fprintf(stderr, "qemu save fail.\n");
+    }
+
     return 0;
 }
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/vl.c  Fri Jan 19 14:48:57 2007 +0000
@@ -4441,6 +4441,11 @@ int qemu_loadvm(const char *filename)
         qemu_fseek(f, cur_pos + record_len, SEEK_SET);
     }
     fclose(f);
+
+    /* del tmp file */
+    if (unlink(filename) == -1)
+        fprintf(stderr, "delete tmp qemu state file failed.\n");
+
     ret = 0;
  the_end:
     if (saved_vm_running)
@@ -5027,6 +5032,7 @@ static QEMUResetEntry *first_reset_entry
 static QEMUResetEntry *first_reset_entry;
 int reset_requested;
 int shutdown_requested;
+int suspend_requested;
 static int powerdown_requested;
 
 void qemu_register_reset(QEMUResetHandler *func, void *opaque)
@@ -5806,6 +5812,14 @@ int set_mm_mapping(int xc_handle, uint32
     }
 
     return 0;
+}
+
+void suspend(int sig)
+{
+   fprintf(logfile, "suspend sig handler called with requested=%d!\n", 
suspend_requested);
+    if (sig != SIGUSR1)
+        fprintf(logfile, "suspend signal dismatch, get sig=%d!\n", sig);
+    suspend_requested = 1;
 }
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -6464,10 +6478,6 @@ int main(int argc, char **argv)
     }
 
 #if defined (__ia64__)
-    /* ram_size passed from xend has added on GFW memory,
-       so we must subtract it here */
-    ram_size -= 16 * MEM_M;
-
     if (ram_size > MMIO_START)
         ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */
 #endif
@@ -6718,6 +6728,26 @@ int main(int argc, char **argv)
             vm_start();
         }
     }
+
+    /* register signal for the suspend request when save */
+    {
+        struct sigaction act;
+        sigset_t set;
+        act.sa_handler = suspend;
+        act.sa_flags = SA_RESTART;
+        sigemptyset(&act.sa_mask);
+
+        sigaction(SIGUSR1, &act, NULL);
+
+        /* control panel mask some signals when spawn qemu, need unmask here*/
+        sigemptyset(&set);
+        sigaddset(&set, SIGUSR1);
+        sigaddset(&set, SIGTERM);
+        if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
+            fprintf(stderr, "unblock signal fail, possible issue for HVM 
save!\n");
+
+    }
+
     main_loop();
     quit_timers();
     return 0;
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/Makefile      Fri Jan 19 14:48:57 2007 +0000
@@ -27,7 +27,7 @@ GUEST_SRCS-$(CONFIG_X86) += xc_linux_bui
 GUEST_SRCS-$(CONFIG_X86) += xc_linux_build.c
 GUEST_SRCS-$(CONFIG_IA64) += xc_linux_build.c
 GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
+GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c
 
 -include $(XEN_TARGET_ARCH)/Makefile
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/ia64/xc_ia64_hvm_build.c
--- a/tools/libxc/ia64/xc_ia64_hvm_build.c      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/ia64/xc_ia64_hvm_build.c      Fri Jan 19 14:48:57 2007 +0000
@@ -569,18 +569,13 @@ setup_guest(int xc_handle, uint32_t dom,
     xen_pfn_t *pfn_list;
     shared_iopage_t *sp;
     void *ioreq_buffer_page;
-    // memsize equal to normal memory size(in configure file) + 16M
-    // dom_memsize will pass to xc_ia64_build_hob(), so must be subbed 16M 
-    unsigned long dom_memsize = ((memsize - 16) << 20);
-    unsigned long nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
-    unsigned long normal_pages = nr_pages - GFW_PAGES;
+    unsigned long dom_memsize = memsize << 20;
+    unsigned long nr_pages = memsize << (20 - PAGE_SHIFT);
     unsigned long vcpus;
     int rc;
-    long i, j;
+    long i;
     DECLARE_DOMCTL;
 
-    // ROM size for guest firmware, ioreq page and xenstore page
-    nr_pages += 3; 
 
     if ((image_size > 12 * MEM_M) || (image_size & (PAGE_SIZE - 1))) {
         PERROR("Guest firmware size is incorrect [%ld]?", image_size);
@@ -598,20 +593,20 @@ setup_guest(int xc_handle, uint32_t dom,
         pfn_list[i] = i;
 
     // If normal memory > 3G. Reserve 3G ~ 4G for MMIO, GFW and others.
-    for (j = (MMIO_START >> PAGE_SHIFT); j < (dom_memsize >> PAGE_SHIFT); j++)
-        pfn_list[j] += ((1 * MEM_G) >> PAGE_SHIFT);
+    for (i = (MMIO_START >> PAGE_SHIFT); i < (dom_memsize >> PAGE_SHIFT); i++)
+        pfn_list[i] += ((1 * MEM_G) >> PAGE_SHIFT);
 
     // Allocate memory for VTI guest, up to VGA hole from 0xA0000-0xC0000. 
     rc = xc_domain_memory_populate_physmap(xc_handle, dom,
-                                           (normal_pages > VGA_START_PAGE) ?
-                                           VGA_START_PAGE : normal_pages,
+                                           (nr_pages > VGA_START_PAGE) ?
+                                           VGA_START_PAGE : nr_pages,
                                            0, 0, &pfn_list[0]);
 
     // We're not likely to attempt to create a domain with less than
     // 640k of memory, but test for completeness
     if (rc == 0 && nr_pages > VGA_END_PAGE)
         rc = xc_domain_memory_populate_physmap(xc_handle, dom,
-                                               normal_pages - VGA_END_PAGE,
+                                               nr_pages - VGA_END_PAGE,
                                                0, 0, &pfn_list[VGA_END_PAGE]);
     if (rc != 0) {
         PERROR("Could not allocate normal memory for Vti guest.\n");
@@ -621,24 +616,22 @@ setup_guest(int xc_handle, uint32_t dom,
     // We allocate additional pfn for GFW and other three pages, so
     // the pfn_list is not contiguous.  Due to this we must support
     // old interface xc_ia64_get_pfn_list().
-    // Here i = (dom_memsize >> PAGE_SHIFT)
-    for (j = 0; i < nr_pages - 3; i++, j++) 
-        pfn_list[i] = (GFW_START >> PAGE_SHIFT) + j;
+    for (i = 0; i < GFW_PAGES; i++) 
+        pfn_list[i] = (GFW_START >> PAGE_SHIFT) + i;
 
     rc = xc_domain_memory_populate_physmap(xc_handle, dom, GFW_PAGES,
-                                           0, 0, &pfn_list[normal_pages]);
+                                           0, 0, &pfn_list[0]);
     if (rc != 0) {
         PERROR("Could not allocate GFW memory for Vti guest.\n");
         goto error_out;
     }
 
-    // Here i = (dom_memsize >> PAGE_SHIFT) + GFW_PAGES
-    pfn_list[i] = IO_PAGE_START >> PAGE_SHIFT;
-    pfn_list[i+1] = STORE_PAGE_START >> PAGE_SHIFT;
-    pfn_list[i+2] = BUFFER_IO_PAGE_START >> PAGE_SHIFT; 
+    pfn_list[0] = IO_PAGE_START >> PAGE_SHIFT;
+    pfn_list[1] = STORE_PAGE_START >> PAGE_SHIFT;
+    pfn_list[2] = BUFFER_IO_PAGE_START >> PAGE_SHIFT; 
 
     rc = xc_domain_memory_populate_physmap(xc_handle, dom, 3,
-                                           0, 0, &pfn_list[nr_pages - 3]);
+                                           0, 0, &pfn_list[0]);
     if (rc != 0) {
         PERROR("Could not allocate IO page or store page or buffer io 
page.\n");
         goto error_out;
@@ -675,13 +668,12 @@ setup_guest(int xc_handle, uint32_t dom,
         goto error_out;
     }
 
-    xc_set_hvm_param(xc_handle, dom,
-                     HVM_PARAM_STORE_PFN, pfn_list[nr_pages - 2]);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, pfn_list[1]);
 
     // Retrieve special pages like io, xenstore, etc. 
     sp = (shared_iopage_t *)xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                  PROT_READ | PROT_WRITE,
-                                                 pfn_list[nr_pages - 3]);
+                                                 pfn_list[0]);
     if (sp == 0)
         goto error_out;
 
@@ -689,7 +681,7 @@ setup_guest(int xc_handle, uint32_t dom,
     munmap(sp, PAGE_SIZE);
     ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                              PROT_READ | PROT_WRITE,
-                                             pfn_list[nr_pages - 1]); 
+                                             pfn_list[2]); 
     memset(ioreq_buffer_page,0,PAGE_SIZE);
     munmap(ioreq_buffer_page, PAGE_SIZE);
     free(pfn_list);
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/xc_domain.c   Fri Jan 19 14:48:57 2007 +0000
@@ -233,6 +233,50 @@ int xc_domain_getinfolist(int xc_handle,
     unlock_pages(info, max_domains*sizeof(xc_domaininfo_t));
 
     return ret;
+}
+
+/* get info from hvm guest for save */
+int xc_domain_hvm_getcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt)
+{
+    int rc;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_gethvmcontext;
+    domctl.domain = (domid_t)domid;
+    set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt);
+
+    if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 )
+        return rc;
+
+    rc = do_domctl(xc_handle, &domctl);
+
+    safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt));
+
+    return rc;
+}
+
+/* set info to hvm guest for restore */
+int xc_domain_hvm_setcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt)
+{
+    int rc;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_sethvmcontext;
+    domctl.domain = domid;
+    set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt);
+
+    if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 )
+        return rc;
+
+    rc = do_domctl(xc_handle, &domctl);
+
+    safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt));
+
+    return rc;
 }
 
 int xc_vcpu_getcontext(int xc_handle,
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_hvm_restore.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_hvm_restore.c      Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,360 @@
+/******************************************************************************
+ * xc_hvm_restore.c
+ *
+ * Restore the state of a HVM guest.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2006 Intel Corperation
+ * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+#include <xen/hvm/ioreq.h>
+#include <xen/hvm/params.h>
+#include <xen/hvm/e820.h>
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the currrent guest */
+static unsigned int pt_levels;
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/* A table mapping each PFN to its new MFN. */
+static xen_pfn_t *p2m = NULL;
+
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+    int r = 0, s;
+    unsigned char *b = buf;
+
+    while (r < count) {
+        s = read(fd, &b[r], count - r);
+        if ((s == -1) && (errno == EINTR))
+            continue;
+        if (s <= 0) {
+            break;
+        }
+        r += s;
+    }
+
+    return (r == count) ? 1 : 0;
+}
+
+int xc_hvm_restore(int xc_handle, int io_fd,
+                     uint32_t dom, unsigned long nr_pfns,
+                     unsigned int store_evtchn, unsigned long *store_mfn,
+                     unsigned int console_evtchn, unsigned long *console_mfn,
+                     unsigned int pae, unsigned int apic)
+{
+    DECLARE_DOMCTL;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    char *region_base;
+
+    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
+
+    xc_dominfo_t info;
+    unsigned int rc = 1, n, i;
+    uint32_t rec_len, nr_vcpus;
+    hvm_domain_context_t hvm_ctxt;
+    unsigned long long v_end, memsize;
+    unsigned long shared_page_nr;
+
+    unsigned long mfn, pfn;
+    unsigned int prev_pc, this_pc;
+    int verify = 0;
+
+    /* Types of the pfns in the current region */
+    unsigned long region_pfn_type[MAX_BATCH_SIZE];
+
+    /* hvm guest mem size (Mb) */
+    memsize = (unsigned long long)*store_mfn;
+    v_end = memsize << 20;
+
+    DPRINTF("xc_hvm_restore:dom=%d, nr_pfns=0x%lx, store_evtchn=%d, 
*store_mfn=%ld, console_evtchn=%d, *console_mfn=%ld, pae=%u, apic=%u.\n", 
+            dom, nr_pfns, store_evtchn, *store_mfn, console_evtchn, 
*console_mfn, pae, apic);
+
+    max_pfn = nr_pfns;
+
+    if(!get_platform_info(xc_handle, dom,
+                          &max_mfn, &hvirt_start, &pt_levels)) {
+        ERROR("Unable to get platform info.");
+        return 1;
+    }
+
+    DPRINTF("xc_hvm_restore start: max_pfn = %lx, max_mfn = %lx, 
hvirt_start=%lx, pt_levels=%d\n",
+            max_pfn,
+            max_mfn,
+            hvirt_start,
+            pt_levels);
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        /* needed for build dom0 op, but might as well do early */
+        ERROR("Unable to mlock ctxt");
+        return 1;
+    }
+
+
+    p2m        = malloc(max_pfn * sizeof(xen_pfn_t));
+
+    if (p2m == NULL) {
+        ERROR("memory alloc failed");
+        errno = ENOMEM;
+        goto out;
+    }
+
+    /* Get the domain's shared-info frame. */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
+
+    if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+        errno = ENOMEM;
+        goto out;
+    }
+
+    for ( i = 0; i < max_pfn; i++ )
+        p2m[i] = i;
+    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < max_pfn; i++ )
+        p2m[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+    /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
+    rc = xc_domain_memory_populate_physmap(
+        xc_handle, dom, (max_pfn > 0xa0) ? 0xa0 : max_pfn,
+        0, 0, &p2m[0x00]);
+    if ( (rc == 0) && (max_pfn > 0xc0) )
+        rc = xc_domain_memory_populate_physmap(
+            xc_handle, dom, max_pfn - 0xc0, 0, 0, &p2m[0xc0]);
+    if ( rc != 0 )
+    {
+        PERROR("Could not allocate memory for HVM guest.\n");
+        goto out;
+    }
+
+
+    /**********XXXXXXXXXXXXXXXX******************/
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("Could not get domain info");
+        return 1;
+    }
+
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+
+    for ( i = 0; i < max_pfn; i++)
+        p2m[i] = i;
+
+    prev_pc = 0;
+
+    n = 0;
+    while (1) {
+
+        int j;
+
+        this_pc = (n * 100) / max_pfn;
+        if ( (this_pc - prev_pc) >= 5 )
+        {
+            PPRINTF("\b\b\b\b%3d%%", this_pc);
+            prev_pc = this_pc;
+        }
+
+        if (!read_exact(io_fd, &j, sizeof(int))) {
+            ERROR("HVM restore Error when reading batch size");
+            goto out;
+        }
+
+        PPRINTF("batch %d\n",j);
+
+        if (j == -1) {
+            verify = 1;
+            DPRINTF("Entering page verify mode\n");
+            continue;
+        }
+
+        if (j == 0)
+            break;  /* our work here is done */
+
+        if (j > MAX_BATCH_SIZE) {
+            ERROR("Max batch size exceeded. Giving up.");
+            goto out;
+        }
+
+        if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) {
+            ERROR("Error when reading region pfn types");
+            goto out;
+        }
+
+        region_base = xc_map_foreign_batch(
+            xc_handle, dom, PROT_WRITE, region_pfn_type, j);
+
+        for ( i = 0; i < j; i++ )
+        {
+            void *page;
+
+            pfn = region_pfn_type[i];
+            if ( pfn > max_pfn )
+            {
+                ERROR("pfn out of range");
+                goto out;
+            }
+
+            if ( pfn >= 0xa0 && pfn < 0xc0) {
+                ERROR("hvm restore:pfn in vga hole");
+                goto out;
+            }
+
+
+            mfn = p2m[pfn];
+
+            /* In verify mode, we use a copy; otherwise we work in place */
+            page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);
+
+            if (!read_exact(io_fd, page, PAGE_SIZE)) {
+                ERROR("Error when reading page (%x)", i);
+                goto out;
+            }
+
+            if (verify) {
+
+                int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
+
+                if (res) {
+
+                    int v;
+
+                    DPRINTF("************** pfn=%lx mfn=%lx gotcs=%08lx "
+                            "actualcs=%08lx\n", pfn, p2m[pfn],
+                            csum_page(region_base + i*PAGE_SIZE),
+                            csum_page(buf));
+
+                    for (v = 0; v < 4; v++) {
+
+                        unsigned long *p = (unsigned long *)
+                            (region_base + i*PAGE_SIZE);
+                        if (buf[v] != p[v])
+                            DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
+                    }
+                }
+            }
+
+        } /* end of 'batch' for loop */
+        munmap(region_base, j*PAGE_SIZE);
+        n+= j; /* crude stats */
+
+    }/*while 1*/
+    
+/*    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
+
+    if ( v_end > HVM_BELOW_4G_RAM_END )
+        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
+    else
+        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
+
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
+
+    /* caculate the store_mfn , wrong val cause hang when introduceDomain */
+    *store_mfn = (v_end >> PAGE_SHIFT) - 2;
+    DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n", 
*store_mfn, v_end);
+
+    /* restore hvm context including pic/pit/shpage */
+    if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) {
+        ERROR("error read hvm context size!\n");
+        goto out;
+    }
+    if (rec_len != sizeof(hvm_ctxt)) {
+        ERROR("hvm context size dismatch!\n");
+        goto out;
+    }
+
+    if (!read_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt))) {
+        ERROR("error read hvm context!\n");
+        goto out;
+    }
+
+    if (( rc = xc_domain_hvm_setcontext(xc_handle, dom, &hvm_ctxt))) {
+        ERROR("error set hvm context!\n");
+        goto out;
+    }
+
+    if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
+        ERROR("error read nr vcpu !\n");
+        goto out;
+    }
+    DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus);
+
+    for (i =0; i < nr_vcpus; i++) {
+        if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) {
+            ERROR("error read vcpu context size!\n");
+            goto out;
+        }
+        if (rec_len != sizeof(ctxt)) {
+            ERROR("vcpu context size dismatch!\n");
+            goto out;
+        }
+
+        if (!read_exact(io_fd, &(ctxt), sizeof(ctxt))) {
+            ERROR("error read vcpu context.\n");
+            goto out;
+        }
+
+        if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) {
+            ERROR("Could not set vcpu context, rc=%d", rc);
+            goto out;
+        }
+    }
+
+    rc = 0;
+    goto out;
+
+ out:
+    if ( (rc != 0) && (dom != 0) )
+        xc_domain_destroy(xc_handle, dom);
+    free(p2m);
+
+    DPRINTF("Restore exit with rc=%d\n", rc);
+
+    return rc;
+}
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_hvm_save.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_hvm_save.c Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,727 @@
+/******************************************************************************
+ * xc_hvm_save.c
+ *
+ * Save the state of a running HVM guest.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2006 Intel Corperation
+ * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "xc_private.h"
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_hvm_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
+#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns   */
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the currrent guest */
+static unsigned int pt_levels;
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, to fixup, and to skip.
+*/
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+#define BITMAP_SIZE   ((max_pfn + BITS_PER_LONG - 1) / 8)
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int test_bit (int nr, volatile void * addr)
+{
+    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void clear_bit (int nr, volatile void * addr)
+{
+    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
+}
+
+static inline int permute( int i, int nr, int order_nr  )
+{
+    /* Need a simple permutation function so that we scan pages in a
+       pseudo random order, enabling us to get a better estimate of
+       the domain's page dirtying rate as we go (there are often
+       contiguous ranges of pfns that have similar behaviour, and we
+       want to mix them up. */
+
+    /* e.g. nr->oder 15->4 16->4 17->5 */
+    /* 512MB domain, 128k pages, order 17 */
+
+    /*
+      QPONMLKJIHGFEDCBA
+             QPONMLKJIH
+      GFEDCBA
+     */
+
+    /*
+      QPONMLKJIHGFEDCBA
+                  EDCBA
+             QPONM
+      LKJIHGF
+      */
+
+    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
+    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
+
+    return i;
+}
+
+static uint64_t tv_to_us(struct timeval *new)
+{
+    return (new->tv_sec * 1000000) + new->tv_usec;
+}
+
+static uint64_t llgettimeofday(void)
+{
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    return tv_to_us(&now);
+}
+
+static uint64_t tv_delta(struct timeval *new, struct timeval *old)
+{
+    return ((new->tv_sec - old->tv_sec)*1000000 ) +
+        (new->tv_usec - old->tv_usec);
+}
+
+
+#define RATE_IS_MAX() (0)
+#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
+#define initialize_mbit_rate()
+
+static inline ssize_t write_exact(int fd, void *buf, size_t count)
+{
+    if(write(fd, buf, count) != count)
+        return 0;
+    return 1;
+}
+
+static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
+                       xc_shadow_op_stats_t *stats, int print)
+{
+    static struct timeval wall_last;
+    static long long      d0_cpu_last;
+    static long long      d1_cpu_last;
+
+    struct timeval        wall_now;
+    long long             wall_delta;
+    long long             d0_cpu_now, d0_cpu_delta;
+    long long             d1_cpu_now, d1_cpu_delta;
+
+    gettimeofday(&wall_now, NULL);
+
+    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
+    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
+
+    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
+        DPRINTF("ARRHHH!!\n");
+
+    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
+
+    if (wall_delta == 0) wall_delta = 1;
+
+    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
+    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
+
+    if (print)
+        DPRINTF(
+                "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
+                "dirtied %dMb/s %" PRId32 " pages\n",
+                wall_delta,
+                (int)((d0_cpu_delta*100)/wall_delta),
+                (int)((d1_cpu_delta*100)/wall_delta),
+                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
+                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
+                stats->dirty_count);
+
+    d0_cpu_last = d0_cpu_now;
+    d1_cpu_last = d1_cpu_now;
+    wall_last   = wall_now;
+
+    return 0;
+}
+
+static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn,
+                          unsigned long *arr, int runs)
+{
+    long long start, now;
+    xc_shadow_op_stats_t stats;
+    int j;
+
+    start = llgettimeofday();
+
+    for (j = 0; j < runs; j++) {
+        int i;
+
+        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+                          arr, max_pfn, NULL, 0, NULL);
+        DPRINTF("#Flush\n");
+        for ( i = 0; i < 40; i++ ) {
+            usleep(50000);
+            now = llgettimeofday();
+            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
+                              NULL, 0, NULL, 0, &stats);
+
+            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
+                    ((now-start)+500)/1000,
+                    stats.fault_count, stats.dirty_count);
+        }
+    }
+
+    return -1;
+}
+
+static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+                             int dom, xc_dominfo_t *info,
+                             vcpu_guest_context_t *ctxt)
+{
+    int i = 0;
+
+    if (!(*suspend)(dom)) {
+        ERROR("Suspend request failed");
+        return -1;
+    }
+
+ retry:
+
+    if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) {
+        ERROR("Could not get domain info");
+        return -1;
+    }
+
+    if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt))
+        ERROR("Could not get vcpu context");
+
+
+    if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend)
+        return 0; // success
+
+    if (info->paused) {
+        // try unpausing domain, wait, and retest
+        xc_domain_unpause( xc_handle, dom );
+
+        ERROR("Domain was paused. Wait and re-test.");
+        usleep(10000);  // 10ms
+
+        goto retry;
+    }
+
+
+    if( ++i < 100 ) {
+        ERROR("Retry suspend domain.");
+        usleep(10000);  // 10ms
+        goto retry;
+    }
+
+    ERROR("Unable to suspend domain.");
+
+    return -1;
+}
+
+int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+                  uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+{
+    xc_dominfo_t info;
+
+    int rc = 1, i, last_iter, iter = 0;
+    int live  = (flags & XCFLAGS_LIVE);
+    int debug = (flags & XCFLAGS_DEBUG);
+    int sent_last_iter, skip_this_iter;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    /* A table containg the type of each PFN (/not/ MFN!). */
+    unsigned long *pfn_type = NULL;
+    unsigned long *pfn_batch = NULL;
+
+    /* A copy of hvm domain context */
+    hvm_domain_context_t hvm_ctxt;
+
+    /* Live mapping of shared info structure */
+    shared_info_t *live_shinfo = NULL;
+
+    /* base of the region in which domain memory is mapped */
+    unsigned char *region_base = NULL;
+
+    uint32_t nr_pfns, rec_size, nr_vcpus;
+    unsigned long *page_array = NULL;
+
+    /* power of 2 order of max_pfn */
+    int order_nr;
+
+    /* bitmap of pages:
+       - that should be sent this iteration (unless later marked as skip);
+       - to skip this iteration because already dirty; */
+    unsigned long *to_send = NULL, *to_skip = NULL;
+
+    xc_shadow_op_stats_t stats;
+
+    unsigned long total_sent    = 0;
+
+    DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, 
live=%d, debug=%d.\n",
+            dom, max_iters, max_factor, flags,
+            live, debug);
+
+    /* If no explicit control parameters given, use defaults */
+    if(!max_iters)
+        max_iters = DEF_MAX_ITERS;
+    if(!max_factor)
+        max_factor = DEF_MAX_FACTOR;
+
+    initialize_mbit_rate();
+
+    if(!get_platform_info(xc_handle, dom,
+                          &max_mfn, &hvirt_start, &pt_levels)) {
+        ERROR("HVM:Unable to get platform info.");
+        return 1;
+    }
+
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("HVM:Could not get domain info");
+        return 1;
+    }
+    nr_vcpus = info.nr_online_vcpus;
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        ERROR("HVM:Unable to mlock ctxt");
+        return 1;
+    }
+
+    /* Only have to worry about vcpu 0 even for SMP */
+    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
+        ERROR("HVM:Could not get vcpu context");
+        goto out;
+    }
+    shared_info_frame = info.shared_info_frame;
+
+    /* A cheesy test to see whether the domain contains valid state. */
+    if (ctxt.ctrlreg[3] == 0)
+    {
+        ERROR("Domain is not in a valid HVM guest state");
+        goto out;
+    }
+
+   /* cheesy sanity check */
+    if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) {
+        ERROR("Invalid HVM state record -- pfn count out of range: %lu",
+            (info.max_memkb >> (PAGE_SHIFT - 10)));
+        goto out;
+    }
+
+    /* Map the shared info frame */
+    if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                            PROT_READ, shared_info_frame))) {
+        ERROR("HVM:Couldn't map live_shinfo");
+        goto out;
+    }
+
+    max_pfn = live_shinfo->arch.max_pfn;
+
+    DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, 
nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); 
+
+    /* nr_pfns: total pages excluding vga acc mem
+     * max_pfn: nr_pfns + 0x20 vga hole(0xa0~0xc0)
+     * getdomaininfo.tot_pages: all the allocated pages for this domain
+     */
+    if (live) {
+        ERROR("hvm domain doesn't support live migration now.\n");
+        goto out;
+
+        if (xc_shadow_control(xc_handle, dom,
+                              XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                              NULL, 0, NULL, 0, NULL) < 0) {
+            ERROR("Couldn't enable shadow mode");
+            goto out;
+        }
+
+        /* excludes vga acc mem */
+        nr_pfns = info.nr_pages - 0x800;
+
+        last_iter = 0;
+        DPRINTF("hvm domain live migration debug start: logdirty enable.\n");
+    } else {
+        /* This is a non-live suspend. Issue the call back to get the
+           domain suspended */
+
+        last_iter = 1;
+
+        /* suspend hvm domain */
+        if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) {
+            ERROR("HVM Domain appears not to have suspended");
+            goto out;
+        }
+        nr_pfns = info.nr_pages;
+        DPRINTF("after suspend hvm domain nr_pages=0x%x.\n", nr_pfns);
+    }
+
+    DPRINTF("after 1st handle hvm domain nr_pfns=0x%x, nr_pages=0x%lx, 
max_memkb=0x%lx, live=%d.\n",
+            nr_pfns,
+            info.nr_pages,
+            info.max_memkb,
+            live);
+
+    nr_pfns = info.nr_pages;
+
+    /*XXX: caculate the VGA hole*/
+    max_pfn = nr_pfns + 0x20;
+
+    skip_this_iter = 0;/*XXX*/
+    /* pretend we sent all the pages last iteration */
+    sent_last_iter = max_pfn;
+
+    /* calculate the power of 2 order of max_pfn, e.g.
+       15->4 16->4 17->5 */
+    for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
+        continue;
+
+    /* Setup to_send / to_fix and to_skip bitmaps */
+    to_send = malloc(BITMAP_SIZE);
+    to_skip = malloc(BITMAP_SIZE);
+
+    if (!to_send ||!to_skip) {
+        ERROR("Couldn't allocate to_send array");
+        goto out;
+    }
+
+    memset(to_send, 0xff, BITMAP_SIZE);
+
+    if (lock_pages(to_send, BITMAP_SIZE)) {
+        ERROR("Unable to lock to_send");
+        return 1;
+    }
+
+    /* (to fix is local only) */
+    if (lock_pages(to_skip, BITMAP_SIZE)) {
+        ERROR("Unable to lock to_skip");
+        return 1;
+    }
+
+    analysis_phase(xc_handle, dom, max_pfn, to_skip, 0);
+
+    /* get all the HVM domain pfns */
+    if ( (page_array = (unsigned long *) malloc (sizeof(unsigned long) * 
max_pfn)) == NULL) {
+        ERROR("HVM:malloc fail!\n");
+        goto out;
+    }
+
+    for ( i = 0; i < max_pfn; i++)
+        page_array[i] = i;
+
+
+    /* We want zeroed memory so use calloc rather than malloc. */
+    pfn_type  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
+    pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
+
+    if ((pfn_type == NULL) || (pfn_batch == NULL)) {
+        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
+        errno = ENOMEM;
+        goto out;
+    }
+
+    if (lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type))) {
+        ERROR("Unable to lock");
+        goto out;
+    }
+
+    /* Start writing out the saved-domain record. */
+    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
+        ERROR("write: max_pfn");
+        goto out;
+    }
+
+    while(1) {
+
+        unsigned int prev_pc, sent_this_iter, N, batch;
+
+        iter++;
+        sent_this_iter = 0;
+        skip_this_iter = 0;
+        prev_pc = 0;
+        N=0;
+
+        DPRINTF("Saving HVM domain memory pages: iter %d   0%%", iter);
+
+        while( N < max_pfn ){
+
+            unsigned int this_pc = (N * 100) / max_pfn;
+
+            if ((this_pc - prev_pc) >= 5) {
+                DPRINTF("\b\b\b\b%3d%%", this_pc);
+                prev_pc = this_pc;
+            }
+
+            /* slightly wasteful to peek the whole array evey time,
+               but this is fast enough for the moment. */
+            if (!last_iter && xc_shadow_control(
+                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK,
+                    to_skip, max_pfn, NULL, 0, NULL) != max_pfn) {
+                ERROR("Error peeking HVM shadow bitmap");
+                goto out;
+            }
+
+
+            /* load pfn_type[] with the mfn of all the pages we're doing in
+               this batch. */
+            for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) {
+
+                int n = permute(N, max_pfn, order_nr);
+
+                if (debug) {
+                    DPRINTF("%d pfn= %08lx mfn= %08lx %d \n",
+                            iter, (unsigned long)n, page_array[n],
+                            test_bit(n, to_send));
+                }
+
+                if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip))
+                    skip_this_iter++; /* stats keeping */
+
+                if (!((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
+                      (test_bit(n, to_send) && last_iter)))
+                    continue;
+
+                if (n >= 0xa0 && n < 0xc0) {
+/*                    DPRINTF("get a vga hole pfn= %x.\n", n);*/
+                    continue;
+                }
+                /*
+                ** we get here if:
+                **  1. page is marked to_send & hasn't already been re-dirtied
+                **  2. (ignore to_skip in last iteration)
+                */
+
+                pfn_batch[batch] = n;
+                pfn_type[batch]  = page_array[n];
+
+                batch++;
+            }
+
+            if (batch == 0)
+                goto skip; /* vanishingly unlikely... */
+
+            /* map_foreign use pfns now !*/
+            if ((region_base = xc_map_foreign_batch(
+                     xc_handle, dom, PROT_READ, pfn_batch, batch)) == 0) {
+                ERROR("map batch failed");
+                goto out;
+            }
+
+            /* write num of pfns */
+            if(!write_exact(io_fd, &batch, sizeof(unsigned int))) {
+                ERROR("Error when writing to state file (2)");
+                goto out;
+            }
+
+            /* write all the pfns */
+            if(!write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch)) {
+                ERROR("Error when writing to state file (3)");
+                goto out;
+            }
+
+            if (ratewrite(io_fd, region_base, PAGE_SIZE * batch) != PAGE_SIZE 
* batch) {
+                ERROR("ERROR when writting to state file (4)");
+                goto out;
+            }
+
+
+            sent_this_iter += batch;
+
+            munmap(region_base, batch*PAGE_SIZE);
+
+        } /* end of this while loop for this iteration */
+
+      skip:
+
+        total_sent += sent_this_iter;
+
+        DPRINTF("\r %d: sent %d, skipped %d, ",
+                iter, sent_this_iter, skip_this_iter );
+
+        if (last_iter) {
+            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
+
+            DPRINTF("Total pages sent= %ld (%.2fx)\n",
+                    total_sent, ((float)total_sent)/max_pfn );
+        }
+
+        if (last_iter && debug){
+            int minusone = -1;
+            memset(to_send, 0xff, BITMAP_SIZE);
+            debug = 0;
+            DPRINTF("Entering debug resend-all mode\n");
+
+            /* send "-1" to put receiver into debug mode */
+            if(!write_exact(io_fd, &minusone, sizeof(int))) {
+                ERROR("Error when writing to state file (6)");
+                goto out;
+            }
+
+            continue;
+        }
+
+        if (last_iter) break;
+
+        if (live) {
+
+
+            if(
+                ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
+                (iter >= max_iters) ||
+                (sent_this_iter+skip_this_iter < 50) ||
+                (total_sent > max_pfn*max_factor) ) {
+
+                DPRINTF("Start last iteration for HVM domain\n");
+                last_iter = 1;
+
+                if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
+                                      &ctxt)) {
+                    ERROR("Domain appears not to have suspended");
+                    goto out;
+                }
+
+                DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n",
+                        info.shared_info_frame,
+                        (unsigned long)ctxt.user_regs.eip,
+                        (unsigned long)ctxt.user_regs.edx);
+            }
+
+            if (xc_shadow_control(xc_handle, dom, 
+                                  XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
+                                  max_pfn, NULL, 0, &stats) != max_pfn) {
+                ERROR("Error flushing shadow PT");
+                goto out;
+            }
+
+            sent_last_iter = sent_this_iter;
+
+            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
+
+        }
+
+
+    } /* end of while 1 */
+
+
+    DPRINTF("All HVM memory is saved\n");
+
+    /* Zero terminate */
+    i = 0;
+    if (!write_exact(io_fd, &i, sizeof(int))) {
+        ERROR("Error when writing to state file (6)");
+        goto out;
+    }
+
+    /* save hvm hypervisor state including pic/pit/shpage */
+    if (mlock(&hvm_ctxt, sizeof(hvm_ctxt))) {
+        ERROR("Unable to mlock ctxt");
+        return 1;
+    }
+
+    if (xc_domain_hvm_getcontext(xc_handle, dom, &hvm_ctxt)){
+        ERROR("HVM:Could not get hvm context");
+        goto out;
+    }
+
+    rec_size = sizeof(hvm_ctxt);
+    if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) {
+        ERROR("error write hvm ctxt size");
+        goto out;
+    }
+
+    if ( !write_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt)) ) {
+        ERROR("write HVM info failed!\n");
+    }
+
+    /* save vcpu/vmcs context */
+    if (!write_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
+        ERROR("error write nr vcpus");
+        goto out;
+    }
+
+    /*XXX: need a online map to exclude down cpu */
+    for (i = 0; i < nr_vcpus; i++) {
+
+        if (xc_vcpu_getcontext(xc_handle, dom, i, &ctxt)) {
+            ERROR("HVM:Could not get vcpu context");
+            goto out;
+        }
+
+        rec_size = sizeof(ctxt);
+        DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus); 
+        if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) {
+            ERROR("error write vcpu ctxt size");
+            goto out;
+        }
+
+        if (!write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) {
+            ERROR("write vmcs failed!\n");
+            goto out;
+        }
+    }
+ 
+    /* Success! */
+    rc = 0;
+
+ out:
+
+    if (live) {
+        if(xc_shadow_control(xc_handle, dom, 
+                             XEN_DOMCTL_SHADOW_OP_OFF,
+                             NULL, 0, NULL, 0, NULL) < 0) {
+            DPRINTF("Warning - couldn't disable shadow mode");
+        }
+    }
+
+    free(page_array);
+
+    free(pfn_type);
+    free(pfn_batch);
+    free(to_send);
+    free(to_skip);
+
+    return !!rc;
+}
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/xenctrl.h     Fri Jan 19 14:48:57 2007 +0000
@@ -313,6 +313,30 @@ int xc_domain_getinfolist(int xc_handle,
                           xc_domaininfo_t *info);
 
 /**
+ * This function returns information about the context of a hvm domain
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm hvm_ctxt a pointer to a structure to store the execution context of 
the
+ *            hvm domain
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_getcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt);
+
+/**
+ * This function will set the context for hvm domain
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to set the hvm domain context for
+ * @parm hvm_ctxt pointer to the the hvm context with the values to set
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_setcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt);
+
+/**
  * This function returns information about the execution context of a
  * particular vcpu of a domain.
  *
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/xenguest.h    Fri Jan 19 14:48:57 2007 +0000
@@ -11,6 +11,7 @@
 
 #define XCFLAGS_LIVE      1
 #define XCFLAGS_DEBUG     2
+#define XCFLAGS_HVM       4
 
 
 /**
@@ -25,6 +26,13 @@ int xc_linux_save(int xc_handle, int io_
                   uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
                   int (*suspend)(int domid));
 
+/**
+ * This function will save a hvm domain running unmodified guest.
+ * @return 0 on success, -1 on failure
+ */
+int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+                  int (*suspend)(int domid));
 
 /**
  * This function will restore a saved domain running Linux.
@@ -41,6 +49,18 @@ int xc_linux_restore(int xc_handle, int 
                      unsigned long nr_pfns, unsigned int store_evtchn,
                      unsigned long *store_mfn, unsigned int console_evtchn,
                      unsigned long *console_mfn);
+
+/**
+ * This function will restore a saved hvm domain running unmodified guest.
+ *
+ * @parm store_mfn pass mem size & returned with the mfn of the store page
+ * @return 0 on success, -1 on failure
+ */
+int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
+                      unsigned long nr_pfns, unsigned int store_evtchn,
+                      unsigned long *store_mfn, unsigned int console_evtchn,
+                      unsigned long *console_mfn,
+                      unsigned int pae, unsigned int apic);
 
 /**
  * This function will create a domain for a paravirtualized Linux
diff -r 8475a4e0425e -r 3c8bb086025e tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/pygrub/src/pygrub   Fri Jan 19 14:48:57 2007 +0000
@@ -503,7 +503,7 @@ def run_grub(file, entry, fs):
 # If nothing has been specified, look for a Solaris domU. If found, perform the
 # necessary tweaks.
 def sniff_solaris(fs, cfg):
-    if not fs.file_exists("/platform/i86xen/kernel/unix"):
+    if not fs.file_exists("/platform/i86xpv/kernel/unix"):
         return cfg
     
     # darned python
@@ -516,10 +516,10 @@ def sniff_solaris(fs, cfg):
             longmode = True
 
     if not cfg["kernel"]:
-        cfg["kernel"] = "/platform/i86xen/kernel/unix"
+        cfg["kernel"] = "/platform/i86xpv/kernel/unix"
         cfg["ramdisk"] = "/platform/i86pc/boot_archive"
         if longmode:
-            cfg["kernel"] = "/platform/i86xen/kernel/amd64/unix"
+            cfg["kernel"] = "/platform/i86xpv/kernel/amd64/unix"
             cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive"
 
     # Unpleasant. Typically we'll have 'root=foo -k' or 'root=foo /kernel -k',
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/setup.py
--- a/tools/python/setup.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/setup.py     Fri Jan 19 14:48:57 2007 +0000
@@ -30,12 +30,23 @@ xs = Extension("xs",
                libraries          = libraries,
                sources            = [ "xen/lowlevel/xs/xs.c" ])
 
+scf = Extension("scf",
+               extra_compile_args = extra_compile_args,
+               include_dirs       = include_dirs + [ "xen/lowlevel/scf" ],
+               library_dirs       = library_dirs,
+               libraries          = libraries,
+               sources            = [ "xen/lowlevel/scf/scf.c" ])
+             
 acm = Extension("acm",
                extra_compile_args = extra_compile_args,
                include_dirs       = include_dirs + [ "xen/lowlevel/acm" ],
                library_dirs       = library_dirs,
                libraries          = libraries,
                sources            = [ "xen/lowlevel/acm/acm.c" ])
+
+modules = [ xc, xs, acm ]
+if os.uname()[0] == 'SunOS':
+    modules.append(scf)
 
 setup(name            = 'xen',
       version         = '3.0',
@@ -56,7 +67,7 @@ setup(name            = 'xen',
                          'xen.xm.tests'
                          ],
       ext_package = "xen.lowlevel",
-      ext_modules = [ xc, xs, acm ]
+      ext_modules = modules
       )
 
 os.chdir('logging')
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/lowlevel/scf/scf.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/lowlevel/scf/scf.c       Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,156 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <Python.h>
+
+#include <libscf.h>
+#include <stdio.h>
+
+#define        XEND_FMRI "svc:/system/xen/xend:default"
+#define        XEND_PG "config"
+
+static PyObject *scf_exc;
+
+static void *
+scf_exception(const char *err, const char *value)
+{
+       int scferr = scf_error();
+       const char *scfstrerr = scf_strerror(scferr);
+       PyObject *obj = Py_BuildValue("(isss)", scferr, err, scfstrerr, value);
+       PyErr_SetObject(scf_exc, obj);
+       return (NULL);
+}
+
+static PyObject *
+pyscf_get_bool(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+       static char *kwlist[] = { "name", NULL };
+       scf_simple_prop_t *prop;
+       uint8_t *val;
+       char *name;
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name))
+               return (NULL);
+
+       prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name);
+
+       if (prop == NULL)
+               return (scf_exception("scf_simple_prop_get() failed", name));
+
+       if ((val = scf_simple_prop_next_boolean(prop)) == NULL)
+               return (scf_exception("scf_simple_prop_next_boolean() failed",
+                   name));
+
+       if (*val) {
+               scf_simple_prop_free(prop);
+               Py_INCREF(Py_True);
+               return (Py_True);
+       }
+
+       scf_simple_prop_free(prop);
+       Py_INCREF(Py_False);
+       return (Py_False);
+}
+
+static PyObject *
+pyscf_get_int(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+       static char *kwlist[] = { "name", NULL };
+       scf_simple_prop_t *prop;
+       PyObject *obj;
+       int64_t *val;
+       char *name;
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name))
+               return (NULL);
+
+       prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name);
+
+       if (prop == NULL)
+               return (scf_exception("scf_simple_prop_get() failed", name));
+
+       if ((val = scf_simple_prop_next_integer(prop)) == NULL)
+               return (scf_exception("scf_simple_prop_next_integer() failed",
+                   name));
+
+       obj = PyInt_FromLong((long)*val);
+       scf_simple_prop_free(prop);
+       return (obj);
+}
+
+static PyObject *
+pyscf_get_string(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+       static char *kwlist[] = { "name", NULL };
+       scf_simple_prop_t *prop;
+       PyObject *obj;
+       char *name;
+       char *str;
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name))
+               return (NULL);
+
+       prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name);
+
+       if (prop == NULL)
+               return (scf_exception("scf_simple_prop_get() failed", name));
+
+       if ((str = scf_simple_prop_next_astring(prop)) == NULL) {
+               scf_simple_prop_free(prop);
+               return (scf_exception("scf_simple_prop_next_astring() failed",
+                   name));
+       }
+
+       obj = PyString_FromString(str);
+       scf_simple_prop_free(prop);
+       return (obj);
+}
+
+PyDoc_STRVAR(pyscf_get_bool__doc__,
+   "get_bool(name) - get the value of the named boolean property");
+PyDoc_STRVAR(pyscf_get_int__doc__,
+   "get_int(name) - get the value of the named integer property");
+PyDoc_STRVAR(pyscf_get_string__doc__,
+   "get_string(name) - get the value of the named string property");
+
+static struct PyMethodDef pyscf_module_methods[] = {
+       { "get_bool", (PyCFunction) pyscf_get_bool,
+         METH_VARARGS|METH_KEYWORDS, pyscf_get_bool__doc__ },
+       { "get_int", (PyCFunction) pyscf_get_int,
+         METH_VARARGS|METH_KEYWORDS, pyscf_get_int__doc__ },
+       { "get_string", (PyCFunction) pyscf_get_string,
+         METH_VARARGS|METH_KEYWORDS, pyscf_get_string__doc__ },
+       { NULL, NULL, 0, NULL } 
+};
+
+PyMODINIT_FUNC
+initscf(void)
+{
+       PyObject *m;
+       m = Py_InitModule("scf", pyscf_module_methods);
+
+       scf_exc = PyErr_NewException("scf.error", NULL, NULL);
+       Py_INCREF(scf_exc);
+       PyModule_AddObject(m, "error", scf_exc);
+       PyModule_AddIntConstant(m, "SCF_ERROR_NOT_FOUND", SCF_ERROR_NOT_FOUND);
+}
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jan 19 14:48:57 2007 +0000
@@ -158,6 +158,20 @@ static PyObject *pyxc_domain_destroy(XcO
 static PyObject *pyxc_domain_destroy(XcObject *self, PyObject *args)
 {
     return dom_op(self, args, xc_domain_destroy);
+}
+
+static PyObject *pyxc_domain_shutdown(XcObject *self, PyObject *args)
+{
+    uint32_t dom, reason;
+
+    if (!PyArg_ParseTuple(args, "ii", &dom, &reason))
+      return NULL;
+
+    if (xc_domain_shutdown(self->xc_handle, dom, reason) != 0)
+        return pyxc_error_to_exception();
+    
+    Py_INCREF(zero);
+    return zero;
 }
 
 
@@ -1027,6 +1041,14 @@ static PyMethodDef pyxc_methods[] = {
       METH_VARARGS, "\n"
       "Destroy a domain.\n"
       " dom [int]:    Identifier of domain to be destroyed.\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "domain_shutdown", 
+      (PyCFunction)pyxc_domain_shutdown,
+      METH_VARARGS, "\n"
+      "Shutdown a domain.\n"
+      " dom       [int, 0]:      Domain identifier to use.\n"
+      " reason     [int, 0]:      Reason for shutdown.\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
     { "vcpu_setaffinity", 
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/util/xmlrpclib2.py
--- a/tools/python/xen/util/xmlrpclib2.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/util/xmlrpclib2.py       Fri Jan 19 14:48:57 2007 +0000
@@ -256,6 +256,7 @@ class UnixXMLRPCRequestHandler(XMLRPCReq
 
 class UnixXMLRPCServer(TCPXMLRPCServer):
     address_family = socket.AF_UNIX
+    allow_address_reuse = True
 
     def __init__(self, addr, allowed, xenapi, logRequests = 1):
         mkdir.parents(os.path.dirname(addr), stat.S_IRWXU, True)
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/web/httpserver.py
--- a/tools/python/xen/web/httpserver.py        Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/web/httpserver.py        Fri Jan 19 14:48:57 2007 +0000
@@ -294,8 +294,6 @@ class HttpServer:
 
     backlog = 5
 
-    closed = False
-
     def __init__(self, root, interface, port=8080):
         self.root = root
         self.interface = interface
@@ -303,6 +301,7 @@ class HttpServer:
         # ready indicates when we are ready to begin accept connections
         # it should be set after a successful bind
         self.ready = False
+        self.closed = False
 
     def run(self):
         self.bind()
@@ -316,7 +315,6 @@ class HttpServer:
 
     def stop(self):
         self.close()
-
 
     def bind(self):
         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -334,7 +332,10 @@ class HttpServer:
 
     def close(self):
         self.closed = True
-        try:
+        self.ready = False
+        try:
+            # shutdown socket explicitly to allow reuse
+            self.socket.shutdown(socket.SHUT_RDWR)
             self.socket.close()
         except:
             pass
@@ -344,6 +345,9 @@ class HttpServer:
 
     def getResource(self, req):
         return self.root.getRequestResource(req)
+
+    def shutdown(self):
+        self.close()
 
 
 class UnixHttpServer(HttpServer):
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/Vifctl.py
--- a/tools/python/xen/xend/Vifctl.py   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/Vifctl.py   Fri Jan 19 14:48:57 2007 +0000
@@ -20,7 +20,7 @@
 """
 import os
 
-import XendRoot
+import XendOptions
 
 
 def network(op):
@@ -30,7 +30,7 @@ def network(op):
     """
     if op not in ['start', 'stop']:
         raise ValueError('Invalid operation: ' + op)
-    script = XendRoot.instance().get_network_script()
+    script = XendOptions.instance().get_network_script()
     if script:
         script.insert(1, op)
         os.spawnv(os.P_WAIT, script[0], script)
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendCheckpoint.py   Fri Jan 19 14:48:57 2007 +0000
@@ -22,11 +22,14 @@ from xen.xend.XendConstants import *
 from xen.xend.XendConstants import *
 
 SIGNATURE = "LinuxGuestRecord"
+QEMU_SIGNATURE = "QemuDeviceModelRecord"
+dm_batch = 512
 XC_SAVE = "xc_save"
 XC_RESTORE = "xc_restore"
 
 
 sizeof_int = calcsize("i")
+sizeof_unsigned_int = calcsize("I")
 sizeof_unsigned_long = calcsize("L")
 
 
@@ -69,6 +72,11 @@ def save(fd, dominfo, network, live, dst
                     "could not write guest state file: config len")
         write_exact(fd, config, "could not write guest state file: config")
 
+        image_cfg = dominfo.info.get('image', {})
+        hvm = image_cfg.has_key('hvm')
+
+        if hvm:
+            log.info("save hvm domain")
         # xc_save takes three customization parameters: maxit, max_f, and
         # flags the last controls whether or not save is 'live', while the
         # first two further customize behaviour when 'live' save is
@@ -76,7 +84,7 @@ def save(fd, dominfo, network, live, dst
         # libxenguest; see the comments and/or code in xc_linux_save() for
         # more information.
         cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
-               str(dominfo.getDomid()), "0", "0", str(int(live)) ]
+               str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 
2)) ]
         log.debug("[xc_save]: %s", string.join(cmd))
 
         def saveInputHandler(line, tochild):
@@ -90,11 +98,28 @@ def save(fd, dominfo, network, live, dst
                 log.info("Domain %d suspended.", dominfo.getDomid())
                 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
                                        domain_name)
+                #send signal to device model for save
+                if hvm:
+                    log.info("release_devices for hvm domain")
+                    dominfo._releaseDevices(True)
                 tochild.write("done\n")
                 tochild.flush()
                 log.debug('Written done')
 
         forkHelper(cmd, fd, saveInputHandler, False)
+
+        # put qemu device model state
+        if hvm:
+            write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
+            qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(), 
os.O_RDONLY)
+            while True:
+                buf = os.read(qemu_fd, dm_batch)
+                if len(buf):
+                    write_exact(fd, buf, "could not write device model state")
+                else:
+                    break
+            os.close(qemu_fd)
+            os.remove("/tmp/xen.qemu-dm.%d" % dominfo.getDomid())
 
         dominfo.destroyDomain()
         try:
@@ -149,19 +174,43 @@ def restore(xd, fd, dominfo = None, paus
 
     nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 
 
+    # if hvm, pass mem size to calculate the store_mfn
+    image_cfg = dominfo.info.get('image', {})
+    is_hvm  = image_cfg.has_key('hvm')
+    if is_hvm:
+        hvm  = dominfo.info['memory_static_min']
+        apic = dominfo.info['image']['hvm'].get('apic', 0)
+        pae  = dominfo.info['image']['hvm'].get('pae',  0)
+        log.info("restore hvm domain %d, mem=%d, apic=%d, pae=%d",
+                 dominfo.domid, hvm, apic, pae)
+    else:
+        hvm  = 0
+        apic = 0
+        pae  = 0
+
     try:
         l = read_exact(fd, sizeof_unsigned_long,
                        "not a valid guest state file: pfn count read")
         max_pfn = unpack("L", l)[0]    # native sizeof long
+
         if max_pfn > 16*1024*1024:     # XXX 
             raise XendError(
                 "not a valid guest state file: pfn count out of range")
 
-        balloon.free(xc.pages_to_kib(nr_pfns))
+        shadow = dominfo.info['shadow_memory']
+        log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, "
+                  "nr_pfns=0x%x.", dominfo.info['shadow_memory'],
+                  dominfo.info['memory_static_max'],
+                  dominfo.info['memory_static_min'], nr_pfns)
+
+        balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024)
+
+        shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow)
+        dominfo.info['shadow_memory'] = shadow_cur
 
         cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
                         fd, dominfo.getDomid(), max_pfn,
-                        store_port, console_port])
+                        store_port, console_port, hvm, pae, apic])
         log.debug("[xc_restore]: %s", string.join(cmd))
 
         handler = RestoreInputHandler()
@@ -171,10 +220,30 @@ def restore(xd, fd, dominfo = None, paus
         if handler.store_mfn is None or handler.console_mfn is None:
             raise XendError('Could not read store/console MFN')
 
-        os.read(fd, 1)           # Wait for source to close connection
         dominfo.waitForDevices() # Wait for backends to set up
         if not paused:
             dominfo.unpause()
+
+         # get qemu state and create a tmp file for dm restore
+        if is_hvm:
+            qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
+                                        "invalid device model signature read")
+            if qemu_signature != QEMU_SIGNATURE:
+                raise XendError("not a valid device model state: found '%s'" %
+                                qemu_signature)
+            qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(),
+                              os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
+            while True:
+                buf = os.read(fd, dm_batch)
+                if len(buf):
+                    write_exact(qemu_fd, buf,
+                                "could not write dm state to tmp file")
+                else:
+                    break
+            os.close(qemu_fd)
+
+
+        os.read(fd, 1)           # Wait for source to close connection
         
         dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
         
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendConfig.py       Fri Jan 19 14:48:57 2007 +0000
@@ -77,6 +77,25 @@ def scrub_password(data):
         return re.sub(r'\(vncpasswd\s+[^\)]+\)','(vncpasswd XXXXXX)', data)
     else:
         return data
+
+#
+# CPU fields:
+#
+# vcpus_number -- the maximum number of vcpus that this domain may ever have.
+#                 aka XendDomainInfo.getVCpuCount().
+# vcpus        -- the legacy configuration name for above.
+# max_vcpu_id  -- vcpus_number - 1.  This is given to us by Xen.
+#
+# cpus         -- the list of pCPUs available to each vCPU.
+#
+#   vcpu_avail:  a bitmap telling the guest domain whether it may use each of
+#                its VCPUs.  This is translated to
+#                <dompath>/cpu/<id>/availability = {online,offline} for use
+#                by the guest domain.
+# online_vpcus -- the number of VCPUs currently up, as reported by Xen.  This
+#                 is changed by changing vcpu_avail, and waiting for the
+#                 domain to respond.
+#
 
 
 # Mapping from XendConfig configuration keys to the old
@@ -185,7 +204,7 @@ LEGACY_CFG_TYPES = {
     'uuid':          str,
     'name':          str,
     'vcpus':         int,
-    'vcpu_avail':    int,
+    'vcpu_avail':    long,
     'memory':        int,
     'shadow_memory': int,
     'maxmem':        int,
@@ -355,9 +374,6 @@ class XendConfig(dict):
             'cpu_weight': 256,
             'cpu_cap': 0,
             'vcpus_number': 1,
-            'online_vcpus': 1,
-            'max_vcpu_id': 0,
-            'vcpu_avail': 1,
             'console_refs': [],
             'vif_refs': [],
             'vbd_refs': [],
@@ -389,7 +405,7 @@ class XendConfig(dict):
                                       event)
 
     def _vcpus_sanity_check(self):
-        if self.get('vcpus_number') != None:
+        if 'vcpus_number' in self and 'vcpu_avail' not in self:
             self['vcpu_avail'] = (1 << self['vcpus_number']) - 1
 
     def _uuid_sanity_check(self):
@@ -405,7 +421,7 @@ class XendConfig(dict):
     def _dominfo_to_xapi(self, dominfo):
         self['domid'] = dominfo['domid']
         self['online_vcpus'] = dominfo['online_vcpus']
-        self['max_vcpu_id'] = dominfo['max_vcpu_id']
+        self['vcpus_number'] = dominfo['max_vcpu_id'] + 1
         self['memory_dynamic_min'] = (dominfo['mem_kb'] + 1023)/1024
         self['memory_dynamic_max'] = (dominfo['maxmem_kb'] + 1023)/1024
         self['cpu_time'] = dominfo['cpu_time']/1e9
@@ -636,9 +652,6 @@ class XendConfig(dict):
         self['memory_dynamic_max'] = self['memory_static_max']
         self['memory_dynamic_min'] = self['memory_static_min']
 
-        # make sure max_vcpu_id is set correctly
-        self['max_vcpu_id'] = self['vcpus_number'] - 1
-
         # set device references in the configuration
         self['devices'] = cfg.get('devices', {})
         
@@ -720,13 +733,11 @@ class XendConfig(dict):
         _set_cfg_if_exists('on_xend_stop')
         _set_cfg_if_exists('on_xend_start')
         _set_cfg_if_exists('vcpu_avail')
-        _set_cfg_if_exists('max_vcpu_id') # needed for vcpuDomDetails
         _set_cfg_if_exists('cpu_weight')
         _set_cfg_if_exists('cpu_cap')
         
         # Parse and store runtime configuration 
         _set_cfg_if_exists('start_time')
-        _set_cfg_if_exists('online_vcpus')
         _set_cfg_if_exists('cpu_time')
         _set_cfg_if_exists('shutdown_reason')
         _set_cfg_if_exists('up_time')
@@ -1115,19 +1126,17 @@ class XendConfig(dict):
         # configuration
         log.debug("update_with_image_sxp(%s)" % scrub_password(image_sxp))
 
-        kernel_args = ""
+        # user-specified args must come last: previous releases did this and
+        # some domU kernels rely upon the ordering.
+        kernel_args = sxp.child_value(image_sxp, 'args', '')
 
         # attempt to extract extra arguments from SXP config
         arg_ip = sxp.child_value(image_sxp, 'ip')
         if arg_ip and not re.search(r'ip=[^ ]+', kernel_args):
-            kernel_args += 'ip=%s ' % arg_ip
+            kernel_args = 'ip=%s ' % arg_ip + kernel_args
         arg_root = sxp.child_value(image_sxp, 'root')
         if arg_root and not re.search(r'root=', kernel_args):
-            kernel_args += 'root=%s ' % arg_root
-
-        # user-specified args must come last: previous releases did this and
-        # some domU kernels rely upon the ordering.
-        kernel_args += sxp.child_value(image_sxp, 'args', '')
+            kernel_args = 'root=%s ' % arg_root + kernel_args
 
         if bootloader:
             self['_temp_using_bootloader'] = '1'
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendDomain.py       Fri Jan 19 14:48:57 2007 +0000
@@ -32,7 +32,7 @@ import xen.lowlevel.xc
 import xen.lowlevel.xc
 
 
-from xen.xend import XendRoot, XendCheckpoint, XendDomainInfo
+from xen.xend import XendOptions, XendCheckpoint, XendDomainInfo
 from xen.xend.PrettyPrint import prettyprint
 from xen.xend.XendConfig import XendConfig
 from xen.xend.XendError import XendError, XendInvalidDomain, VmError
@@ -51,7 +51,7 @@ from xen.xend import uuid
 from xen.xend import uuid
 
 xc = xen.lowlevel.xc.xc()
-xroot = XendRoot.instance() 
+xoptions = XendOptions.instance() 
 
 __all__ = [ "XendDomain" ]
 
@@ -214,7 +214,7 @@ class XendDomain:
         @rtype: String
         @return: Path.
         """
-        dom_path = xroot.get_xend_domains_path()
+        dom_path = xoptions.get_xend_domains_path()
         if domuuid:
             dom_path = os.path.join(dom_path, domuuid)
         return dom_path
@@ -361,7 +361,7 @@ class XendDomain:
 
     def _setDom0CPUCount(self):
         """Sets the number of VCPUs dom0 has. Retreived from the
-        Xend configuration, L{XendRoot}.
+        Xend configuration, L{XendOptions}.
 
         @requires: Expects to be protected by domains_lock.
         @rtype: None
@@ -369,7 +369,7 @@ class XendDomain:
         dom0 = self.privilegedDomain()
 
         # get max number of vcpus to use for dom0 from config
-        target = int(xroot.get_dom0_vcpus())
+        target = int(xoptions.get_dom0_vcpus())
         log.debug("number of vcpus to use is %d", target)
    
         # target == 0 means use all processors
@@ -1164,7 +1164,7 @@ class XendDomain:
             dominfo.checkLiveMigrateMemory()
 
         if port == 0:
-            port = xroot.get_xend_relocation_port()
+            port = xoptions.get_xend_relocation_port()
         try:
             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             sock.connect((dst, port))
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Jan 19 14:48:57 2007 +0000
@@ -38,7 +38,7 @@ from xen.util import security
 from xen.util import security
 
 from xen.xend import balloon, sxp, uuid, image, arch, osdep
-from xen.xend import XendRoot, XendNode, XendConfig
+from xen.xend import XendOptions, XendNode, XendConfig
 
 from xen.xend.XendConfig import scrub_password
 from xen.xend.XendBootloader import bootloader
@@ -54,29 +54,10 @@ BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp'
 BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp'
 
 xc = xen.lowlevel.xc.xc()
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 log = logging.getLogger("xend.XendDomainInfo")
 #log.setLevel(logging.TRACE)
-
-
-#
-# There are a number of CPU-related fields:
-#
-#   vcpus:       the number of virtual CPUs this domain is configured to use.
-#   vcpu_avail:  a bitmap telling the guest domain whether it may use each of
-#                its VCPUs.  This is translated to
-#                <dompath>/cpu/<id>/availability = {online,offline} for use
-#                by the guest domain.
-#   cpumap:      a list of bitmaps, one for each VCPU, giving the physical
-#                CPUs that that VCPU may use.
-#   cpu:         a configuration setting requesting that VCPU 0 is pinned to
-#                the specified physical CPU.
-#
-# vcpus and vcpu_avail settings persist with the VM (i.e. they are persistent
-# across save, restore, migrate, and restart).  The other settings are only
-# specific to the domain, so are lost when the VM moves.
-#
 
 
 def create(config):
@@ -451,6 +432,16 @@ class XendDomainInfo:
         self._removeVm('xend/previous_restart_time')
         self.storeDom("control/shutdown", reason)
 
+        ## shutdown hypercall for hvm domain desides xenstore write
+        image_cfg = self.info.get('image', {})
+        hvm = image_cfg.has_key('hvm')
+        if hvm:
+            for code in DOMAIN_SHUTDOWN_REASONS.keys():
+                if DOMAIN_SHUTDOWN_REASONS[code] == reason:
+                    break
+            xc.domain_shutdown(self.domid, code)
+
+
     def pause(self):
         """Pause domain
         
@@ -614,7 +605,7 @@ class XendDomainInfo:
                     ['name',       self.info['name_label']],
                     ['vcpu_count', self.info['vcpus_number']]]
 
-            for i in range(0, self.info['max_vcpu_id']+1):
+            for i in range(0, self.info['vcpus_number']):
                 info = xc.vcpu_getinfo(self.domid, i)
 
                 sxpr.append(['vcpu',
@@ -739,7 +730,7 @@ class XendDomainInfo:
             'domid':              str(self.domid),
             'vm':                 self.vmpath,
             'name':               self.info['name_label'],
-            'console/limit':      str(xroot.get_console_limit() * 1024),
+            'console/limit':      str(xoptions.get_console_limit() * 1024),
             'memory/target':      str(self.info['memory_static_min'] * 1024)
             }
 
@@ -898,8 +889,9 @@ class XendDomainInfo:
                 self._writeDom(self._vcpuDomDetails())
         else:
             self.info['vcpus_number'] = vcpus
-            self.info['online_vcpus'] = vcpus
             xen.xend.XendDomain.instance().managed_config_save(self)
+        log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
+                 vcpus)
 
     def getLabel(self):
         return security.get_security_info(self.info, 'label')
@@ -976,7 +968,7 @@ class XendDomainInfo:
                          self.info['name_label'], self.domid)
                 self._writeVm(LAST_SHUTDOWN_REASON, 'crash')
 
-                if xroot.get_enable_dump():
+                if xoptions.get_enable_dump():
                     try:
                         self.dumpCore()
                     except XendError:
@@ -1228,8 +1220,11 @@ class XendDomainInfo:
         if self.image:
             self.image.createDeviceModel()
 
-    def _releaseDevices(self):
+    def _releaseDevices(self, suspend = False):
         """Release all domain's devices.  Nothrow guarantee."""
+        if suspend and self.image:
+            self.image.destroy(suspend)
+            return
 
         while True:
             t = xstransact("%s/device" % self.dompath)
@@ -1381,7 +1376,7 @@ class XendDomainInfo:
             # this is done prior to memory allocation to aide in memory
             # distribution for NUMA systems.
             if self.info['cpus'] is not None and len(self.info['cpus']) > 0:
-                for v in range(0, self.info['max_vcpu_id']+1):
+                for v in range(0, self.info['vcpus_number']):
                     xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
 
             # Use architecture- and image-specific calculations to determine
@@ -1395,6 +1390,7 @@ class XendDomainInfo:
                 self.info['shadow_memory'] * 1024,
                 self.info['memory_static_max'] * 1024)
 
+            log.debug("_initDomain:shadow_memory=0x%x, memory_static_max=0x%x, 
memory_static_min=0x%x.", self.info['shadow_memory'], 
self.info['memory_static_max'], self.info['memory_static_min'],)
             # Round shadow up to a multiple of a MiB, as shadow_mem_control
             # takes MiB and we must not round down and end up under-providing.
             shadow = ((shadow + 1023) / 1024) * 1024
@@ -1494,6 +1490,16 @@ class XendDomainInfo:
         self.console_mfn = console_mfn
 
         self._introduceDomain()
+        image_cfg = self.info.get('image', {})
+        hvm = image_cfg.has_key('hvm')
+        if hvm:
+            self.image = image.create(self,
+                    self.info,
+                    self.info['image'],
+                    self.info['devices'])
+            if self.image:
+                self.image.createDeviceModel(True)
+                self.image.register_shutdown_watch()
         self._storeDomDetails()
         self._registerWatches()
         self.refreshShutdown()
@@ -2028,8 +2034,8 @@ class XendDomainInfo:
         # TODO: spec says that key is int, however, python does not allow
         #       non-string keys to dictionaries.
         vcpu_util = {}
-        if 'max_vcpu_id' in self.info and self.domid != None:
-            for i in range(0, self.info['max_vcpu_id']+1):
+        if 'vcpus_number' in self.info and self.domid != None:
+            for i in range(0, self.info['vcpus_number']):
                 info = xc.vcpu_getinfo(self.domid, i)
                 vcpu_util[str(i)] = info['cpu_time']/1000000000.0
                 
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendNode.py Fri Jan 19 14:48:57 2007 +0000
@@ -24,7 +24,7 @@ from xen.util import Brctl
 
 from xen.xend import uuid
 from xen.xend.XendError import XendError, NetworkAlreadyConnected
-from xen.xend.XendRoot import instance as xendroot
+from xen.xend.XendOptions import instance as xendoptions
 from xen.xend.XendStorageRepository import XendStorageRepository
 from xen.xend.XendLogging import log
 from xen.xend.XendPIF import *
@@ -45,7 +45,7 @@ class XendNode:
         """
         
         self.xc = xen.lowlevel.xc.xc()
-        self.state_store = XendStateStore(xendroot().get_xend_state_path())
+        self.state_store = XendStateStore(xendoptions().get_xend_state_path())
 
         # load host state from XML file
         saved_host = self.state_store.load_state('host')
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendOptions.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/XendOptions.py      Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,373 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
+"""Xend root class.
+Creates the servers and handles configuration.
+
+Other classes get config variables by importing this module,
+using instance() to get a XendOptions instance, and then
+the config functions (e.g. get_xend_port()) to get
+configured values.
+"""
+
+import os
+import os.path
+import string
+import sys
+
+from xen.xend import sxp, osdep, XendLogging
+from xen.xend.XendError import XendError
+
+if os.uname()[0] == 'SunOS':
+    from xen.lowlevel import scf
+
+class XendOptions:
+    """Configuration options."""
+
+    """Where network control scripts live."""
+    network_script_dir = osdep.scripts_dir
+
+    """Where block control scripts live."""
+    block_script_dir = osdep.scripts_dir
+
+    """Default path to the log file. """
+    logfile_default = "/var/log/xen/xend.log"
+
+    """Default level of information to be logged."""
+    loglevel_default = 'DEBUG'
+
+    """Default Xen-API server configuration. """
+    xen_api_server_default = [['unix']]
+
+    """Default for the flag indicating whether xend should run an http server
+    (deprecated)."""
+    xend_http_server_default = 'no'
+
+    xend_tcp_xmlrpc_server_default = 'no'
+
+    xend_unix_xmlrpc_server_default = 'yes'
+
+    """Default interface address xend listens at. """
+    xend_address_default      = ''
+
+    """Default for the flag indicating whether xend should run a relocation 
server."""
+    xend_relocation_server_default = 'no'
+
+    """Default interface address the xend relocation server listens at. """
+    xend_relocation_address_default = ''
+
+    """Default port xend serves HTTP at. """
+    xend_port_default         = 8000
+
+    """Default port xend serves relocation at. """
+    xend_relocation_port_default = 8002
+
+    xend_relocation_hosts_allow_default = ''
+
+    """Default for the flag indicating whether xend should run a unix-domain
+    server (deprecated)."""
+    xend_unix_server_default = 'no'
+
+    """Default external migration tool """
+    external_migration_tool_default = ''
+
+    """Default path the unix-domain server listens at."""
+    xend_unix_path_default = '/var/lib/xend/xend-socket'
+
+    dom0_min_mem_default = 0
+
+    dom0_vcpus_default = 0
+
+    vncpasswd_default = None
+
+    """Default interface to listen for VNC connections on"""
+    xend_vnc_listen_default = '127.0.0.1'
+
+    """Default session storage path."""
+    xend_domains_path_default = '/var/lib/xend/domains'
+
+    """Default xend management state storage."""
+    xend_state_path_default = '/var/lib/xend/state'
+
+    """Default type of backend network interfaces"""
+    netback_type = osdep.netback_type
+
+    """Default script to configure a backend network interface"""
+    vif_script = osdep.vif_script
+
+    def __init__(self):
+        self.configure()
+
+    def _logError(self, fmt, *args):
+        """Logging function to log to stderr. We use this for XendOptions log
+        messages because they may be logged before the logger has been
+        configured.  Other components can safely use the logger.
+        """
+        print >>sys.stderr, "xend [ERROR]", fmt % args
+
+
+    def configure(self):
+        self.set_config()
+        XendLogging.init(self.get_config_string("logfile",
+                                               self.logfile_default),
+                         self.get_config_string("loglevel",
+                                               self.loglevel_default))
+
+    def set_config(self):
+        raise NotImplementedError()
+
+    def get_config_bool(self, name, val=None):
+        raise NotImplementedError()
+         
+    def get_config_int(self, name, val=None):
+        raise NotImplementedError()
+
+    def get_config_string(self, name, val=None):
+        raise NotImplementedError()
+
+    def get_xen_api_server(self):
+        raise NotImplementedError()
+
+    def get_xend_http_server(self):
+        """Get the flag indicating whether xend should run an http server.
+        """
+        return self.get_config_bool("xend-http-server", 
self.xend_http_server_default)
+
+    def get_xend_tcp_xmlrpc_server(self):
+        return self.get_config_bool("xend-tcp-xmlrpc-server",
+                                    self.xend_tcp_xmlrpc_server_default)
+
+    def get_xend_unix_xmlrpc_server(self):
+        return self.get_config_bool("xend-unix-xmlrpc-server",
+                                    self.xend_unix_xmlrpc_server_default)
+
+    def get_xend_relocation_server(self):
+        """Get the flag indicating whether xend should run a relocation server.
+        """
+        return self.get_config_bool("xend-relocation-server",
+                                    self.xend_relocation_server_default)
+
+    def get_xend_port(self):
+        """Get the port xend listens at for its HTTP interface.
+        """
+        return self.get_config_int('xend-port', self.xend_port_default)
+
+    def get_xend_relocation_port(self):
+        """Get the port xend listens at for connection to its relocation 
server.
+        """
+        return self.get_config_int('xend-relocation-port',
+                                   self.xend_relocation_port_default)
+
+    def get_xend_relocation_hosts_allow(self):
+        return self.get_config_string("xend-relocation-hosts-allow",
+                                     self.xend_relocation_hosts_allow_default)
+
+    def get_xend_address(self):
+        """Get the address xend listens at for its HTTP port.
+        This defaults to the empty string which allows all hosts to connect.
+        If this is set to 'localhost' only the localhost will be able to 
connect
+        to the HTTP port.
+        """
+        return self.get_config_string('xend-address', 
self.xend_address_default)
+
+    def get_xend_relocation_address(self):
+        """Get the address xend listens at for its relocation server port.
+        This defaults to the empty string which allows all hosts to connect.
+        If this is set to 'localhost' only the localhost will be able to 
connect
+        to the relocation port.
+        """
+        return self.get_config_string('xend-relocation-address', 
self.xend_relocation_address_default)
+
+    def get_xend_unix_server(self):
+        """Get the flag indicating whether xend should run a unix-domain 
server.
+        """
+        return self.get_config_bool("xend-unix-server", 
self.xend_unix_server_default)
+
+    def get_xend_unix_path(self):
+        """Get the path the xend unix-domain server listens at.
+        """
+        return self.get_config_string("xend-unix-path", 
self.xend_unix_path_default)
+
+    def get_xend_domains_path(self):
+        """ Get the path for persistent domain configuration storage
+        """
+        return self.get_config_string("xend-domains-path", 
self.xend_domains_path_default)
+
+    def get_xend_state_path(self):
+        """ Get the path for persistent domain configuration storage
+        """
+        return self.get_config_string("xend-state-path", 
self.xend_state_path_default)    
+
+    def get_network_script(self):
+        """@return the script used to alter the network configuration when
+        Xend starts and stops, or None if no such script is specified."""
+        
+        s = self.get_config_string('network-script')
+
+        if s:
+            result = s.split(" ")
+            result[0] = os.path.join(self.network_script_dir, result[0])
+            return result
+        else:
+            return None
+
+    def get_external_migration_tool(self):
+        """@return the name of the tool to handle virtual TPM migration."""
+        return self.get_config_string('external-migration-tool', 
self.external_migration_tool_default)
+
+    def get_enable_dump(self):
+        return self.get_config_bool('enable-dump', 'no')
+
+    def get_vif_script(self):
+        return self.get_config_string('vif-script', self.vif_script)
+
+    def get_dom0_min_mem(self):
+        return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default)
+
+    def get_dom0_vcpus(self):
+        return self.get_config_int('dom0-cpus', self.dom0_vcpus_default)
+
+    def get_console_limit(self):
+        return self.get_config_int('console-limit', 1024)
+
+    def get_vnclisten_address(self):
+        return self.get_config_string('vnc-listen', 
self.xend_vnc_listen_default)
+
+    def get_vncpasswd_default(self):
+        return self.get_config_string('vncpasswd',
+                                     self.vncpasswd_default)
+
+class XendOptionsFile(XendOptions):
+
+    """Default path to the config file."""
+    config_default = "/etc/xen/xend-config.sxp"
+
+    """Environment variable used to override config_default."""
+    config_var     = "XEND_CONFIG"
+
+    def set_config(self):
+        """If the config file exists, read it. If not, ignore it.
+
+        The config file is a sequence of sxp forms.
+        """
+        self.config_path = os.getenv(self.config_var, self.config_default)
+        if os.path.exists(self.config_path):
+            try:
+                fin = file(self.config_path, 'rb')
+                try:
+                    config = sxp.parse(fin)
+                finally:
+                    fin.close()
+                if config is None:
+                    config = ['xend-config']
+                else:
+                    config.insert(0, 'xend-config')
+                self.config = config
+            except Exception, ex:
+                self._logError('Reading config file %s: %s',
+                               self.config_path, str(ex))
+                raise
+        else:
+            self._logError('Config file does not exist: %s',
+                           self.config_path)
+            self.config = ['xend-config']
+
+    def get_config_value(self, name, val=None):
+        """Get the value of an atomic configuration element.
+
+        @param name: element name
+        @param val:  default value (optional, defaults to None)
+        @return: value
+        """
+        return sxp.child_value(self.config, name, val=val)
+
+    def get_config_bool(self, name, val=None):
+        v = string.lower(str(self.get_config_value(name, val)))
+        if v in ['yes', 'y', '1', 'on',  'true',  't']:
+            return True
+        if v in ['no',  'n', '0', 'off', 'false', 'f']:
+            return False
+        raise XendError("invalid xend config %s: expected bool: %s" % (name, 
v))
+
+    def get_config_int(self, name, val=None):
+        v = self.get_config_value(name, val)
+        try:
+            return int(v)
+        except Exception:
+            raise XendError("invalid xend config %s: expected int: %s" % 
(name, v))
+
+    def get_config_string(self, name, val=None):
+        return self.get_config_value(name, val)
+
+    def get_xen_api_server(self):
+        """Get the Xen-API server configuration.
+        """
+        return self.get_config_value('xen-api-server',
+                                     self.xen_api_server_default)
+
+if os.uname()[0] == 'SunOS':
+    class XendOptionsSMF(XendOptions):
+
+        def set_config(self):
+            pass
+
+        def get_config_bool(self, name, val=None):
+            try:
+                return scf.get_bool(name)
+            except scf.error, e:
+                if e[0] == scf.SCF_ERROR_NOT_FOUND:
+                    return val
+                else:
+                    raise XendError("option %s: %s:%s" % (name, e[1], e[2]))
+
+        def get_config_int(self, name, val=None):
+            try:
+                return scf.get_int(name)
+            except scf.error, e:
+                if e[0] == scf.SCF_ERROR_NOT_FOUND:
+                    return val
+                else:
+                    raise XendError("option %s: %s:%s" % (name, e[1], e[2]))
+
+        def get_config_string(self, name, val=None):
+            try:
+                return scf.get_string(name)
+            except scf.error, e:
+                if e[0] == scf.SCF_ERROR_NOT_FOUND:
+                    return val
+                else:
+                    raise XendError("option %s: %s:%s" % (name, e[1], e[2]))
+
+        def get_xen_api_server(self):
+            # When the new server is a supported configuration, we should
+            # expand this.
+            return [["unix"]]
+
+def instance():
+    """Get an instance of XendOptions.
+    Use this instead of the constructor.
+    """
+    global inst
+    try:
+        inst
+    except:
+        if os.uname()[0] == 'SunOS':
+            inst = XendOptionsSMF()
+        else:
+            inst = XendOptionsFile()
+    return inst
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendProtocol.py
--- a/tools/python/xen/xend/XendProtocol.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendProtocol.py     Fri Jan 19 14:48:57 2007 +0000
@@ -24,7 +24,7 @@ from encode import *
 from encode import *
 from xen.xend import sxp
 
-from xen.xend import XendRoot
+from xen.xend import XendOptions
 
 DEBUG = 0
 
@@ -34,7 +34,7 @@ HTTP_NO_CONTENT                      = 2
 HTTP_NO_CONTENT                      = 204
 
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 
 class XendError(RuntimeError):
@@ -218,7 +218,7 @@ class UnixXendClientProtocol(HttpXendCli
 
     def __init__(self, path=None):
         if path is None:
-            path = xroot.get_xend_unix_path()
+            path = xoptions.get_xend_unix_path()
         self.path = path
 
     def makeConnection(self, _):
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendRoot.py
--- a/tools/python/xen/xend/XendRoot.py Thu Jan 18 15:18:07 2007 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,322 +0,0 @@
-#============================================================================
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of version 2.1 of the GNU Lesser General Public
-# License as published by the Free Software Foundation.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-#============================================================================
-# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
-# Copyright (C) 2005 XenSource Ltd
-#============================================================================
-
-"""Xend root class.
-Creates the servers and handles configuration.
-
-Other classes get config variables by importing this module,
-using instance() to get a XendRoot instance, and then
-the config functions (e.g. get_xend_port()) to get
-configured values.
-"""
-
-import os
-import os.path
-import string
-import sys
-
-from xen.xend import sxp, osdep, XendLogging
-from xen.xend.XendError import XendError
-
-class XendRoot:
-    """Root of the management classes."""
-
-    """Default path to the config file."""
-    config_default = "/etc/xen/xend-config.sxp"
-
-    """Environment variable used to override config_default."""
-    config_var     = "XEND_CONFIG"
-
-    """Where network control scripts live."""
-    network_script_dir = osdep.scripts_dir
-
-    """Where block control scripts live."""
-    block_script_dir = osdep.scripts_dir
-
-    """Default path to the log file. """
-    logfile_default = "/var/log/xen/xend.log"
-
-    """Default level of information to be logged."""
-    loglevel_default = 'DEBUG'
-
-    """Default Xen-API server configuration. """
-    xen_api_server_default = [['unix']]
-
-    """Default for the flag indicating whether xend should run an http server
-    (deprecated)."""
-    xend_http_server_default = 'no'
-
-    xend_tcp_xmlrpc_server_default = 'no'
-
-    xend_unix_xmlrpc_server_default = 'yes'
-
-    """Default interface address xend listens at. """
-    xend_address_default      = ''
-
-    """Default for the flag indicating whether xend should run a relocation 
server."""
-    xend_relocation_server_default = 'no'
-
-    """Default interface address the xend relocation server listens at. """
-    xend_relocation_address_default = ''
-
-    """Default port xend serves HTTP at. """
-    xend_port_default         = '8000'
-
-    """Default port xend serves relocation at. """
-    xend_relocation_port_default = '8002'
-
-    xend_relocation_hosts_allow_default = ''
-
-    """Default for the flag indicating whether xend should run a unix-domain
-    server (deprecated)."""
-    xend_unix_server_default = 'no'
-
-    """Default external migration tool """
-    external_migration_tool_default = ''
-
-    """Default path the unix-domain server listens at."""
-    xend_unix_path_default = '/var/lib/xend/xend-socket'
-
-    dom0_min_mem_default = '0'
-
-    dom0_vcpus_default = '0'
-
-    vncpasswd_default = None
-
-    """Default interface to listen for VNC connections on"""
-    xend_vnc_listen_default = '127.0.0.1'
-
-    """Default session storage path."""
-    xend_domains_path_default = '/var/lib/xend/domains'
-
-    """Default xend management state storage."""
-    xend_state_path_default = '/var/lib/xend/state'
-
-    components = {}
-
-    def __init__(self):
-        self.config_path = None
-        self.config = None
-        self.configure()
-
-
-    def _logError(self, fmt, *args):
-        """Logging function to log to stderr. We use this for XendRoot log
-        messages because they may be logged before the logger has been
-        configured.  Other components can safely use the logger.
-        """
-        print >>sys.stderr, "xend [ERROR]", fmt % args
-
-
-    def configure(self):
-        self.set_config()
-        XendLogging.init(self.get_config_value("logfile",
-                                               self.logfile_default),
-                         self.get_config_value("loglevel",
-                                               self.loglevel_default))
-
-
-    def set_config(self):
-        """If the config file exists, read it. If not, ignore it.
-
-        The config file is a sequence of sxp forms.
-        """
-        self.config_path = os.getenv(self.config_var, self.config_default)
-        if os.path.exists(self.config_path):
-            try:
-                fin = file(self.config_path, 'rb')
-                try:
-                    config = sxp.parse(fin)
-                finally:
-                    fin.close()
-                if config is None:
-                    config = ['xend-config']
-                else:
-                    config.insert(0, 'xend-config')
-                self.config = config
-            except Exception, ex:
-                self._logError('Reading config file %s: %s',
-                               self.config_path, str(ex))
-                raise
-        else:
-            self._logError('Config file does not exist: %s',
-                           self.config_path)
-            self.config = ['xend-config']
-
-    def get_config(self, name=None):
-        """Get the configuration element with the given name, or
-        the whole configuration if no name is given.
-
-        @param name: element name (optional)
-        @return: config or none
-        """
-        if name is None:
-            val = self.config
-        else:
-            val = sxp.child(self.config, name)
-        return val
-
-    def get_config_value(self, name, val=None):
-        """Get the value of an atomic configuration element.
-
-        @param name: element name
-        @param val:  default value (optional, defaults to None)
-        @return: value
-        """
-        return sxp.child_value(self.config, name, val=val)
-
-    def get_config_bool(self, name, val=None):
-        v = string.lower(str(self.get_config_value(name, val)))
-        if v in ['yes', 'y', '1', 'on',  'true',  't']:
-            return True
-        if v in ['no',  'n', '0', 'off', 'false', 'f']:
-            return False
-        raise XendError("invalid xend config %s: expected bool: %s" % (name, 
v))
-
-    def get_config_int(self, name, val=None):
-        v = self.get_config_value(name, val)
-        try:
-            return int(v)
-        except Exception:
-            raise XendError("invalid xend config %s: expected int: %s" % 
(name, v))
-
-    def get_xen_api_server(self):
-        """Get the Xen-API server configuration.
-        """
-        return self.get_config_value('xen-api-server',
-                                     self.xen_api_server_default)
-
-    def get_xend_http_server(self):
-        """Get the flag indicating whether xend should run an http server.
-        """
-        return self.get_config_bool("xend-http-server", 
self.xend_http_server_default)
-
-    def get_xend_tcp_xmlrpc_server(self):
-        return self.get_config_bool("xend-tcp-xmlrpc-server",
-                                    self.xend_tcp_xmlrpc_server_default)
-
-    def get_xend_unix_xmlrpc_server(self):
-        return self.get_config_bool("xend-unix-xmlrpc-server",
-                                    self.xend_unix_xmlrpc_server_default)
-
-    def get_xend_relocation_server(self):
-        """Get the flag indicating whether xend should run a relocation server.
-        """
-        return self.get_config_bool("xend-relocation-server",
-                                    self.xend_relocation_server_default)
-
-    def get_xend_port(self):
-        """Get the port xend listens at for its HTTP interface.
-        """
-        return self.get_config_int('xend-port', self.xend_port_default)
-
-    def get_xend_relocation_port(self):
-        """Get the port xend listens at for connection to its relocation 
server.
-        """
-        return self.get_config_int('xend-relocation-port',
-                                   self.xend_relocation_port_default)
-
-    def get_xend_relocation_hosts_allow(self):
-        return self.get_config_value("xend-relocation-hosts-allow",
-                                     self.xend_relocation_hosts_allow_default)
-
-    def get_xend_address(self):
-        """Get the address xend listens at for its HTTP port.
-        This defaults to the empty string which allows all hosts to connect.
-        If this is set to 'localhost' only the localhost will be able to 
connect
-        to the HTTP port.
-        """
-        return self.get_config_value('xend-address', self.xend_address_default)
-
-    def get_xend_relocation_address(self):
-        """Get the address xend listens at for its relocation server port.
-        This defaults to the empty string which allows all hosts to connect.
-        If this is set to 'localhost' only the localhost will be able to 
connect
-        to the relocation port.
-        """
-        return self.get_config_value('xend-relocation-address', 
self.xend_relocation_address_default)
-
-    def get_xend_unix_server(self):
-        """Get the flag indicating whether xend should run a unix-domain 
server.
-        """
-        return self.get_config_bool("xend-unix-server", 
self.xend_unix_server_default)
-
-    def get_xend_unix_path(self):
-        """Get the path the xend unix-domain server listens at.
-        """
-        return self.get_config_value("xend-unix-path", 
self.xend_unix_path_default)
-
-    def get_xend_domains_path(self):
-        """ Get the path for persistent domain configuration storage
-        """
-        return self.get_config_value("xend-domains-path", 
self.xend_domains_path_default)
-
-    def get_xend_state_path(self):
-        """ Get the path for persistent domain configuration storage
-        """
-        return self.get_config_value("xend-state-path", 
self.xend_state_path_default)    
-
-    def get_network_script(self):
-        """@return the script used to alter the network configuration when
-        Xend starts and stops, or None if no such script is specified."""
-        
-        s = self.get_config_value('network-script')
-
-        if s:
-            result = s.split(" ")
-            result[0] = os.path.join(self.network_script_dir, result[0])
-            return result
-        else:
-            return None
-
-    def get_external_migration_tool(self):
-        """@return the name of the tool to handle virtual TPM migration."""
-        return self.get_config_value('external-migration-tool', 
self.external_migration_tool_default)
-
-    def get_enable_dump(self):
-        return self.get_config_bool('enable-dump', 'no')
-
-    def get_vif_script(self):
-        return self.get_config_value('vif-script', 'vif-bridge')
-
-    def get_dom0_min_mem(self):
-        return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default)
-
-    def get_dom0_vcpus(self):
-        return self.get_config_int('dom0-cpus', self.dom0_vcpus_default)
-
-    def get_console_limit(self):
-        return self.get_config_int('console-limit', 1024)
-
-    def get_vnclisten_address(self):
-        return self.get_config_value('vnc-listen', 
self.xend_vnc_listen_default)
-
-    def get_vncpasswd_default(self):
-        return self.get_config_value('vncpasswd',
-                                     self.vncpasswd_default)
-
-def instance():
-    """Get an instance of XendRoot.
-    Use this instead of the constructor.
-    """
-    global inst
-    try:
-        inst
-    except:
-        inst = XendRoot()
-    return inst
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/balloon.py  Fri Jan 19 14:48:57 2007 +0000
@@ -22,7 +22,7 @@ import xen.lowlevel.xc
 import xen.lowlevel.xc
 
 import XendDomain
-import XendRoot
+import XendOptions
 from XendLogging import log
 from XendError import VmError
 
@@ -107,11 +107,11 @@ def free(need_mem):
     # usage, so we recheck the required alloc each time around the loop, but
     # track the last used value so that we don't trigger too many watches.
 
-    xroot = XendRoot.instance()
+    xoptions = XendOptions.instance()
     xc = xen.lowlevel.xc.xc()
 
     try:
-        dom0_min_mem = xroot.get_dom0_min_mem() * 1024
+        dom0_min_mem = xoptions.get_dom0_min_mem() * 1024
 
         retries = 0
         sleep_time = SLEEP_TIME_GROWTH
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/image.py    Fri Jan 19 14:48:57 2007 +0000
@@ -173,7 +173,7 @@ class ImageHandler:
         """Build the domain. Define in subclass."""
         raise NotImplementedError()
 
-    def createDeviceModel(self):
+    def createDeviceModel(self, restore = False):
         """Create device model for the domain (define in subclass if 
needed)."""
         pass
     
@@ -377,11 +377,12 @@ class HVMImageHandler(ImageHandler):
     # xm config file
     def parseDeviceModelArgs(self, imageConfig, deviceConfig):
         dmargs = [ 'boot', 'fda', 'fdb', 'soundhw',
-                   'localtime', 'serial', 'stdvga', 'isa', 'vcpus',
+                   'localtime', 'serial', 'stdvga', 'isa',
                    'acpi', 'usb', 'usbdevice', 'keymap' ]
-        ret = []
         hvmDeviceConfig = imageConfig['hvm']['devices']
-        
+
+        ret = ['-vcpus', str(self.vm.getVCpuCount())]
+
         for a in dmargs:
             v = hvmDeviceConfig.get(a)
 
@@ -461,14 +462,14 @@ class HVMImageHandler(ImageHandler):
             vnclisten = imageConfig.get('vnclisten')
 
             if not(vnclisten):
-                vnclisten = (xen.xend.XendRoot.instance().
+                vnclisten = (xen.xend.XendOptions.instance().
                              get_vnclisten_address())
             if vnclisten:
                 ret += ['-vnclisten', vnclisten]
 
             vncpasswd = vncpasswd_vmconfig
             if vncpasswd is None:
-                vncpasswd = (xen.xend.XendRoot.instance().
+                vncpasswd = (xen.xend.XendOptions.instance().
                              get_vncpasswd_default())
                 if vncpasswd is None:
                     raise VmError('vncpasswd is not set up in ' +
@@ -478,7 +479,7 @@ class HVMImageHandler(ImageHandler):
 
         return ret
 
-    def createDeviceModel(self):
+    def createDeviceModel(self, restore = False):
         if self.pid:
             return
         # Execute device model.
@@ -487,6 +488,8 @@ class HVMImageHandler(ImageHandler):
         args = args + ([ "-d",  "%d" % self.vm.getDomid(),
                   "-m", "%s" % (self.getRequiredInitialReservation() / 1024)])
         args = args + self.dmargs
+        if restore:
+            args = args + ([ "-loadvm", "/tmp/xen.qemu-dm.%d" % 
self.vm.getDomid() ])
         env = dict(os.environ)
         if self.display:
             env['DISPLAY'] = self.display
@@ -505,12 +508,16 @@ class HVMImageHandler(ImageHandler):
         self.register_reboot_feature_watch()
         self.pid = self.vm.gatherDom(('image/device-model-pid', int))
 
-    def destroy(self):
+    def destroy(self, suspend = False):
         self.unregister_shutdown_watch()
         self.unregister_reboot_feature_watch();
         if self.pid:
             try:
-                os.kill(self.pid, signal.SIGKILL)
+                sig = signal.SIGKILL
+                if suspend:
+                    log.info("use sigusr1 to signal qemu %d", self.pid)
+                    sig = signal.SIGUSR1
+                os.kill(self.pid, sig)
             except OSError, exn:
                 log.exception(exn)
             try:
@@ -598,6 +605,9 @@ class IA64_HVM_ImageHandler(HVMImageHand
         # ROM size for guest firmware, ioreq page and xenstore page
         extra_pages = 1024 + 3
         return mem_kb + extra_pages * page_kb
+
+    def getRequiredInitialReservation(self):
+        return self.vm.getMemoryTarget()
 
     def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
         # Explicit shadow memory is not a concept 
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/osdep.py
--- a/tools/python/xen/xend/osdep.py    Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/osdep.py    Fri Jan 19 14:48:57 2007 +0000
@@ -33,9 +33,19 @@ _pygrub_path = {
     "SunOS": "/usr/lib/xen/bin/pygrub"
 }
 
+_netback_type = {
+    "SunOS": "SUNW_mac"
+}
+
+_vif_script = {
+    "SunOS": "vif-vnic"
+}
+
 def _get(var, default=None):
     return var.get(os.uname()[0], default)
 
 scripts_dir = _get(_scripts_dir, "/etc/xen/scripts")
 xend_autorestart = _get(_xend_autorestart)
 pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub")
+netback_type = _get(_netback_type, "netfront")
+vif_script = _get(_vif_script, "vif-bridge")
diff -r 8475a4e0425e -r 3c8bb086025e 
tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Thu Jan 18 15:18:07 
2007 +0000
+++ b/tools/python/xen/xend/server/DevController.py     Fri Jan 19 14:48:57 
2007 +0000
@@ -19,7 +19,7 @@ from threading import Event
 from threading import Event
 import types
 
-from xen.xend import sxp, XendRoot
+from xen.xend import sxp, XendOptions
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
 
@@ -50,7 +50,7 @@ xenbusState = {
     'Closed'       : 6,
     }
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys())))
 
@@ -324,7 +324,7 @@ class DevController:
                       Make sure that the migration has finished and only
                       then return from the call.
         """
-        tool = xroot.get_external_migration_tool()
+        tool = xoptions.get_external_migration_tool()
         if tool:
             log.info("Calling external migration tool for step %d" % step)
             fd = os.popen("%s -type %s -step %d -host %s -domname %s" %
@@ -341,7 +341,7 @@ class DevController:
         """ Recover from device migration. The given step was the
             last one that was successfully executed.
         """
-        tool = xroot.get_external_migration_tool()
+        tool = xoptions.get_external_migration_tool()
         if tool:
             log.info("Calling external migration tool")
             fd = os.popen("%s -type %s -step %d -host %s -domname %s -recover" 
%
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/SrvRoot.py
--- a/tools/python/xen/xend/server/SrvRoot.py   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/SrvRoot.py   Fri Jan 19 14:48:57 2007 +0000
@@ -25,7 +25,7 @@ class SrvRoot(SrvDir):
     """Server sub-components. Each entry is (name, class), where
     'name' is the entry name and  'class' is the name of its class.
     """
-    #todo Get this list from the XendRoot config.
+    #todo Get this list from the XendOptions config.
     subdirs = [
         ('node',    'SrvNode'       ),
         ('domain',  'SrvDomainDir'  ),
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/SrvServer.py Fri Jan 19 14:48:57 2007 +0000
@@ -48,7 +48,7 @@ from threading import Thread
 
 from xen.web.httpserver import HttpServer, UnixHttpServer
 
-from xen.xend import XendNode, XendRoot, XendAPI
+from xen.xend import XendNode, XendOptions, XendAPI
 from xen.xend import Vifctl
 from xen.xend.XendLogging import log
 from xen.xend.XendClient import XEN_API_SOCKET
@@ -57,7 +57,7 @@ from SrvRoot import SrvRoot
 from SrvRoot import SrvRoot
 from XMLRPCServer import XMLRPCServer
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 
 class XendServers:
@@ -65,6 +65,7 @@ class XendServers:
     def __init__(self, root):
         self.servers = []
         self.root = root
+        self.running = False
         self.cleaningUp = False
         self.reloadingConfig = False
 
@@ -79,6 +80,7 @@ class XendServers:
                 server.shutdown()
             except:
                 pass
+        self.running = False
 
     def reloadConfig(self, signum = 0, frame = None):
         log.debug("SrvServer.reloadConfig()")
@@ -107,12 +109,11 @@ class XendServers:
                 if server.ready:
                     continue
 
-                thread = Thread(target=server.run, 
name=server.__class__.__name__)
-                if isinstance(server, HttpServer):
-                    thread.setDaemon(True)
+                thread = Thread(target=server.run,
+                                name=server.__class__.__name__)
+                thread.setDaemon(True)
                 thread.start()
                 threads.append(thread)
-
 
             # check for when all threads have initialized themselves and then
             # close the status pipe
@@ -143,47 +144,32 @@ class XendServers:
                 status.close()
                 status = None
 
-            # Interruptible Thread.join - Python Bug #1167930
-            #   Replaces: for t in threads: t.join()
-            #   Reason:   The above will cause python signal handlers to be
-            #             blocked so we're not able to catch SIGTERM in any
-            #             way for cleanup
-            runningThreads = threads
-            while len(runningThreads) > 0:
-                try:
-                    for t in threads:
-                        t.join(1.0)
-                    runningThreads = [t for t in threads
-                                      if t.isAlive() and not t.isDaemon()]
-                    if self.cleaningUp and len(runningThreads) > 0:
-                        log.debug("Waiting for %s." %
-                                  [x.getName() for x in runningThreads])
-                except:
-                    pass
-
+            # loop to keep main thread alive until it receives a SIGTERM
+            self.running = True
+            while self.running:
+                time.sleep(100000000)
+                
             if self.reloadingConfig:
                 log.info("Restarting all XML-RPC and Xen-API servers...")
                 self.cleaningUp = False
                 self.reloadingConfig = False
-                xroot.set_config()
-                new_servers = [x for x in self.servers
-                               if isinstance(x, HttpServer)]
-                self.servers = new_servers
+                xoptions.set_config()
+                self.servers = []
                 _loadConfig(self, self.root, True)
             else:
                 break
 
 def _loadConfig(servers, root, reload):
-    if not reload and xroot.get_xend_http_server():
+    if xoptions.get_xend_http_server():
         servers.add(HttpServer(root,
-                               xroot.get_xend_address(),
-                               xroot.get_xend_port()))
-    if not reload and xroot.get_xend_unix_server():
-        path = xroot.get_xend_unix_path()
+                               xoptions.get_xend_address(),
+                               xoptions.get_xend_port()))
+    if  xoptions.get_xend_unix_server():
+        path = xoptions.get_xend_unix_path()
         log.info('unix path=' + path)
         servers.add(UnixHttpServer(root, path))
 
-    api_cfg = xroot.get_xen_api_server()
+    api_cfg = xoptions.get_xen_api_server()
     if api_cfg:
         try:
             addrs = [(str(x[0]).split(':'),
@@ -218,10 +204,10 @@ def _loadConfig(servers, root, reload):
         except TypeError, exn:
             log.error('Xen-API server configuration %s is invalid.', api_cfg)
 
-    if xroot.get_xend_tcp_xmlrpc_server():
+    if xoptions.get_xend_tcp_xmlrpc_server():
         servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False, True))
 
-    if xroot.get_xend_unix_xmlrpc_server():
+    if xoptions.get_xend_unix_xmlrpc_server():
         servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False))
 
 
diff -r 8475a4e0425e -r 3c8bb086025e 
tools/python/xen/xend/server/XMLRPCServer.py
--- a/tools/python/xen/xend/server/XMLRPCServer.py      Thu Jan 18 15:18:07 
2007 +0000
+++ b/tools/python/xen/xend/server/XMLRPCServer.py      Fri Jan 19 14:48:57 
2007 +0000
@@ -179,21 +179,24 @@ class XMLRPCServer:
         # Custom runloop so we can cleanup when exiting.
         # -----------------------------------------------------------------
         try:
-            self.server.socket.settimeout(1.0)
             while self.running:
                 self.server.handle_request()
         finally:
-            self.cleanup()
+            self.shutdown()
 
     def cleanup(self):
-        log.debug("XMLRPCServer.cleanup()")
-        try:
-            self.server.socket.close()
+        log.debug('XMLRPCServer.cleanup()')
+        try:
+            if hasattr(self, 'server'):
+                # shutdown socket explicitly to allow reuse
+                self.server.socket.shutdown(socket.SHUT_RDWR)
+                self.server.socket.close()
         except Exception, exn:
             log.exception(exn)
             pass
 
     def shutdown(self):
         self.running = False
-        self.ready = False
-
+        if self.ready:
+            self.ready = False
+            self.cleanup()
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/netif.py     Fri Jan 19 14:48:57 2007 +0000
@@ -24,10 +24,10 @@ import random
 import random
 import re
 
-from xen.xend import XendRoot
+from xen.xend import XendOptions
 from xen.xend.server.DevController import DevController
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 def randomMAC():
     """Generate a random MAC address.
@@ -138,8 +138,8 @@ class NetifController(DevController):
     def getDeviceDetails(self, config):
         """@see DevController.getDeviceDetails"""
 
-        script = os.path.join(xroot.network_script_dir,
-                              config.get('script', xroot.get_vif_script()))
+        script = os.path.join(xoptions.network_script_dir,
+                              config.get('script', xoptions.get_vif_script()))
         typ     = config.get('type')
         bridge  = config.get('bridge')
         mac     = config.get('mac')
@@ -150,9 +150,8 @@ class NetifController(DevController):
 
         devid = self.allocateDeviceID()
 
-        # The default type is 'netfront'.
         if not typ:
-            typ = 'netfront'
+            typ = xoptions.netback_type
             
         if not mac:
             mac = randomMAC()
@@ -190,7 +189,7 @@ class NetifController(DevController):
         (script, ip, bridge, mac, typ, vifname, rate, uuid) = devinfo
 
         if script:
-            network_script_dir = xroot.network_script_dir + os.sep
+            network_script_dir = xoptions.network_script_dir + os.sep
             result['script'] = script.replace(network_script_dir, "")
         if ip:
             result['ip'] = ip
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/relocate.py  Fri Jan 19 14:48:57 2007 +0000
@@ -24,7 +24,7 @@ from xen.web import protocol, tcp, unix
 
 from xen.xend import sxp
 from xen.xend import XendDomain
-from xen.xend import XendRoot
+from xen.xend import XendOptions
 from xen.xend.XendError import XendError
 from xen.xend.XendLogging import log
 
@@ -114,15 +114,15 @@ class RelocationProtocol(protocol.Protoc
 
 
 def listenRelocation():
-    xroot = XendRoot.instance()
-    if xroot.get_xend_unix_server():
+    xoptions = XendOptions.instance()
+    if xoptions.get_xend_unix_server():
         path = '/var/lib/xend/relocation-socket'
         unix.UnixListener(path, RelocationProtocol)
-    if xroot.get_xend_relocation_server():
-        port = xroot.get_xend_relocation_port()
-        interface = xroot.get_xend_relocation_address()
+    if xoptions.get_xend_relocation_server():
+        port = xoptions.get_xend_relocation_port()
+        interface = xoptions.get_xend_relocation_address()
 
-        hosts_allow = xroot.get_xend_relocation_hosts_allow()
+        hosts_allow = xoptions.get_xend_relocation_hosts_allow()
         if hosts_allow == '':
             hosts_allow = None
         else:
diff -r 8475a4e0425e -r 3c8bb086025e 
tools/python/xen/xend/server/tests/test_controllers.py
--- a/tools/python/xen/xend/server/tests/test_controllers.py    Thu Jan 18 
15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/tests/test_controllers.py    Fri Jan 19 
14:48:57 2007 +0000
@@ -2,9 +2,9 @@ import re
 import re
 import unittest
 
-import xen.xend.XendRoot
+import xen.xend.XendOptions
 
-xen.xend.XendRoot.XendRoot.config_default = '/dev/null'
+xen.xend.XendOptions.XendOptions.config_default = '/dev/null'
 
 from xen.xend.server import netif
 
@@ -13,7 +13,7 @@ FAKE_DEVID = 63
 FAKE_DEVID = 63
 
 
-xroot = xen.xend.XendRoot.instance()
+xoptions = xen.xend.XendOptions.instance()
 
 
 class test_controllers(unittest.TestCase):
@@ -36,8 +36,8 @@ class test_controllers(unittest.TestCase
 
         self.assertEqual(backdets['handle'], str(FAKE_DEVID))
         self.assertEqual(backdets['script'],
-                         os.path.join(xroot.network_script_dir,
-                                      xroot.get_vif_script()))
+                         os.path.join(xoptions.network_script_dir,
+                                      xoptions.get_vif_script()))
         self.assertValidMac(backdets['mac'], expectedMac)
 
         self.assertEqual(frontdets['handle'], str(FAKE_DEVID))
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/tpmif.py
--- a/tools/python/xen/xend/server/tpmif.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/tpmif.py     Fri Jan 19 14:48:57 2007 +0000
@@ -20,7 +20,7 @@
 
 """Support for virtual TPM interfaces."""
 
-from xen.xend import XendRoot
+from xen.xend import XendOptions
 from xen.xend.XendLogging import log
 from xen.xend.XendError import XendError
 from xen.xend.XendConstants import DEV_MIGRATE_TEST, VTPM_DELETE_SCRIPT
@@ -29,7 +29,7 @@ import os
 import os
 import re
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 def destroy_vtpmstate(name):
     if os.path.exists(VTPM_DELETE_SCRIPT):
@@ -88,7 +88,7 @@ class TPMifController(DevController):
     def migrate(self, deviceConfig, network, dst, step, domName):
         """@see DevContoller.migrate"""
         if network:
-            tool = xroot.get_external_migration_tool()
+            tool = xoptions.get_external_migration_tool()
             if tool != '':
                 log.info("Request to network-migrate device to %s. step=%d.",
                          dst, step)
@@ -116,7 +116,7 @@ class TPMifController(DevController):
     def recover_migrate(self, deviceConfig, network, dst, step, domName):
         """@see DevContoller.recover_migrate"""
         if network:
-            tool = xroot.get_external_migration_tool()
+            tool = xoptions.get_external_migration_tool()
             if tool != '':
                 log.info("Request to recover network-migrated device. last 
good step=%d.",
                          step)
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/vfbif.py
--- a/tools/python/xen/xend/server/vfbif.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/vfbif.py     Fri Jan 19 14:48:57 2007 +0000
@@ -52,7 +52,7 @@ class VfbifController(DevController):
             if config.has_key("vncpasswd"):
                 passwd = config["vncpasswd"]
             else:
-                passwd = xen.xend.XendRoot.instance().get_vncpasswd_default()
+                passwd = 
xen.xend.XendOptions.instance().get_vncpasswd_default()
             if passwd:
                 self.vm.storeVm("vncpasswd", passwd)
                 log.debug("Stored a VNC password for vfb access")
@@ -66,7 +66,7 @@ class VfbifController(DevController):
             elif config.has_key("vncdisplay"):
                 args += ["--vncport", "%d" % (5900 + 
int(config["vncdisplay"]))]
             vnclisten = config.get("vnclisten",
-                                   
xen.xend.XendRoot.instance().get_vnclisten_address())
+                                   
xen.xend.XendOptions.instance().get_vnclisten_address())
             args += [ "--listen", vnclisten ]
             spawn_detached(args[0], args + std_args, os.environ)
         elif t == "sdl":
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xm/create.py     Fri Jan 19 14:48:57 2007 +0000
@@ -189,6 +189,10 @@ gopts.var('vcpus', val='VCPUS',
 gopts.var('vcpus', val='VCPUS',
           fn=set_int, default=1,
           use="# of Virtual CPUS in domain.")
+
+gopts.var('vcpu_avail', val='VCPUS',
+          fn=set_long, default=None,
+          use="Bitmask for virtual CPUs to make available immediately.")
 
 gopts.var('cpu_cap', val='CAP',
           fn=set_int, default=None,
@@ -740,7 +744,7 @@ def make_config(vals):
 
     map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory',
                    'restart', 'on_poweroff',
-                   'on_reboot', 'on_crash', 'vcpus', 'features',
+                   'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features',
                    'on_xend_start', 'on_xend_stop'])
 
     if vals.uuid is not None:
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xm/main.py       Fri Jan 19 14:48:57 2007 +0000
@@ -693,12 +693,15 @@ def parse_doms_info(info):
         up_time = time.time() - start_time
 
     return {
-        'domid'    : get_info('domid',        str,   ''),
-        'name'     : get_info('name',         str,   '??'),
+        'domid'    : get_info('domid',              str,   ''),
+        'name'     : get_info('name',               str,   '??'),
         'mem'      : get_info('memory_dynamic_min', int,   0),
-        'vcpus'    : get_info('online_vcpus',        int,   0),
-        'state'    : get_info('state',        str,    ''),
-        'cpu_time' : get_info('cpu_time',     float, 0),
+        'state'    : get_info('state',              str,   ''),
+        'cpu_time' : get_info('cpu_time',           float, 0.0),
+        # VCPUs is the number online when the VM is up, or the number
+        # configured otherwise.
+        'vcpus'    : get_info('online_vcpus', int,
+                              get_info('vcpus', int, 0)),
         'up_time'  : up_time,
         'seclabel' : security.get_security_printlabel(info),
         }
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/opts.py
--- a/tools/python/xen/xm/opts.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xm/opts.py       Fri Jan 19 14:48:57 2007 +0000
@@ -571,6 +571,14 @@ def set_int(opt, k, v):
         opt.opts.err('Invalid value: ' + str(v))
     opt.set(v)
 
+def set_long(opt, k, v):
+    """Set an option to a long integer value."""
+    try:
+        v = long(v)
+    except:
+        opt.opts.err('Invalid value: ' + str(v))
+    opt.set(v)
+
 def set_float(opt, k, v):
     """Set an option to a float value."""
     try:
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/tests/test_create.py
--- a/tools/python/xen/xm/tests/test_create.py  Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xm/tests/test_create.py  Fri Jan 19 14:48:57 2007 +0000
@@ -3,9 +3,9 @@ import tempfile
 import tempfile
 import unittest
 
-import xen.xend.XendRoot
-
-xen.xend.XendRoot.XendRoot.config_default = '/dev/null'
+import xen.xend.XendOptions
+
+xen.xend.XendOptions.XendOptions.config_default = '/dev/null'
 
 import xen.xm.create
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c        Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/xcutils/xc_restore.c        Fri Jan 19 14:48:57 2007 +0000
@@ -19,12 +19,13 @@ main(int argc, char **argv)
 main(int argc, char **argv)
 {
     unsigned int xc_fd, io_fd, domid, nr_pfns, store_evtchn, console_evtchn;
+    unsigned int hvm, pae, apic;
     int ret;
     unsigned long store_mfn, console_mfn;
 
-    if (argc != 6)
+    if (argc != 9)
        errx(1,
-            "usage: %s iofd domid nr_pfns store_evtchn console_evtchn",
+            "usage: %s iofd domid nr_pfns store_evtchn console_evtchn hvm pae 
apic",
             argv[0]);
 
     xc_fd = xc_interface_open();
@@ -36,9 +37,19 @@ main(int argc, char **argv)
     nr_pfns = atoi(argv[3]);
     store_evtchn = atoi(argv[4]);
     console_evtchn = atoi(argv[5]);
+    hvm  = atoi(argv[6]);
+    pae  = atoi(argv[7]);
+    apic = atoi(argv[8]);
 
-    ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
-                          &store_mfn, console_evtchn, &console_mfn);
+    if (hvm) {
+         /* pass the memsize to xc_hvm_restore to find the store_mfn */
+        store_mfn = hvm;
+        ret = xc_hvm_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+                &store_mfn, console_evtchn, &console_mfn, pae, apic);
+    } else 
+        ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+                &store_mfn, console_evtchn, &console_mfn);
+
     if (ret == 0) {
        printf("store-mfn %li\n", store_mfn);
        printf("console-mfn %li\n", console_mfn);
diff -r 8475a4e0425e -r 3c8bb086025e tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/xcutils/xc_save.c   Fri Jan 19 14:48:57 2007 +0000
@@ -51,7 +51,10 @@ main(int argc, char **argv)
     max_f = atoi(argv[4]);
     flags = atoi(argv[5]);
 
-    ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+    if (flags & XCFLAGS_HVM)
+        ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+    else 
+        ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
&suspend);
 
     xc_interface_close(xc_fd);
 
diff -r 8475a4e0425e -r 3c8bb086025e 
unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
--- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c  Thu Jan 18 
15:18:07 2007 +0000
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c  Fri Jan 19 
14:48:57 2007 +0000
@@ -182,12 +182,17 @@ static uint64_t get_callback_via(struct 
 static uint64_t get_callback_via(struct pci_dev *pdev)
 {
 #ifdef __ia64__
-       int irq;
+       int irq, rid;
        for (irq = 0; irq < 16; irq++) {
                if (isa_irq_to_vector(irq) == pdev->irq)
                        return irq;
        }
-       return 0;
+       /* use Requester-ID as callback_irq */
+       /* RID: '<#bus(8)><#dev(5)><#func(3)>' (cf. PCI-Express spec) */
+       rid = ((pdev->bus->number & 0xff) << 8) | pdev->devfn;
+       printk(KERN_INFO DRV_NAME ":use Requester-ID(%04x) as callback irq\n",
+              rid);
+       return rid | IA64_CALLBACK_IRQ_RID;
 #else /* !__ia64__ */
        if (pdev->irq < 16)
                return pdev->irq; /* ISA IRQ */
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/asm-offsets.c       Fri Jan 19 14:48:57 2007 +0000
@@ -56,10 +56,12 @@ void foo(void)
        DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct vcpu, 
arch._thread.on_ustack));
 
        DEFINE(IA64_VCPU_DOMAIN_OFFSET, offsetof (struct vcpu, domain));
+       DEFINE(IA64_VCPU_HYPERCALL_CONTINUATION_OFS, offsetof (struct vcpu, 
arch.hypercall_continuation));
        DEFINE(IA64_VCPU_META_RR0_OFFSET, offsetof (struct vcpu, 
arch.metaphysical_rr0));
        DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, 
arch.metaphysical_saved_rr0));
        DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, 
arch.breakimm));
        DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva));
+       DEFINE(IA64_VCPU_EVENT_CALLBACK_IP_OFFSET, offsetof (struct vcpu, 
arch.event_callback_ip));
        DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0]));
        DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3]));
        DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3]));
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/asm-xsi-offsets.c
--- a/xen/arch/ia64/asm-xsi-offsets.c   Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/asm-xsi-offsets.c   Fri Jan 19 14:48:57 2007 +0000
@@ -62,7 +62,7 @@ void foo(void)
        DEFINE_MAPPED_REG_OFS(XSI_ITV_OFS, itv);
        DEFINE_MAPPED_REG_OFS(XSI_PTA_OFS, pta);
        DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
-       DEFINE_MAPPED_REG_OFS(XSI_INCOMPL_REGFR_OFS, incomplete_regframe);
+       DEFINE_MAPPED_REG_OFS(XSI_VPSR_PP_OFS, vpsr_pp);
        DEFINE_MAPPED_REG_OFS(XSI_METAPHYS_OFS, metaphysical_mode);
        DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
        DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/Makefile
--- a/xen/arch/ia64/linux-xen/Makefile  Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux-xen/Makefile  Fri Jan 19 14:48:57 2007 +0000
@@ -1,3 +1,6 @@ obj-y += efi.o
+subdir-y += sn
+
+obj-y += cmdline.o
 obj-y += efi.o
 obj-y += entry.o
 obj-y += irq_ia64.o
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/README.origin
--- a/xen/arch/ia64/linux-xen/README.origin     Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux-xen/README.origin     Fri Jan 19 14:48:57 2007 +0000
@@ -5,6 +5,7 @@
 # (e.g. with #ifdef XEN or XEN in a comment) so that they can be
 # easily updated to future versions of the corresponding Linux files.
 
+cmdline.c              -> linux/lib/cmdline.c
 efi.c                  -> linux/arch/ia64/kernel/efi.c
 entry.h                        -> linux/arch/ia64/kernel/entry.h
 entry.S                        -> linux/arch/ia64/kernel/entry.S
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/cmdline.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/cmdline.c Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,131 @@
+/*
+ * linux/lib/cmdline.c
+ * Helper functions generally used for parsing kernel command line
+ * and module options.
+ *
+ * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ *
+ * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#ifdef XEN
+#include <xen/lib.h>
+#endif
+
+
+/**
+ *     get_option - Parse integer from an option string
+ *     @str: option string
+ *     @pint: (output) integer value parsed from @str
+ *
+ *     Read an int from an option string; if available accept a subsequent
+ *     comma as well.
+ *
+ *     Return values:
+ *     0 : no int in string
+ *     1 : int found, no subsequent comma
+ *     2 : int found including a subsequent comma
+ */
+
+int get_option (char **str, int *pint)
+{
+       char *cur = *str;
+
+       if (!cur || !(*cur))
+               return 0;
+#ifndef XEN
+       *pint = simple_strtol (cur, str, 0);
+#else
+       *pint = simple_strtol (cur, (const char**)str, 0);
+#endif
+       if (cur == *str)
+               return 0;
+       if (**str == ',') {
+               (*str)++;
+               return 2;
+       }
+
+       return 1;
+}
+
+/**
+ *     get_options - Parse a string into a list of integers
+ *     @str: String to be parsed
+ *     @nints: size of integer array
+ *     @ints: integer array
+ *
+ *     This function parses a string containing a comma-separated
+ *     list of integers.  The parse halts when the array is
+ *     full, or when no more numbers can be retrieved from the
+ *     string.
+ *
+ *     Return value is the character in the string which caused
+ *     the parse to end (typically a null terminator, if @str is
+ *     completely parseable).
+ */
+ 
+char *get_options(const char *str, int nints, int *ints)
+{
+       int res, i = 1;
+
+       while (i < nints) {
+               res = get_option ((char **)&str, ints + i);
+               if (res == 0)
+                       break;
+               i++;
+               if (res == 1)
+                       break;
+       }
+       ints[0] = i - 1;
+       return (char *)str;
+}
+
+/**
+ *     memparse - parse a string with mem suffixes into a number
+ *     @ptr: Where parse begins
+ *     @retptr: (output) Pointer to next char after parse completes
+ *
+ *     Parses a string into a number.  The number stored at @ptr is
+ *     potentially suffixed with %K (for kilobytes, or 1024 bytes),
+ *     %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
+ *     1073741824).  If the number is suffixed with K, M, or G, then
+ *     the return value is the number multiplied by one kilobyte, one
+ *     megabyte, or one gigabyte, respectively.
+ */
+
+unsigned long long memparse (char *ptr, char **retptr)
+{
+#ifndef XEN
+       unsigned long long ret = simple_strtoull (ptr, retptr, 0);
+#else
+       unsigned long long ret = simple_strtoull (ptr, (const char**)retptr, 0);
+#endif
+
+       switch (**retptr) {
+       case 'G':
+       case 'g':
+               ret <<= 10;
+       case 'M':
+       case 'm':
+               ret <<= 10;
+       case 'K':
+       case 'k':
+               ret <<= 10;
+               (*retptr)++;
+       default:
+               break;
+       }
+       return ret;
+}
+
+
+EXPORT_SYMBOL(memparse);
+EXPORT_SYMBOL(get_option);
+EXPORT_SYMBOL(get_options);
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/entry.S
--- a/xen/arch/ia64/linux-xen/entry.S   Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux-xen/entry.S   Fri Jan 19 14:48:57 2007 +0000
@@ -676,7 +676,9 @@ GLOBAL_ENTRY(ia64_ret_from_syscall)
        cmp.ge p6,p7=r8,r0                      // syscall executed 
successfully?
        adds r2=PT(R8)+16,sp                    // r2 = &pt_regs.r8
        mov r10=r0                              // clear error indication in r10
+#ifndef XEN    
 (p7)   br.cond.spnt handle_syscall_error       // handle potential syscall 
failure
+#endif
 END(ia64_ret_from_syscall)
        // fall through
 /*
@@ -764,7 +766,9 @@ ENTRY(ia64_leave_syscall)
        ld8 r19=[r2],PT(B6)-PT(LOADRS)          // load ar.rsc value for 
"loadrs"
        nop.i 0
        ;;
+#ifndef XEN    
        mov r16=ar.bsp                          // M2  get existing backing 
store pointer
+#endif    
        ld8 r18=[r2],PT(R9)-PT(B6)              // load b6
 #ifndef XEN
 (p6)   and r15=TIF_WORK_MASK,r31               // any work other than 
TIF_SYSCALL_TRACE?
@@ -814,7 +818,11 @@ ENTRY(ia64_leave_syscall)
        mov f8=f0                               // F    clear f8
        ;;
        ld8.fill r12=[r2]                       // M0|1 restore r12 (sp)
+#ifdef XEN    
+       ld8.fill r2=[r3]                        // M0|1
+#else    
        ld8.fill r15=[r3]                       // M0|1 restore r15
+#endif    
        mov b6=r18                              // I0   restore b6
 
 #ifdef XEN
@@ -827,7 +835,9 @@ ENTRY(ia64_leave_syscall)
 
        srlz.d                          // M0   ensure interruption collection 
is off (for cover)
        shr.u r18=r19,16                // I0|1 get byte size of existing 
"dirty" partition
+#ifndef XEN    
        cover                           // B    add current frame into dirty 
partition & set cr.ifs
+#endif    
        ;;
 (pUStk) ld4 r17=[r17]                  // M0|1 r17 = 
cpu_data->phys_stacked_size_p8
        mov r19=ar.bsp                  // M2   get new backing store pointer
@@ -893,23 +903,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 .work_processed_kernel:
 #ifdef XEN
        ;;
-(pUStk) ssm psr.i
-(pUStk)    br.call.sptk.many b0=do_softirq
-(pUStk) rsm psr.i
-    ;;
-       alloc loc0=ar.pfs,0,1,1,0
-       adds out0=16,r12
+(pUStk)        ssm psr.i
+(pUStk)        br.call.sptk.many b0=do_softirq
+(pUStk)        ssm psr.i
+       ;;
+(pUStk)        br.call.sptk.many b0=reflect_event
+       ;;
        adds r7 = PT(EML_UNAT)+16,r12
        ;;
        ld8 r7 = [r7]
        ;;
-(pUStk)        br.call.sptk.many b0=reflect_event
-//(pUStk)      br.call.sptk.many b0=deliver_pending_interrupt
-    ;;
-       mov ar.pfs=loc0
        mov ar.unat=r7  /* load eml_unat  */
        mov r31=r0
-
 
 #else
        adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -1184,8 +1189,11 @@ skip_rbs_switch:
        mov cr.ipsr=r29         // M2
        mov ar.pfs=r26          // I0
 (pLvSys)mov r17=r0             // A  clear r17 for leave_syscall, no-op 
otherwise
-
+#ifdef XEN
+       mov cr.ifs=r30          // M2
+#else    
 (p9)   mov cr.ifs=r30          // M2
+#endif
        mov b0=r21              // I0
 (pLvSys)mov r18=r0             // A  clear r18 for leave_syscall, no-op 
otherwise
 
@@ -1195,7 +1203,11 @@ skip_rbs_switch:
        ;;
 (pUStk)        mov ar.rnat=r24         // M2 must happen with RSE in lazy mode
        nop 0
+#ifdef XEN    
+(pLvSys)mov r15=r0
+#else
 (pLvSys)mov r2=r0
+#endif
 
        mov ar.rsc=r27          // M2
        mov pr=r31,-1           // I0
@@ -1459,7 +1471,89 @@ 1:       mov gp=loc2                             // 
restore gp
        br.ret.sptk.many rp
 END(unw_init_running)
 
-#ifndef XEN
+#ifdef XEN
+GLOBAL_ENTRY(ia64_do_multicall_call)
+       movl r2=ia64_hypercall_table;;
+       shladd r2=r38,3,r2;;
+       ld8 r2=[r2];;
+       mov b6=r2
+       br.sptk.many b6;;
+END(ia64_do_multicall_call)
+
+    
+       .rodata
+       .align 8
+       .globl ia64_hypercall_table
+ia64_hypercall_table:
+       data8 do_ni_hypercall           /* do_set_trap_table *//*  0 */
+       data8 do_ni_hypercall           /* do_mmu_update */
+       data8 do_ni_hypercall           /* do_set_gdt */
+       data8 do_ni_hypercall           /* do_stack_switch */
+       data8 do_ni_hypercall           /* do_set_callbacks */
+       data8 do_ni_hypercall           /* do_fpu_taskswitch *//*  5 */
+       data8 do_sched_op_compat
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall           /* do_set_debugreg */
+       data8 do_ni_hypercall           /* do_get_debugreg */
+       data8 do_ni_hypercall           /* do_update_descriptor * 10 */
+       data8 do_ni_hypercall           /* do_ni_hypercall */
+       data8 do_memory_op
+       data8 do_multicall
+       data8 do_ni_hypercall           /* do_update_va_mapping */
+       data8 do_ni_hypercall           /* do_set_timer_op */  /* 15 */
+       data8 do_ni_hypercall
+       data8 do_xen_version
+       data8 do_console_io
+       data8 do_ni_hypercall
+       data8 do_grant_table_op                                /* 20 */
+       data8 do_ni_hypercall           /* do_vm_assist */
+       data8 do_ni_hypercall           /* do_update_va_mapping_othe */
+       data8 do_ni_hypercall           /* (x86 only) */
+       data8 do_ni_hypercall           /* do_vcpu_op */
+       data8 do_ni_hypercall           /* (x86_64 only) */    /* 25 */
+       data8 do_ni_hypercall           /* do_mmuext_op */
+       data8 do_ni_hypercall           /* do_acm_op */
+       data8 do_ni_hypercall           /* do_nmi_op */
+       data8 do_sched_op
+       data8 do_callback_op            /*  */                 /* 30 */
+       data8 do_xenoprof_op            /*  */
+       data8 do_event_channel_op
+       data8 do_physdev_op
+       data8 do_hvm_op                 /*  */
+       data8 do_sysctl                 /*  */                  /* 35 */
+       data8 do_domctl                 /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */                 /* 40 */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */                 /* 45 */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_dom0vp_op              /* dom0vp_op */
+       data8 do_pirq_guest_eoi         /* arch_1 */
+       data8 do_ni_hypercall           /* arch_2 */           /* 50 */
+       data8 do_ni_hypercall           /* arch_3 */
+       data8 do_ni_hypercall           /* arch_4 */
+       data8 do_ni_hypercall           /* arch_5 */
+       data8 do_ni_hypercall           /* arch_6 */
+       data8 do_ni_hypercall           /* arch_7 */           /* 55 */
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall                                  /* 60 */
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+
+       // guard against failures to increase NR_hypercalls
+       .org ia64_hypercall_table + 8*NR_hypercalls
+
+#else
        .rodata
        .align 8
        .globl sys_call_table
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/Makefile       Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,1 @@
+subdir-y += kernel
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/Makefile        Fri Jan 19 14:48:57 
2007 +0000
@@ -0,0 +1,5 @@
+obj-y += sn2_smp.o
+obj-y += setup.o
+obj-y += iomv.o
+obj-y += irq.o
+obj-y += io_init.o
diff -r 8475a4e0425e -r 3c8bb086025e 
xen/arch/ia64/linux-xen/sn/kernel/README.origin
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/README.origin   Fri Jan 19 14:48:57 
2007 +0000
@@ -0,0 +1,12 @@
+# Source files in this directory are near-identical copies of linux-2.6.19
+# files:
+
+# NOTE: ALL changes to these files should be clearly marked
+# (e.g. with #ifdef XEN or XEN in a comment) so that they can be
+# easily updated to future versions of the corresponding Linux files.
+
+io_init.c              -> linux/arch/ia64/sn/kernel/io_init.c
+iomv.c                 -> linux/arch/ia64/sn/kernel/iomv.c
+irq.c                  -> linux/arch/ia64/sn/kernel/irq.c
+setup.c                        -> linux/arch/ia64/sn/kernel/setup.c
+sn2_smp.c              -> linux/arch/ia64/sn/kernel/sn2/sn2_smp.c
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/io_init.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/io_init.c       Fri Jan 19 14:48:57 
2007 +0000
@@ -0,0 +1,783 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights 
reserved.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/nodemask.h>
+#ifdef XEN
+#include <linux/init.h>
+#endif
+#include <asm/sn/types.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/sn_feature_sets.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/io.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#ifndef XEN
+#include <asm/sn/pcidev.h>
+#endif
+#include <asm/sn/simulator.h>
+#include <asm/sn/sn_sal.h>
+#ifndef XEN
+#include <asm/sn/tioca_provider.h>
+#include <asm/sn/tioce_provider.h>
+#endif
+#ifdef XEN
+#include "asm/sn/hubdev.h"
+#include "asm/sn/xwidgetdev.h"
+#else
+#include "xtalk/hubdev.h"
+#include "xtalk/xwidgetdev.h"
+#endif
+
+
+extern void sn_init_cpei_timer(void);
+extern void register_sn_procfs(void);
+#ifdef XEN
+extern void sn_irq_lh_init(void);
+#endif
+
+static struct list_head sn_sysdata_list;
+
+/* sysdata list struct */
+struct sysdata_el {
+       struct list_head entry;
+       void *sysdata;
+};
+
+struct slab_info {
+       struct hubdev_info hubdev;
+};
+
+struct brick {
+       moduleid_t id;          /* Module ID of this module        */
+       struct slab_info slab_info[MAX_SLABS + 1];
+};
+
+int sn_ioif_inited;            /* SN I/O infrastructure initialized? */
+
+struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];      /* 
indexed by asic type */
+
+#ifndef XEN
+static int max_segment_number;          /* Default highest segment number */
+static int max_pcibus_number = 255;    /* Default highest pci bus number */
+
+/*
+ * Hooks and struct for unsupported pci providers
+ */
+
+static dma_addr_t
+sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int 
type)
+{
+       return 0;
+}
+
+static void
+sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
+{
+       return;
+}
+
+static void *
+sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller 
*controller)
+{
+       return NULL;
+}
+
+static struct sn_pcibus_provider sn_pci_default_provider = {
+       .dma_map = sn_default_pci_map,
+       .dma_map_consistent = sn_default_pci_map,
+       .dma_unmap = sn_default_pci_unmap,
+       .bus_fixup = sn_default_pci_bus_fixup,
+};
+#endif
+
+/*
+ * Retrieve the DMA Flush List given nasid, widget, and device.
+ * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
+ */
+static inline u64
+sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
+                            u64 address)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff,
+                       (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
+                       (u64) nasid, (u64) widget_num,
+                       (u64) device_num, (u64) address, 0, 0, 0);
+       return ret_stuff.status;
+}
+
+/*
+ * Retrieve the hub device info structure for the given nasid.
+ */
+static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff,
+                       (u64) SN_SAL_IOIF_GET_HUBDEV_INFO,
+                       (u64) handle, (u64) address, 0, 0, 0, 0, 0);
+       return ret_stuff.v0;
+}
+
+/*
+ * Retrieve the pci bus information given the bus number.
+ */
+static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff,
+                       (u64) SN_SAL_IOIF_GET_PCIBUS_INFO,
+                       (u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0);
+       return ret_stuff.v0;
+}
+
+#ifndef XEN
+/*
+ * Retrieve the pci device information given the bus and device|function 
number.
+ */
+static inline u64
+sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
+                   u64 sn_irq_info)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff,
+                       (u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
+                       (u64) segment, (u64) bus_number, (u64) devfn,
+                       (u64) pci_dev,
+                       sn_irq_info, 0, 0);
+       return ret_stuff.v0;
+}
+
+/*
+ * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified
+ *                       device.
+ */
+inline struct pcidev_info *
+sn_pcidev_info_get(struct pci_dev *dev)
+{
+       struct pcidev_info *pcidev;
+
+       list_for_each_entry(pcidev,
+                           &(SN_PCI_CONTROLLER(dev)->pcidev_info), pdi_list) {
+               if (pcidev->pdi_linux_pcidev == dev) {
+                       return pcidev;
+               }
+       }
+       return NULL;
+}
+
+/* Older PROM flush WAR
+ *
+ * 01/16/06 -- This war will be in place until a new official PROM is released.
+ * Additionally note that the struct sn_flush_device_war also has to be
+ * removed from arch/ia64/sn/include/xtalk/hubdev.h
+ */
+static u8 war_implemented = 0;
+
+static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
+                              struct sn_flush_device_common *common)
+{
+       struct sn_flush_device_war *war_list;
+       struct sn_flush_device_war *dev_entry;
+       struct ia64_sal_retval isrv = {0,0,0,0};
+
+       if (!war_implemented) {
+               printk(KERN_WARNING "PROM version < 4.50 -- implementing old "
+                      "PROM flush WAR\n");
+               war_implemented = 1;
+       }
+
+       war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
+       if (!war_list)
+               BUG();
+
+       SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
+                       nasid, widget, __pa(war_list), 0, 0, 0 ,0);
+       if (isrv.status)
+               panic("sn_device_fixup_war failed: %s\n",
+                     ia64_sal_strerror(isrv.status));
+
+       dev_entry = war_list + device;
+       memcpy(common,dev_entry, sizeof(*common));
+       kfree(war_list);
+
+       return isrv.status;
+}
+
+/*
+ * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for
+ *     each node in the system.
+ */
+static void __init sn_fixup_ionodes(void)
+{
+       struct sn_flush_device_kernel *sn_flush_device_kernel;
+       struct sn_flush_device_kernel *dev_entry;
+       struct hubdev_info *hubdev;
+       u64 status;
+       u64 nasid;
+       int i, widget, device, size;
+
+       /*
+        * Get SGI Specific HUB chipset information.
+        * Inform Prom that this kernel can support domain bus numbering.
+        */
+       for (i = 0; i < num_cnodes; i++) {
+               hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
+               nasid = cnodeid_to_nasid(i);
+               hubdev->max_segment_number = 0xffffffff;
+               hubdev->max_pcibus_number = 0xff;
+               status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
+               if (status)
+                       continue;
+
+               /* Save the largest Domain and pcibus numbers found. */
+               if (hubdev->max_segment_number) {
+                       /*
+                        * Dealing with a Prom that supports segments.
+                        */
+                       max_segment_number = hubdev->max_segment_number;
+                       max_pcibus_number = hubdev->max_pcibus_number;
+               }
+
+               /* Attach the error interrupt handlers */
+               if (nasid & 1)
+                       ice_error_init(hubdev);
+               else
+                       hub_error_init(hubdev);
+
+               for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
+                       hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
+
+               if (!hubdev->hdi_flush_nasid_list.widget_p)
+                       continue;
+
+               size = (HUB_WIDGET_ID_MAX + 1) *
+                       sizeof(struct sn_flush_device_kernel *);
+               hubdev->hdi_flush_nasid_list.widget_p =
+                       kzalloc(size, GFP_KERNEL);
+               if (!hubdev->hdi_flush_nasid_list.widget_p)
+                       BUG();
+
+               for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
+                       size = DEV_PER_WIDGET *
+                               sizeof(struct sn_flush_device_kernel);
+                       sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
+                       if (!sn_flush_device_kernel)
+                               BUG();
+
+                       dev_entry = sn_flush_device_kernel;
+                       for (device = 0; device < DEV_PER_WIDGET;
+                            device++,dev_entry++) {
+                               size = sizeof(struct sn_flush_device_common);
+                               dev_entry->common = kzalloc(size, GFP_KERNEL);
+                               if (!dev_entry->common)
+                                       BUG();
+
+                               if (sn_prom_feature_available(
+                                                      PRF_DEVICE_FLUSH_LIST))
+                                       status = sal_get_device_dmaflush_list(
+                                                    nasid, widget, device,
+                                                    (u64)(dev_entry->common));
+                               else
+#ifdef XEN
+                                       BUG();
+#else
+                                       status = sn_device_fixup_war(nasid,
+                                                    widget, device,
+                                                    dev_entry->common);
+#endif
+                               if (status != SALRET_OK)
+                                       panic("SAL call failed: %s\n",
+                                             ia64_sal_strerror(status));
+
+                               spin_lock_init(&dev_entry->sfdl_flush_lock);
+                       }
+
+                       if (sn_flush_device_kernel)
+                               hubdev->hdi_flush_nasid_list.widget_p[widget] =
+                                                      sn_flush_device_kernel;
+               }
+       }
+}
+
+/*
+ * sn_pci_window_fixup() - Create a pci_window for each device resource.
+ *                        Until ACPI support is added, we need this code
+ *                        to setup pci_windows for use by
+ *                        pcibios_bus_to_resource(),
+ *                        pcibios_resource_to_bus(), etc.
+ */
+static void
+sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
+                   s64 * pci_addrs)
+{
+       struct pci_controller *controller = PCI_CONTROLLER(dev->bus);
+       unsigned int i;
+       unsigned int idx;
+       unsigned int new_count;
+       struct pci_window *new_window;
+
+       if (count == 0)
+               return;
+       idx = controller->windows;
+       new_count = controller->windows + count;
+       new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL);
+       if (new_window == NULL)
+               BUG();
+       if (controller->window) {
+               memcpy(new_window, controller->window,
+                      sizeof(struct pci_window) * controller->windows);
+               kfree(controller->window);
+       }
+
+       /* Setup a pci_window for each device resource. */
+       for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+               if (pci_addrs[i] == -1)
+                       continue;
+
+               new_window[idx].offset = dev->resource[i].start - pci_addrs[i];
+               new_window[idx].resource = dev->resource[i];
+               idx++;
+       }
+
+       controller->windows = new_count;
+       controller->window = new_window;
+}
+
+void sn_pci_unfixup_slot(struct pci_dev *dev)
+{
+       struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev;
+
+       sn_irq_unfixup(dev);
+       pci_dev_put(host_pci_dev);
+       pci_dev_put(dev);
+}
+
+/*
+ * sn_pci_fixup_slot() - This routine sets up a slot's resources
+ * consistent with the Linux PCI abstraction layer.  Resources acquired
+ * from our PCI provider include PIO maps to BAR space and interrupt
+ * objects.
+ */
+void sn_pci_fixup_slot(struct pci_dev *dev)
+{
+       unsigned int count = 0;
+       int idx;
+       int segment = pci_domain_nr(dev->bus);
+       int status = 0;
+       struct pcibus_bussoft *bs;
+       struct pci_bus *host_pci_bus;
+       struct pci_dev *host_pci_dev;
+       struct pcidev_info *pcidev_info;
+       s64 pci_addrs[PCI_ROM_RESOURCE + 1];
+       struct sn_irq_info *sn_irq_info;
+       unsigned long size;
+       unsigned int bus_no, devfn;
+
+       pci_dev_get(dev); /* for the sysdata pointer */
+       pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
+       if (!pcidev_info)
+               BUG();          /* Cannot afford to run out of memory */
+
+       sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+       if (!sn_irq_info)
+               BUG();          /* Cannot afford to run out of memory */
+
+       /* Call to retrieve pci device information needed by kernel. */
+       status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, 
+                                    dev->devfn,
+                                    (u64) __pa(pcidev_info),
+                                    (u64) __pa(sn_irq_info));
+       if (status)
+               BUG(); /* Cannot get platform pci device information */
+
+       /* Add pcidev_info to list in sn_pci_controller struct */
+       list_add_tail(&pcidev_info->pdi_list,
+                     &(SN_PCI_CONTROLLER(dev->bus)->pcidev_info));
+
+       /* Copy over PIO Mapped Addresses */
+       for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+               unsigned long start, end, addr;
+
+               if (!pcidev_info->pdi_pio_mapped_addr[idx]) {
+                       pci_addrs[idx] = -1;
+                       continue;
+               }
+
+               start = dev->resource[idx].start;
+               end = dev->resource[idx].end;
+               size = end - start;
+               if (size == 0) {
+                       pci_addrs[idx] = -1;
+                       continue;
+               }
+               pci_addrs[idx] = start;
+               count++;
+               addr = pcidev_info->pdi_pio_mapped_addr[idx];
+               addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
+               dev->resource[idx].start = addr;
+               dev->resource[idx].end = addr + size;
+               if (dev->resource[idx].flags & IORESOURCE_IO)
+                       dev->resource[idx].parent = &ioport_resource;
+               else
+                       dev->resource[idx].parent = &iomem_resource;
+       }
+       /* Create a pci_window in the pci_controller struct for
+        * each device resource.
+        */
+       if (count > 0)
+               sn_pci_window_fixup(dev, count, pci_addrs);
+
+       /*
+        * Using the PROMs values for the PCI host bus, get the Linux
+        * PCI host_pci_dev struct and set up host bus linkages
+        */
+
+       bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff;
+       devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff;
+       host_pci_bus = pci_find_bus(segment, bus_no);
+       host_pci_dev = pci_get_slot(host_pci_bus, devfn);
+
+       pcidev_info->host_pci_dev = host_pci_dev;
+       pcidev_info->pdi_linux_pcidev = dev;
+       pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev);
+       bs = SN_PCIBUS_BUSSOFT(dev->bus);
+       pcidev_info->pdi_pcibus_info = bs;
+
+       if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) {
+               SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type];
+       } else {
+               SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider;
+       }
+
+       /* Only set up IRQ stuff if this device has a host bus context */
+       if (bs && sn_irq_info->irq_irq) {
+               pcidev_info->pdi_sn_irq_info = sn_irq_info;
+               dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq;
+               sn_irq_fixup(dev, sn_irq_info);
+       } else {
+               pcidev_info->pdi_sn_irq_info = NULL;
+               kfree(sn_irq_info);
+       }
+}
+
+/*
+ * sn_pci_controller_fixup() - This routine sets up a bus's resources
+ * consistent with the Linux PCI abstraction layer.
+ */
+void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
+{
+       int status;
+       int nasid, cnode;
+       struct pci_controller *controller;
+       struct sn_pci_controller *sn_controller;
+       struct pcibus_bussoft *prom_bussoft_ptr;
+       struct hubdev_info *hubdev_info;
+       void *provider_soft;
+       struct sn_pcibus_provider *provider;
+
+       status = sal_get_pcibus_info((u64) segment, (u64) busnum,
+                                    (u64) ia64_tpa(&prom_bussoft_ptr));
+       if (status > 0)
+               return;         /*bus # does not exist */
+       prom_bussoft_ptr = __va(prom_bussoft_ptr);
+
+       /* Allocate a sn_pci_controller, which has a pci_controller struct
+        * as the first member.
+        */
+       sn_controller = kzalloc(sizeof(struct sn_pci_controller), GFP_KERNEL);
+       if (!sn_controller)
+               BUG();
+       INIT_LIST_HEAD(&sn_controller->pcidev_info);
+       controller = &sn_controller->pci_controller;
+       controller->segment = segment;
+
+       if (bus == NULL) {
+               bus = pci_scan_bus(busnum, &pci_root_ops, controller);
+               if (bus == NULL)
+                       goto error_return; /* error, or bus already scanned */
+               bus->sysdata = NULL;
+       }
+
+       if (bus->sysdata)
+               goto error_return; /* sysdata already alloc'd */
+
+       /*
+        * Per-provider fixup.  Copies the contents from prom to local
+        * area and links SN_PCIBUS_BUSSOFT().
+        */
+
+       if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES)
+               goto error_return; /* unsupported asic type */
+
+       if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB)
+               goto error_return; /* no further fixup necessary */
+
+       provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
+       if (provider == NULL)
+               goto error_return; /* no provider registerd for this asic */
+
+       bus->sysdata = controller;
+       if (provider->bus_fixup)
+               provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, 
controller);
+       else
+               provider_soft = NULL;
+
+       if (provider_soft == NULL) {
+               /* fixup failed or not applicable */
+               bus->sysdata = NULL;
+               goto error_return;
+       }
+
+       /*
+        * Setup pci_windows for legacy IO and MEM space.
+        * (Temporary until ACPI support is in place.)
+        */
+       controller->window = kcalloc(2, sizeof(struct pci_window), GFP_KERNEL);
+       if (controller->window == NULL)
+               BUG();
+       controller->window[0].offset = prom_bussoft_ptr->bs_legacy_io;
+       controller->window[0].resource.name = "legacy_io";
+       controller->window[0].resource.flags = IORESOURCE_IO;
+       controller->window[0].resource.start = prom_bussoft_ptr->bs_legacy_io;
+       controller->window[0].resource.end =
+           controller->window[0].resource.start + 0xffff;
+       controller->window[0].resource.parent = &ioport_resource;
+       controller->window[1].offset = prom_bussoft_ptr->bs_legacy_mem;
+       controller->window[1].resource.name = "legacy_mem";
+       controller->window[1].resource.flags = IORESOURCE_MEM;
+       controller->window[1].resource.start = prom_bussoft_ptr->bs_legacy_mem;
+       controller->window[1].resource.end =
+           controller->window[1].resource.start + (1024 * 1024) - 1;
+       controller->window[1].resource.parent = &iomem_resource;
+       controller->windows = 2;
+
+       /*
+        * Generic bus fixup goes here.  Don't reference prom_bussoft_ptr
+        * after this point.
+        */
+
+       PCI_CONTROLLER(bus)->platform_data = provider_soft;
+       nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
+       cnode = nasid_to_cnodeid(nasid);
+       hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+       SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
+           &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
+
+       /*
+        * If the node information we obtained during the fixup phase is invalid
+        * then set controller->node to -1 (undetermined)
+        */
+       if (controller->node >= num_online_nodes()) {
+               struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus);
+
+               printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u"
+                                   "L_IO=%lx L_MEM=%lx BASE=%lx\n",
+                       b->bs_asic_type, b->bs_xid, b->bs_persist_busnum,
+                       b->bs_legacy_io, b->bs_legacy_mem, b->bs_base);
+               printk(KERN_WARNING "on node %d but only %d nodes online."
+                       "Association set to undetermined.\n",
+                       controller->node, num_online_nodes());
+               controller->node = -1;
+       }
+       return;
+
+error_return:
+
+       kfree(sn_controller);
+       return;
+}
+
+void sn_bus_store_sysdata(struct pci_dev *dev)
+{
+       struct sysdata_el *element;
+
+       element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL);
+       if (!element) {
+               dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__);
+               return;
+       }
+       element->sysdata = SN_PCIDEV_INFO(dev);
+       list_add(&element->entry, &sn_sysdata_list);
+}
+
+void sn_bus_free_sysdata(void)
+{
+       struct sysdata_el *element;
+       struct list_head *list, *safe;
+
+       list_for_each_safe(list, safe, &sn_sysdata_list) {
+               element = list_entry(list, struct sysdata_el, entry);
+               list_del(&element->entry);
+               list_del(&(((struct pcidev_info *)
+                            (element->sysdata))->pdi_list));
+               kfree(element->sysdata);
+               kfree(element);
+       }
+       return;
+}
+#endif
+
+/*
+ * Ugly hack to get PCI setup until we have a proper ACPI namespace.
+ */
+
+#define PCI_BUSES_TO_SCAN 256
+
+static int __init sn_pci_init(void)
+{
+#ifndef XEN
+       int i, j;
+       struct pci_dev *pci_dev = NULL;
+#endif
+
+       if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
+               return 0;
+
+#ifndef XEN
+       /*
+        * prime sn_pci_provider[].  Individial provider init routines will
+        * override their respective default entries.
+        */
+
+       for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++)
+               sn_pci_provider[i] = &sn_pci_default_provider;
+
+       pcibr_init_provider();
+       tioca_init_provider();
+       tioce_init_provider();
+#endif
+
+       /*
+        * This is needed to avoid bounce limit checks in the blk layer
+        */
+       ia64_max_iommu_merge_mask = ~PAGE_MASK;
+#ifndef XEN
+       sn_fixup_ionodes();
+#endif
+       sn_irq_lh_init();
+       INIT_LIST_HEAD(&sn_sysdata_list);
+#ifndef XEN
+       sn_init_cpei_timer();
+
+#ifdef CONFIG_PROC_FS
+       register_sn_procfs();
+#endif
+
+       /* busses are not known yet ... */
+       for (i = 0; i <= max_segment_number; i++)
+               for (j = 0; j <= max_pcibus_number; j++)
+                       sn_pci_controller_fixup(i, j, NULL);
+
+       /*
+        * Generic Linux PCI Layer has created the pci_bus and pci_dev 
+        * structures - time for us to add our SN PLatform specific 
+        * information.
+        */
+
+       while ((pci_dev =
+               pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL)
+               sn_pci_fixup_slot(pci_dev);
+#endif
+
+       sn_ioif_inited = 1;     /* sn I/O infrastructure now initialized */
+
+       return 0;
+}
+
+/*
+ * hubdev_init_node() - Creates the HUB data structure and link them to it's 
+ *     own NODE specific data area.
+ */
+void hubdev_init_node(nodepda_t * npda, cnodeid_t node)
+{
+       struct hubdev_info *hubdev_info;
+       int size;
+#ifndef XEN
+       pg_data_t *pg;
+#else
+       struct pglist_data *pg;
+#endif
+
+       size = sizeof(struct hubdev_info);
+
+       if (node >= num_online_nodes()) /* Headless/memless IO nodes */
+               pg = NODE_DATA(0);
+       else
+               pg = NODE_DATA(node);
+
+       hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
+
+       npda->pdinfo = (void *)hubdev_info;
+}
+
+geoid_t
+cnodeid_get_geoid(cnodeid_t cnode)
+{
+       struct hubdev_info *hubdev;
+
+       hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+       return hubdev->hdi_geoid;
+}
+
+#ifndef XEN
+void sn_generate_path(struct pci_bus *pci_bus, char *address)
+{
+       nasid_t nasid;
+       cnodeid_t cnode;
+       geoid_t geoid;
+       moduleid_t moduleid;
+       u16 bricktype;
+
+       nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+       cnode = nasid_to_cnodeid(nasid);
+       geoid = cnodeid_get_geoid(cnode);
+       moduleid = geo_module(geoid);
+
+       sprintf(address, "module_%c%c%c%c%.2d",
+               '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)),
+               '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)),
+               '0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)),
+               MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid));
+
+       /* Tollhouse requires slot id to be displayed */
+       bricktype = MODULE_GET_BTYPE(moduleid);
+       if ((bricktype == L1_BRICKTYPE_191010) ||
+           (bricktype == L1_BRICKTYPE_1932))
+                       sprintf(address, "%s^%d", address, geo_slot(geoid));
+}
+#endif
+
+#ifdef XEN
+__initcall(sn_pci_init);
+#else
+subsys_initcall(sn_pci_init);
+#endif
+#ifndef XEN
+EXPORT_SYMBOL(sn_pci_fixup_slot);
+EXPORT_SYMBOL(sn_pci_unfixup_slot);
+EXPORT_SYMBOL(sn_pci_controller_fixup);
+EXPORT_SYMBOL(sn_bus_store_sysdata);
+EXPORT_SYMBOL(sn_bus_free_sysdata);
+EXPORT_SYMBOL(sn_generate_path);
+#endif
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/iomv.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/iomv.c  Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,82 @@
+/* 
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#ifndef XEN
+#include <asm/vga.h>
+#endif
+#include <asm/sn/nodepda.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/shub_mmr.h>
+
+#define IS_LEGACY_VGA_IOPORT(p) \
+       (((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df))
+
+#ifdef XEN
+#define vga_console_iobase     0
+#endif
+
+/**
+ * sn_io_addr - convert an in/out port to an i/o address
+ * @port: port to convert
+ *
+ * Legacy in/out instructions are converted to ld/st instructions
+ * on IA64.  This routine will convert a port number into a valid 
+ * SN i/o address.  Used by sn_in*() and sn_out*().
+ */
+void *sn_io_addr(unsigned long port)
+{
+       if (!IS_RUNNING_ON_SIMULATOR()) {
+               if (IS_LEGACY_VGA_IOPORT(port))
+                       port += vga_console_iobase;
+               /* On sn2, legacy I/O ports don't point at anything */
+               if (port < (64 * 1024))
+                       return NULL;
+               return ((void *)(port | __IA64_UNCACHED_OFFSET));
+       } else {
+               /* but the simulator uses them... */
+               unsigned long addr;
+
+               /*
+                * word align port, but need more than 10 bits
+                * for accessing registers in bedrock local block
+                * (so we don't do port&0xfff)
+                */
+               addr = (is_shub2() ? 0xc00000028c000000UL : 
0xc0000087cc000000UL) | ((port >> 2) << 12);
+               if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port 
== 0x3f7)
+                       addr |= port;
+               return (void *)addr;
+       }
+}
+
+EXPORT_SYMBOL(sn_io_addr);
+
+/**
+ * __sn_mmiowb - I/O space memory barrier
+ *
+ * See include/asm-ia64/io.h and Documentation/DocBook/deviceiobook.tmpl
+ * for details.
+ *
+ * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
+ * See PV 871084 for details about the WAR about zero value.
+ *
+ */
+void __sn_mmiowb(void)
+{
+       volatile unsigned long *adr = pda->pio_write_status_addr;
+       unsigned long val = pda->pio_write_status_val;
+
+       while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val)
+               cpu_relax();
+}
+
+EXPORT_SYMBOL(__sn_mmiowb);
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/irq.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/irq.c   Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,542 @@
+/*
+ * Platform dependent support for SGI SN
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#ifdef XEN
+#include <linux/pci.h>
+#include <asm/hw_irq.h>
+#endif
+#include <asm/sn/addrs.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#ifndef XEN
+#include <asm/sn/pcidev.h>
+#endif
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/sn_sal.h>
+
+#ifdef XEN
+#define move_native_irq(foo)   do {} while(0)
+#endif
+
+static void force_interrupt(int irq);
+#ifndef XEN
+static void register_intr_pda(struct sn_irq_info *sn_irq_info);
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
+#endif
+
+int sn_force_interrupt_flag = 1;
+extern int sn_ioif_inited;
+struct list_head **sn_irq_lh;
+static DEFINE_SPINLOCK(sn_irq_info_lock); /* non-IRQ lock */
+
+u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
+                                    struct sn_irq_info *sn_irq_info,
+                                    int req_irq, nasid_t req_nasid,
+                                    int req_slice)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+                       (u64) SAL_INTR_ALLOC, (u64) local_nasid,
+                       (u64) local_widget, __pa(sn_irq_info), (u64) req_irq,
+                       (u64) req_nasid, (u64) req_slice);
+
+       return ret_stuff.status;
+}
+
+void sn_intr_free(nasid_t local_nasid, int local_widget,
+                               struct sn_irq_info *sn_irq_info)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+                       (u64) SAL_INTR_FREE, (u64) local_nasid,
+                       (u64) local_widget, (u64) sn_irq_info->irq_irq,
+                       (u64) sn_irq_info->irq_cookie, 0, 0);
+}
+
+static unsigned int sn_startup_irq(unsigned int irq)
+{
+       return 0;
+}
+
+static void sn_shutdown_irq(unsigned int irq)
+{
+}
+
+static void sn_disable_irq(unsigned int irq)
+{
+}
+
+static void sn_enable_irq(unsigned int irq)
+{
+}
+
+static void sn_ack_irq(unsigned int irq)
+{
+       u64 event_occurred, mask;
+
+       irq = irq & 0xff;
+       event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
+       mask = event_occurred & SH_ALL_INT_MASK;
+       HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask);
+       __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
+
+       move_native_irq(irq);
+}
+
+static void sn_end_irq(unsigned int irq)
+{
+       int ivec;
+       u64 event_occurred;
+
+       ivec = irq & 0xff;
+       if (ivec == SGI_UART_VECTOR) {
+               event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR 
(SH_EVENT_OCCURRED));
+               /* If the UART bit is set here, we may have received an
+                * interrupt from the UART that the driver missed.  To
+                * make sure, we IPI ourselves to force us to look again.
+                */
+               if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
+                       platform_send_ipi(smp_processor_id(), SGI_UART_VECTOR,
+                                         IA64_IPI_DM_INT, 0);
+               }
+       }
+       __clear_bit(ivec, (volatile void *)pda->sn_in_service_ivecs);
+       if (sn_force_interrupt_flag)
+               force_interrupt(irq);
+}
+
+#ifndef XEN
+static void sn_irq_info_free(struct rcu_head *head);
+
+struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
+                                      nasid_t nasid, int slice)
+{
+       int vector;
+       int cpuphys;
+       int64_t bridge;
+       int local_widget, status;
+       nasid_t local_nasid;
+       struct sn_irq_info *new_irq_info;
+       struct sn_pcibus_provider *pci_provider;
+
+       new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC);
+       if (new_irq_info == NULL)
+               return NULL;
+
+       memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
+
+       bridge = (u64) new_irq_info->irq_bridge;
+       if (!bridge) {
+               kfree(new_irq_info);
+               return NULL; /* irq is not a device interrupt */
+       }
+
+       local_nasid = NASID_GET(bridge);
+
+       if (local_nasid & 1)
+               local_widget = TIO_SWIN_WIDGETNUM(bridge);
+       else
+               local_widget = SWIN_WIDGETNUM(bridge);
+
+       vector = sn_irq_info->irq_irq;
+       /* Free the old PROM new_irq_info structure */
+       sn_intr_free(local_nasid, local_widget, new_irq_info);
+       /* Update kernels new_irq_info with new target info */
+       unregister_intr_pda(new_irq_info);
+
+       /* allocate a new PROM new_irq_info struct */
+       status = sn_intr_alloc(local_nasid, local_widget,
+                              new_irq_info, vector,
+                              nasid, slice);
+
+       /* SAL call failed */
+       if (status) {
+               kfree(new_irq_info);
+               return NULL;
+       }
+
+       cpuphys = nasid_slice_to_cpuid(nasid, slice);
+       new_irq_info->irq_cpuid = cpuphys;
+       register_intr_pda(new_irq_info);
+
+       pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type];
+
+       /*
+        * If this represents a line interrupt, target it.  If it's
+        * an msi (irq_int_bit < 0), it's already targeted.
+        */
+       if (new_irq_info->irq_int_bit >= 0 &&
+           pci_provider && pci_provider->target_interrupt)
+               (pci_provider->target_interrupt)(new_irq_info);
+
+       spin_lock(&sn_irq_info_lock);
+#ifdef XEN
+       list_replace(&sn_irq_info->list, &new_irq_info->list);
+#else
+       list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
+#endif
+       spin_unlock(&sn_irq_info_lock);
+#ifndef XEN
+       call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+#endif
+
+#ifdef CONFIG_SMP
+       set_irq_affinity_info((vector & 0xff), cpuphys, 0);
+#endif
+
+       return new_irq_info;
+}
+
+static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+       struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
+       nasid_t nasid;
+       int slice;
+
+       nasid = cpuid_to_nasid(first_cpu(mask));
+       slice = cpuid_to_slice(first_cpu(mask));
+
+       list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
+                                sn_irq_lh[irq], list)
+               (void)sn_retarget_vector(sn_irq_info, nasid, slice);
+}
+#endif
+
+struct hw_interrupt_type irq_type_sn = {
+#ifndef XEN
+       .name           = "SN hub",
+#endif
+       .startup        = sn_startup_irq,
+       .shutdown       = sn_shutdown_irq,
+       .enable         = sn_enable_irq,
+       .disable        = sn_disable_irq,
+       .ack            = sn_ack_irq,
+       .end            = sn_end_irq,
+#ifndef XEN
+       .set_affinity   = sn_set_affinity_irq
+#endif
+};
+
+unsigned int sn_local_vector_to_irq(u8 vector)
+{
+       return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector));
+}
+
+void sn_irq_init(void)
+{
+#ifndef XEN
+       int i;
+       irq_desc_t *base_desc = irq_desc;
+
+       ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR;
+       ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR;
+
+       for (i = 0; i < NR_IRQS; i++) {
+               if (base_desc[i].chip == &no_irq_type) {
+                       base_desc[i].chip = &irq_type_sn;
+               }
+       }
+#endif
+}
+
+#ifndef XEN
+static void register_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+       int irq = sn_irq_info->irq_irq;
+       int cpu = sn_irq_info->irq_cpuid;
+
+       if (pdacpu(cpu)->sn_last_irq < irq) {
+               pdacpu(cpu)->sn_last_irq = irq;
+       }
+
+       if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq)
+               pdacpu(cpu)->sn_first_irq = irq;
+}
+
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+       int irq = sn_irq_info->irq_irq;
+       int cpu = sn_irq_info->irq_cpuid;
+       struct sn_irq_info *tmp_irq_info;
+       int i, foundmatch;
+
+#ifndef XEN
+       rcu_read_lock();
+#else
+       spin_lock(&sn_irq_info_lock);
+#endif
+       if (pdacpu(cpu)->sn_last_irq == irq) {
+               foundmatch = 0;
+               for (i = pdacpu(cpu)->sn_last_irq - 1;
+                    i && !foundmatch; i--) {
+#ifdef XEN
+                       list_for_each_entry(tmp_irq_info,
+                                               sn_irq_lh[i],
+                                               list) {
+#else
+                       list_for_each_entry_rcu(tmp_irq_info,
+                                               sn_irq_lh[i],
+                                               list) {
+#endif
+                               if (tmp_irq_info->irq_cpuid == cpu) {
+                                       foundmatch = 1;
+                                       break;
+                               }
+                       }
+               }
+               pdacpu(cpu)->sn_last_irq = i;
+       }
+
+       if (pdacpu(cpu)->sn_first_irq == irq) {
+               foundmatch = 0;
+               for (i = pdacpu(cpu)->sn_first_irq + 1;
+                    i < NR_IRQS && !foundmatch; i++) {
+#ifdef XEN
+                       list_for_each_entry(tmp_irq_info,
+                                               sn_irq_lh[i],
+                                               list) {
+#else
+                       list_for_each_entry_rcu(tmp_irq_info,
+                                               sn_irq_lh[i],
+                                               list) {
+#endif
+                               if (tmp_irq_info->irq_cpuid == cpu) {
+                                       foundmatch = 1;
+                                       break;
+                               }
+                       }
+               }
+               pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i);
+       }
+#ifndef XEN
+       rcu_read_unlock();
+#else
+       spin_unlock(&sn_irq_info_lock);
+#endif
+}
+#endif /* XEN */
+
+#ifndef XEN
+static void sn_irq_info_free(struct rcu_head *head)
+{
+       struct sn_irq_info *sn_irq_info;
+
+       sn_irq_info = container_of(head, struct sn_irq_info, rcu);
+       kfree(sn_irq_info);
+}
+#endif
+
+#ifndef XEN
+void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
+{
+       nasid_t nasid = sn_irq_info->irq_nasid;
+       int slice = sn_irq_info->irq_slice;
+       int cpu = nasid_slice_to_cpuid(nasid, slice);
+
+       pci_dev_get(pci_dev);
+       sn_irq_info->irq_cpuid = cpu;
+       sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev);
+
+       /* link it into the sn_irq[irq] list */
+       spin_lock(&sn_irq_info_lock);
+#ifdef XEN
+       list_add(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
+#else
+       list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
+#endif
+#ifndef XEN
+       reserve_irq_vector(sn_irq_info->irq_irq);
+#endif
+       spin_unlock(&sn_irq_info_lock);
+
+       register_intr_pda(sn_irq_info);
+}
+
+void sn_irq_unfixup(struct pci_dev *pci_dev)
+{
+       struct sn_irq_info *sn_irq_info;
+
+       /* Only cleanup IRQ stuff if this device has a host bus context */
+       if (!SN_PCIDEV_BUSSOFT(pci_dev))
+               return;
+
+       sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info;
+       if (!sn_irq_info)
+               return;
+       if (!sn_irq_info->irq_irq) {
+               kfree(sn_irq_info);
+               return;
+       }
+
+       unregister_intr_pda(sn_irq_info);
+       spin_lock(&sn_irq_info_lock);
+#ifdef XEN
+       list_del(&sn_irq_info->list);
+#else
+       list_del_rcu(&sn_irq_info->list);
+#endif
+       spin_unlock(&sn_irq_info_lock);
+       if (list_empty(sn_irq_lh[sn_irq_info->irq_irq]))
+               free_irq_vector(sn_irq_info->irq_irq);
+#ifndef XEN
+       call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+#endif
+       pci_dev_put(pci_dev);
+
+}
+#endif
+
+static inline void
+sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info)
+{
+       struct sn_pcibus_provider *pci_provider;
+
+       pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type];
+       if (pci_provider && pci_provider->force_interrupt)
+               (*pci_provider->force_interrupt)(sn_irq_info);
+}
+
+static void force_interrupt(int irq)
+{
+       struct sn_irq_info *sn_irq_info;
+
+#ifndef XEN
+       if (!sn_ioif_inited)
+               return;
+#endif
+
+#ifdef XEN
+       spin_lock(&sn_irq_info_lock);
+#else
+       rcu_read_lock();
+#endif
+#ifdef XEN
+       list_for_each_entry(sn_irq_info, sn_irq_lh[irq], list)
+#else
+       list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list)
+#endif
+               sn_call_force_intr_provider(sn_irq_info);
+
+#ifdef XEN
+       spin_unlock(&sn_irq_info_lock);
+#else
+       rcu_read_unlock();
+#endif
+}
+
+#ifndef XEN
+/*
+ * Check for lost interrupts.  If the PIC int_status reg. says that
+ * an interrupt has been sent, but not handled, and the interrupt
+ * is not pending in either the cpu irr regs or in the soft irr regs,
+ * and the interrupt is not in service, then the interrupt may have
+ * been lost.  Force an interrupt on that pin.  It is possible that
+ * the interrupt is in flight, so we may generate a spurious interrupt,
+ * but we should never miss a real lost interrupt.
+ */
+static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
+{
+       u64 regval;
+       struct pcidev_info *pcidev_info;
+       struct pcibus_info *pcibus_info;
+
+       /*
+        * Bridge types attached to TIO (anything but PIC) do not need this WAR
+        * since they do not target Shub II interrupt registers.  If that
+        * ever changes, this check needs to accomodate.
+        */
+       if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC)
+               return;
+
+       pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+       if (!pcidev_info)
+               return;
+
+       pcibus_info =
+           (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+           pdi_pcibus_info;
+       regval = pcireg_intr_status_get(pcibus_info);
+
+       if (!ia64_get_irr(irq_to_vector(irq))) {
+               if (!test_bit(irq, pda->sn_in_service_ivecs)) {
+                       regval &= 0xff;
+                       if (sn_irq_info->irq_int_bit & regval &
+                           sn_irq_info->irq_last_intr) {
+                               regval &= ~(sn_irq_info->irq_int_bit & regval);
+                               sn_call_force_intr_provider(sn_irq_info);
+                       }
+               }
+       }
+       sn_irq_info->irq_last_intr = regval;
+}
+#endif
+
+void sn_lb_int_war_check(void)
+{
+#ifndef XEN
+       struct sn_irq_info *sn_irq_info;
+       int i;
+
+#ifdef XEN
+       if (pda->sn_first_irq == 0)
+#else
+       if (!sn_ioif_inited || pda->sn_first_irq == 0)
+#endif
+               return;
+
+#ifdef XEN
+       spin_lock(&sn_irq_info_lock);
+#else
+       rcu_read_lock();
+#endif
+       for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) {
+#ifdef XEN
+               list_for_each_entry(sn_irq_info, sn_irq_lh[i], list) {
+#else
+               list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) {
+#endif
+                       sn_check_intr(i, sn_irq_info);
+               }
+       }
+#ifdef XEN
+       spin_unlock(&sn_irq_info_lock);
+#else
+       rcu_read_unlock();
+#endif
+#endif
+}
+
+void __init sn_irq_lh_init(void)
+{
+       int i;
+
+       sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL);
+       if (!sn_irq_lh)
+               panic("SN PCI INIT: Failed to allocate memory for PCI init\n");
+
+       for (i = 0; i < NR_IRQS; i++) {
+               sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+               if (!sn_irq_lh[i])
+                       panic("SN PCI INIT: Failed IRQ memory allocation\n");
+
+               INIT_LIST_HEAD(sn_irq_lh[i]);
+       }
+}
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/setup.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/setup.c Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,808 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#ifndef XEN
+#include <linux/kdev_t.h>
+#endif
+#include <linux/string.h>
+#ifndef XEN
+#include <linux/screen_info.h>
+#endif
+#include <linux/console.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/serial.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/interrupt.h>
+#include <linux/acpi.h>
+#include <linux/compiler.h>
+#include <linux/sched.h>
+#ifndef XEN
+#include <linux/root_dev.h>
+#endif
+#include <linux/nodemask.h>
+#include <linux/pm.h>
+#include <linux/efi.h>
+
+#include <asm/io.h>
+#include <asm/sal.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#ifndef XEN
+#include <asm/vga.h>
+#endif
+#include <asm/sn/arch.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/leds.h>
+#ifndef XEN
+#include <asm/sn/bte.h>
+#endif
+#include <asm/sn/shub_mmr.h>
+#ifndef XEN
+#include <asm/sn/clksupport.h>
+#endif
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/sn_feature_sets.h>
+#ifndef XEN
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#else
+#include "asm/sn/xwidgetdev.h"
+#include "asm/sn/hubdev.h"
+#endif
+#include <asm/sn/klconfig.h>
+#ifdef XEN
+#include <asm/sn/shubio.h>
+
+/* Xen has no clue about NUMA ....  grrrr */
+#define pxm_to_node(foo)               0
+#define node_to_pxm(foo)               0
+#define numa_node_id()                 0
+#endif
+
+
+DEFINE_PER_CPU(struct pda_s, pda_percpu);
+
+#define MAX_PHYS_MEMORY                (1UL << IA64_MAX_PHYS_BITS)     /* Max 
physical address supported */
+
+extern void bte_init_node(nodepda_t *, cnodeid_t);
+
+extern void sn_timer_init(void);
+extern unsigned long last_time_offset;
+extern void (*ia64_mark_idle) (int);
+extern void snidle(int);
+extern unsigned long long (*ia64_printk_clock)(void);
+
+unsigned long sn_rtc_cycles_per_second;
+EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
+DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
+EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
+
+DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
+EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
+
+DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
+EXPORT_PER_CPU_SYMBOL(__sn_nodepda);
+
+char sn_system_serial_number_string[128];
+EXPORT_SYMBOL(sn_system_serial_number_string);
+u64 sn_partition_serial_number;
+EXPORT_SYMBOL(sn_partition_serial_number);
+u8 sn_partition_id;
+EXPORT_SYMBOL(sn_partition_id);
+u8 sn_system_size;
+EXPORT_SYMBOL(sn_system_size);
+u8 sn_sharing_domain_size;
+EXPORT_SYMBOL(sn_sharing_domain_size);
+u8 sn_coherency_id;
+EXPORT_SYMBOL(sn_coherency_id);
+u8 sn_region_size;
+EXPORT_SYMBOL(sn_region_size);
+int sn_prom_type;      /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
+
+short physical_node_map[MAX_NUMALINK_NODES];
+static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
+
+EXPORT_SYMBOL(physical_node_map);
+
+int num_cnodes;
+
+static void sn_init_pdas(char **);
+static void build_cnode_tables(void);
+
+static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
+
+#ifndef XEN
+/*
+ * The format of "screen_info" is strange, and due to early i386-setup
+ * code. This is just enough to make the console code think we're on a
+ * VGA color display.
+ */
+struct screen_info sn_screen_info = {
+       .orig_x = 0,
+       .orig_y = 0,
+       .orig_video_mode = 3,
+       .orig_video_cols = 80,
+       .orig_video_ega_bx = 3,
+       .orig_video_lines = 25,
+       .orig_video_isVGA = 1,
+       .orig_video_points = 16
+};
+#endif
+
+/*
+ * This routine can only be used during init, since
+ * smp_boot_data is an init data structure.
+ * We have to use smp_boot_data.cpu_phys_id to find
+ * the physical id of the processor because the normal
+ * cpu_physical_id() relies on data structures that
+ * may not be initialized yet.
+ */
+
+static int __init pxm_to_nasid(int pxm)
+{
+       int i;
+       int nid;
+
+       nid = pxm_to_node(pxm);
+       for (i = 0; i < num_node_memblks; i++) {
+               if (node_memblk[i].nid == nid) {
+                       return NASID_GET(node_memblk[i].start_paddr);
+               }
+       }
+       return -1;
+}
+
+/**
+ * early_sn_setup - early setup routine for SN platforms
+ *
+ * Sets up an initial console to aid debugging.  Intended primarily
+ * for bringup.  See start_kernel() in init/main.c.
+ */
+
+void __init early_sn_setup(void)
+{
+       efi_system_table_t *efi_systab;
+       efi_config_table_t *config_tables;
+       struct ia64_sal_systab *sal_systab;
+       struct ia64_sal_desc_entry_point *ep;
+       char *p;
+       int i, j;
+
+       /*
+        * Parse enough of the SAL tables to locate the SAL entry point. Since, 
console
+        * IO on SN2 is done via SAL calls, early_printk won't work without 
this.
+        *
+        * This code duplicates some of the ACPI table parsing that is in efi.c 
& sal.c.
+        * Any changes to those file may have to be made hereas well.
+        */
+       efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
+       config_tables = __va(efi_systab->tables);
+       for (i = 0; i < efi_systab->nr_tables; i++) {
+               if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
+                   0) {
+                       sal_systab = __va(config_tables[i].table);
+                       p = (char *)(sal_systab + 1);
+                       for (j = 0; j < sal_systab->entry_count; j++) {
+                               if (*p == SAL_DESC_ENTRY_POINT) {
+                                       ep = (struct ia64_sal_desc_entry_point
+                                             *)p;
+                                       ia64_sal_handler_init(__va
+                                                             (ep->sal_proc),
+                                                             __va(ep->gp));
+                                       return;
+                               }
+                               p += SAL_DESC_SIZE(*p);
+                       }
+               }
+       }
+       /* Uh-oh, SAL not available?? */
+       printk(KERN_ERR "failed to find SAL entry point\n");
+}
+
+extern int platform_intr_list[];
+static int __initdata shub_1_1_found;
+
+/*
+ * sn_check_for_wars
+ *
+ * Set flag for enabling shub specific wars
+ */
+
+static inline int __init is_shub_1_1(int nasid)
+{
+       unsigned long id;
+       int rev;
+
+       if (is_shub2())
+               return 0;
+       id = REMOTE_HUB_L(nasid, SH1_SHUB_ID);
+       rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT;
+       return rev <= 2;
+}
+
+static void __init sn_check_for_wars(void)
+{
+       int cnode;
+
+       if (is_shub2()) {
+               /* none yet */
+       } else {
+               for_each_online_node(cnode) {
+                       if (is_shub_1_1(cnodeid_to_nasid(cnode)))
+                               shub_1_1_found = 1;
+               }
+       }
+}
+
+#ifndef XEN
+/*
+ * Scan the EFI PCDP table (if it exists) for an acceptable VGA console
+ * output device.  If one exists, pick it and set sn_legacy_{io,mem} to
+ * reflect the bus offsets needed to address it.
+ *
+ * Since pcdp support in SN is not supported in the 2.4 kernel (or at least
+ * the one lbs is based on) just declare the needed structs here.
+ *
+ * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf
+ *
+ * Returns 0 if no acceptable vga is found, !0 otherwise.
+ *
+ * Note:  This stuff is duped here because Altix requires the PCDP to
+ * locate a usable VGA device due to lack of proper ACPI support.  Structures
+ * could be used from drivers/firmware/pcdp.h, but it was decided that moving
+ * this file to a more public location just for Altix use was undesireable.
+ */
+
+struct hcdp_uart_desc {
+       u8      pad[45];
+};
+
+struct pcdp {
+       u8      signature[4];   /* should be 'HCDP' */
+       u32     length;
+       u8      rev;            /* should be >=3 for pcdp, <3 for hcdp */
+       u8      sum;
+       u8      oem_id[6];
+       u64     oem_tableid;
+       u32     oem_rev;
+       u32     creator_id;
+       u32     creator_rev;
+       u32     num_type0;
+       struct hcdp_uart_desc uart[0];  /* num_type0 of these */
+       /* pcdp descriptors follow */
+}  __attribute__((packed));
+
+struct pcdp_device_desc {
+       u8      type;
+       u8      primary;
+       u16     length;
+       u16     index;
+       /* interconnect specific structure follows */
+       /* device specific structure follows that */
+}  __attribute__((packed));
+
+struct pcdp_interface_pci {
+       u8      type;           /* 1 == pci */
+       u8      reserved;
+       u16     length;
+       u8      segment;
+       u8      bus;
+       u8      dev;
+       u8      fun;
+       u16     devid;
+       u16     vendid;
+       u32     acpi_interrupt;
+       u64     mmio_tra;
+       u64     ioport_tra;
+       u8      flags;
+       u8      translation;
+}  __attribute__((packed));
+
+struct pcdp_vga_device {
+       u8      num_eas_desc;
+       /* ACPI Extended Address Space Desc follows */
+}  __attribute__((packed));
+
+/* from pcdp_device_desc.primary */
+#define PCDP_PRIMARY_CONSOLE   0x01
+
+/* from pcdp_device_desc.type */
+#define PCDP_CONSOLE_INOUT     0x0
+#define PCDP_CONSOLE_DEBUG     0x1
+#define PCDP_CONSOLE_OUT       0x2
+#define PCDP_CONSOLE_IN                0x3
+#define PCDP_CONSOLE_TYPE_VGA  0x8
+
+#define PCDP_CONSOLE_VGA       (PCDP_CONSOLE_TYPE_VGA | PCDP_CONSOLE_OUT)
+
+/* from pcdp_interface_pci.type */
+#define PCDP_IF_PCI            1
+
+/* from pcdp_interface_pci.translation */
+#define PCDP_PCI_TRANS_IOPORT  0x02
+#define PCDP_PCI_TRANS_MMIO    0x01
+
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+static void
+sn_scan_pcdp(void)
+{
+       u8 *bp;
+       struct pcdp *pcdp;
+       struct pcdp_device_desc device;
+       struct pcdp_interface_pci if_pci;
+       extern struct efi efi;
+
+       if (efi.hcdp == EFI_INVALID_TABLE_ADDR)
+               return;         /* no hcdp/pcdp table */
+
+       pcdp = __va(efi.hcdp);
+
+       if (pcdp->rev < 3)
+               return;         /* only support PCDP (rev >= 3) */
+
+       for (bp = (u8 *)&pcdp->uart[pcdp->num_type0];
+            bp < (u8 *)pcdp + pcdp->length;
+            bp += device.length) {
+               memcpy(&device, bp, sizeof(device));
+               if (! (device.primary & PCDP_PRIMARY_CONSOLE))
+                       continue;       /* not primary console */
+
+               if (device.type != PCDP_CONSOLE_VGA)
+                       continue;       /* not VGA descriptor */
+
+               memcpy(&if_pci, bp+sizeof(device), sizeof(if_pci));
+               if (if_pci.type != PCDP_IF_PCI)
+                       continue;       /* not PCI interconnect */
+
+               if (if_pci.translation & PCDP_PCI_TRANS_IOPORT)
+                       vga_console_iobase =
+                               if_pci.ioport_tra | __IA64_UNCACHED_OFFSET;
+
+               if (if_pci.translation & PCDP_PCI_TRANS_MMIO)
+                       vga_console_membase =
+                               if_pci.mmio_tra | __IA64_UNCACHED_OFFSET;
+
+               break; /* once we find the primary, we're done */
+       }
+}
+#endif
+
+static unsigned long sn2_rtc_initial;
+
+static unsigned long long ia64_sn2_printk_clock(void)
+{
+       unsigned long rtc_now = rtc_time();
+
+       return (rtc_now - sn2_rtc_initial) *
+               (1000000000 / sn_rtc_cycles_per_second);
+}
+#endif
+
+/**
+ * sn_setup - SN platform setup routine
+ * @cmdline_p: kernel command line
+ *
+ * Handles platform setup for SN machines.  This includes determining
+ * the RTC frequency (via a SAL call), initializing secondary CPUs, and
+ * setting up per-node data areas.  The console is also initialized here.
+ */
+#ifdef XEN
+void __cpuinit sn_cpu_init(void);
+#endif
+
+void __init sn_setup(char **cmdline_p)
+{
+#ifndef XEN
+       long status, ticks_per_sec, drift;
+#else
+       unsigned long status, ticks_per_sec, drift;
+#endif
+       u32 version = sn_sal_rev();
+#ifndef XEN
+       extern void sn_cpu_init(void);
+
+       sn2_rtc_initial = rtc_time();
+       ia64_sn_plat_set_error_handling_features();     // obsolete
+       ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
+       ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
+
+
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+       /*
+        * Handle SN vga console.
+        *
+        * SN systems do not have enough ACPI table information
+        * being passed from prom to identify VGA adapters and the legacy
+        * addresses to access them.  Until that is done, SN systems rely
+        * on the PCDP table to identify the primary VGA console if one
+        * exists.
+        *
+        * However, kernel PCDP support is optional, and even if it is built
+        * into the kernel, it will not be used if the boot cmdline contains
+        * console= directives.
+        *
+        * So, to work around this mess, we duplicate some of the PCDP code
+        * here so that the primary VGA console (as defined by PCDP) will
+        * work on SN systems even if a different console (e.g. serial) is
+        * selected on the boot line (or CONFIG_EFI_PCDP is off).
+        */
+
+       if (! vga_console_membase)
+               sn_scan_pcdp();
+
+       if (vga_console_membase) {
+               /* usable vga ... make tty0 the preferred default console */
+               if (!strstr(*cmdline_p, "console="))
+                       add_preferred_console("tty", 0, NULL);
+       } else {
+               printk(KERN_DEBUG "SGI: Disabling VGA console\n");
+               if (!strstr(*cmdline_p, "console="))
+                       add_preferred_console("ttySG", 0, NULL);
+#ifdef CONFIG_DUMMY_CONSOLE
+               conswitchp = &dummy_con;
+#else
+               conswitchp = NULL;
+#endif                         /* CONFIG_DUMMY_CONSOLE */
+       }
+#endif                         /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */
+
+       MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
+#endif
+
+       /*
+        * Build the tables for managing cnodes.
+        */
+       build_cnode_tables();
+
+       status =
+           ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
+                              &drift);
+       if (status != 0 || ticks_per_sec < 100000) {
+               printk(KERN_WARNING
+                      "unable to determine platform RTC clock frequency, 
guessing.\n");
+               /* PROM gives wrong value for clock freq. so guess */
+               sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
+       } else
+               sn_rtc_cycles_per_second = ticks_per_sec;
+#ifndef XEN
+
+       platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
+
+       ia64_printk_clock = ia64_sn2_printk_clock;
+#endif
+
+       printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
+
+       /*
+        * we set the default root device to /dev/hda
+        * to make simulation easy
+        */
+#ifndef XEN
+       ROOT_DEV = Root_HDA1;
+#endif
+
+       /*
+        * Create the PDAs and NODEPDAs for all the cpus.
+        */
+       sn_init_pdas(cmdline_p);
+
+#ifndef XEN
+       ia64_mark_idle = &snidle;
+#endif
+
+       /*
+        * For the bootcpu, we do this here. All other cpus will make the
+        * call as part of cpu_init in slave cpu initialization.
+        */
+       sn_cpu_init();
+
+#ifndef XEN
+#ifdef CONFIG_SMP
+       init_smp_config();
+#endif
+       screen_info = sn_screen_info;
+
+       sn_timer_init();
+
+       /*
+        * set pm_power_off to a SAL call to allow
+        * sn machines to power off. The SAL call can be replaced
+        * by an ACPI interface call when ACPI is fully implemented
+        * for sn.
+        */
+       pm_power_off = ia64_sn_power_down;
+       current->thread.flags |= IA64_THREAD_MIGRATION;
+#endif
+}
+
+/**
+ * sn_init_pdas - setup node data areas
+ *
+ * One time setup for Node Data Area.  Called by sn_setup().
+ */
+static void __init sn_init_pdas(char **cmdline_p)
+{
+       cnodeid_t cnode;
+
+       /*
+        * Allocate & initalize the nodepda for each node.
+        */
+       for_each_online_node(cnode) {
+               nodepdaindr[cnode] =
+                   alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
+               memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
+               memset(nodepdaindr[cnode]->phys_cpuid, -1,
+                   sizeof(nodepdaindr[cnode]->phys_cpuid));
+               spin_lock_init(&nodepdaindr[cnode]->ptc_lock);
+       }
+
+       /*
+        * Allocate & initialize nodepda for TIOs.  For now, put them on node 0.
+        */
+       for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) {
+               nodepdaindr[cnode] =
+                   alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
+               memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
+       }
+
+       /*
+        * Now copy the array of nodepda pointers to each nodepda.
+        */
+       for (cnode = 0; cnode < num_cnodes; cnode++)
+               memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
+                      sizeof(nodepdaindr));
+
+#ifndef XEN
+       /*
+        * Set up IO related platform-dependent nodepda fields.
+        * The following routine actually sets up the hubinfo struct
+        * in nodepda.
+        */
+       for_each_online_node(cnode) {
+               bte_init_node(nodepdaindr[cnode], cnode);
+       }
+
+       /*
+        * Initialize the per node hubdev.  This includes IO Nodes and
+        * headless/memless nodes.
+        */
+       for (cnode = 0; cnode < num_cnodes; cnode++) {
+               hubdev_init_node(nodepdaindr[cnode], cnode);
+       }
+#endif
+}
+
+/**
+ * sn_cpu_init - initialize per-cpu data areas
+ * @cpuid: cpuid of the caller
+ *
+ * Called during cpu initialization on each cpu as it starts.
+ * Currently, initializes the per-cpu data area for SNIA.
+ * Also sets up a few fields in the nodepda.  Also known as
+ * platform_cpu_init() by the ia64 machvec code.
+ */
+void __cpuinit sn_cpu_init(void)
+{
+       int cpuid;
+       int cpuphyid;
+       int nasid;
+       int subnode;
+       int slice;
+       int cnode;
+       int i;
+       static int wars_have_been_checked;
+
+       cpuid = smp_processor_id();
+#ifndef XEN
+       if (cpuid == 0 && IS_MEDUSA()) {
+               if (ia64_sn_is_fake_prom())
+                       sn_prom_type = 2;
+               else
+                       sn_prom_type = 1;
+               printk(KERN_INFO "Running on medusa with %s PROM\n",
+                      (sn_prom_type == 1) ? "real" : "fake");
+       }
+#endif
+
+       memset(pda, 0, sizeof(pda));
+       if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2,
+                               &sn_hub_info->nasid_bitmask,
+                               &sn_hub_info->nasid_shift,
+                               &sn_system_size, &sn_sharing_domain_size,
+                               &sn_partition_id, &sn_coherency_id,
+                               &sn_region_size))
+               BUG();
+       sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
+
+       /*
+        * Don't check status. The SAL call is not supported on all PROMs
+        * but a failure is harmless.
+        */
+       (void) ia64_sn_set_cpu_number(cpuid);
+
+       /*
+        * The boot cpu makes this call again after platform initialization is
+        * complete.
+        */
+       if (nodepdaindr[0] == NULL)
+               return;
+
+       for (i = 0; i < MAX_PROM_FEATURE_SETS; i++)
+               if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0)
+                       break;
+
+       cpuphyid = get_sapicid();
+
+       if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice))
+               BUG();
+
+       for (i=0; i < MAX_NUMNODES; i++) {
+               if (nodepdaindr[i]) {
+                       nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid;
+                       nodepdaindr[i]->phys_cpuid[cpuid].slice = slice;
+                       nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode;
+               }
+       }
+
+       cnode = nasid_to_cnodeid(nasid);
+
+       sn_nodepda = nodepdaindr[cnode];
+
+       pda->led_address =
+           (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
+       pda->led_state = LED_ALWAYS_SET;
+       pda->hb_count = HZ / 2;
+       pda->hb_state = 0;
+       pda->idle_flag = 0;
+
+       if (cpuid != 0) {
+               /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */
+               memcpy(sn_cnodeid_to_nasid,
+                      (&per_cpu(__sn_cnodeid_to_nasid, 0)),
+                      sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
+       }
+
+       /*
+        * Check for WARs.
+        * Only needs to be done once, on BSP.
+        * Has to be done after loop above, because it uses this cpu's
+        * sn_cnodeid_to_nasid table which was just initialized if this
+        * isn't cpu 0.
+        * Has to be done before assignment below.
+        */
+       if (!wars_have_been_checked) {
+               sn_check_for_wars();
+               wars_have_been_checked = 1;
+       }
+       sn_hub_info->shub_1_1_found = shub_1_1_found;
+
+       /*
+        * Set up addresses of PIO/MEM write status registers.
+        */
+       {
+               u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, 
SH1_PIO_WRITE_STATUS_1, 0};
+               u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2,
+                       SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
+               u64 *pio;
+               pio = is_shub1() ? pio1 : pio2;
+               pda->pio_write_status_addr =
+                  (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
+               pda->pio_write_status_val = is_shub1() ? 
SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
+       }
+
+#ifndef XEN  /* local_node_data is not allocated .... yet */
+       /*
+        * WAR addresses for SHUB 1.x.
+        */
+       if (local_node_data->active_cpu_count++ == 0 && is_shub1()) {
+               int buddy_nasid;
+               buddy_nasid =
+                   cnodeid_to_nasid(numa_node_id() ==
+                                    num_online_nodes() - 1 ? 0 : 
numa_node_id() + 1);
+               pda->pio_shub_war_cam_addr =
+                   (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid,
+                                                             
SH1_PI_CAM_CONTROL);
+       }
+#endif
+}
+
+/*
+ * Build tables for converting between NASIDs and cnodes.
+ */
+static inline int __init board_needs_cnode(int type)
+{
+       return (type == KLTYPE_SNIA || type == KLTYPE_TIO);
+}
+
+void __init build_cnode_tables(void)
+{
+       int nasid;
+       int node;
+       lboard_t *brd;
+
+       memset(physical_node_map, -1, sizeof(physical_node_map));
+       memset(sn_cnodeid_to_nasid, -1,
+                       sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
+
+       /*
+        * First populate the tables with C/M bricks. This ensures that
+        * cnode == node for all C & M bricks.
+        */
+       for_each_online_node(node) {
+               nasid = pxm_to_nasid(node_to_pxm(node));
+               sn_cnodeid_to_nasid[node] = nasid;
+               physical_node_map[nasid] = node;
+       }
+
+       /*
+        * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node
+        * limit on the number of nodes, we can't use the generic node numbers 
+        * for this. Note that num_cnodes is incremented below as TIOs or
+        * headless/memoryless nodes are discovered.
+        */
+       num_cnodes = num_online_nodes();
+
+       /* fakeprom does not support klgraph */
+       if (IS_RUNNING_ON_FAKE_PROM())
+               return;
+
+       /* Find TIOs & headless/memoryless nodes and add them to the tables */
+       for_each_online_node(node) {
+               kl_config_hdr_t *klgraph_header;
+               nasid = cnodeid_to_nasid(node);
+               klgraph_header = ia64_sn_get_klconfig_addr(nasid);
+               if (klgraph_header == NULL)
+                       BUG();
+               brd = NODE_OFFSET_TO_LBOARD(nasid, 
klgraph_header->ch_board_info);
+               while (brd) {
+                       if (board_needs_cnode(brd->brd_type) && 
physical_node_map[brd->brd_nasid] < 0) {
+                               sn_cnodeid_to_nasid[num_cnodes] = 
brd->brd_nasid;
+                               physical_node_map[brd->brd_nasid] = 
num_cnodes++;
+                       }
+                       brd = find_lboard_next(brd);
+               }
+       }
+}
+
+int
+nasid_slice_to_cpuid(int nasid, int slice)
+{
+       long cpu;
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               if (cpuid_to_nasid(cpu) == nasid &&
+                                       cpuid_to_slice(cpu) == slice)
+                       return cpu;
+
+       return -1;
+}
+
+int sn_prom_feature_available(int id)
+{
+       if (id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS)
+               return 0;
+       return test_bit(id, sn_prom_features);
+}
+EXPORT_SYMBOL(sn_prom_feature_available);
+
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c       Fri Jan 19 14:48:57 
2007 +0000
@@ -0,0 +1,548 @@
+/*
+ * SN2 Platform specific SMP Support
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/nodemask.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/tlb.h>
+#include <asm/numa.h>
+#include <asm/hw_irq.h>
+#include <asm/current.h>
+#ifdef XEN
+#include <asm/sn/arch.h>
+#endif
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/rw_mmr.h>
+
+DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+DECLARE_PER_CPU(struct ptc_stats, ptcstats);
+
+static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
+
+extern unsigned long
+sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+                              volatile unsigned long *, unsigned long,
+                              volatile unsigned long *, unsigned long);
+void
+sn2_ptc_deadlock_recovery(short *, short, short, int,
+                         volatile unsigned long *, unsigned long,
+                         volatile unsigned long *, unsigned long);
+
+/*
+ * Note: some is the following is captured here to make degugging easier
+ * (the macros make more sense if you see the debug patch - not posted)
+ */
+#define sn2_ptctest    0
+#define local_node_uses_ptc_ga(sh1)    ((sh1) ? 1 : 0)
+#define max_active_pio(sh1)            ((sh1) ? 32 : 7)
+#define reset_max_active_on_deadlock() 1
+#ifndef XEN
+#define PTC_LOCK(sh1)                  ((sh1) ? &sn2_global_ptc_lock : 
&sn_nodepda->ptc_lock)
+#else
+#define PTC_LOCK(sh1)                  &sn2_global_ptc_lock
+#endif
+
+struct ptc_stats {
+       unsigned long ptc_l;
+       unsigned long change_rid;
+       unsigned long shub_ptc_flushes;
+       unsigned long nodes_flushed;
+       unsigned long deadlocks;
+       unsigned long deadlocks2;
+       unsigned long lock_itc_clocks;
+       unsigned long shub_itc_clocks;
+       unsigned long shub_itc_clocks_max;
+       unsigned long shub_ptc_flushes_not_my_mm;
+};
+
+#define sn2_ptctest    0
+
+static inline unsigned long wait_piowc(void)
+{
+       volatile unsigned long *piows;
+       unsigned long zeroval, ws;
+
+       piows = pda->pio_write_status_addr;
+       zeroval = pda->pio_write_status_val;
+       do {
+               cpu_relax();
+       } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) 
!= zeroval);
+       return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
+}
+
+#ifndef XEN  /* No idea if Xen will ever support this */
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from seperate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+       pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+       volatile unsigned long *adr = last_pda->pio_write_status_addr;
+       unsigned long val = last_pda->pio_write_status_val;
+
+       /* Drain PIO writes from old CPU's Shub */
+       while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+                       != val))
+               cpu_relax();
+}
+
+void sn_tlb_migrate_finish(struct mm_struct *mm)
+{
+       /* flush_tlb_mm is inefficient if more than 1 users of mm */
+#ifndef XEN
+       if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
+#else
+       if (mm == &current->arch.mm && mm && atomic_read(&mm->mm_users) == 1)
+#endif
+               flush_tlb_mm(mm);
+}
+#endif
+
+/**
+ * sn2_global_tlb_purge - globally purge translation cache of virtual address 
range
+ * @mm: mm_struct containing virtual address range
+ * @start: start of virtual address range
+ * @end: end of virtual address range
+ * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits 
& 0xfc))
+ *
+ * Purges the translation caches of all processors of the given virtual address
+ * range.
+ *
+ * Note:
+ *     - cpu_vm_mask is a bit mask that indicates which cpus have loaded the 
context.
+ *     - cpu_vm_mask is converted into a nodemask of the nodes containing the
+ *       cpus in cpu_vm_mask.
+ *     - if only one bit is set in cpu_vm_mask & it is the current cpu & the
+ *       process is purging its own virtual address range, then only the
+ *       local TLB needs to be flushed. This flushing can be done using
+ *       ptc.l. This is the common case & avoids the global spinlock.
+ *     - if multiple cpus have loaded the context, then flushing has to be
+ *       done with ptc.g/MMRs under protection of the global ptc_lock.
+ */
+
+#ifdef XEN  /* Xen is soooooooo stupid! */
+// static cpumask_t mask_all = CPU_MASK_ALL;
+#endif
+
+void
+#ifndef XEN
+sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
+#else
+sn2_global_tlb_purge(unsigned long start,
+#endif
+                    unsigned long end, unsigned long nbits)
+{
+       int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
+#ifndef XEN
+       int mymm = (mm == current->active_mm && mm == current->mm);
+#else
+       // struct mm_struct *mm;
+       int mymm = 0;
+#endif
+       int use_cpu_ptcga;
+       volatile unsigned long *ptc0, *ptc1;
+       unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr 
= 0;
+       short nasids[MAX_NUMNODES], nix;
+       nodemask_t nodes_flushed;
+       int active, max_active, deadlock;
+
+       nodes_clear(nodes_flushed);
+       i = 0;
+
+#ifndef XEN  /* One day Xen will grow up! */
+       for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+               cnode = cpu_to_node(cpu);
+               node_set(cnode, nodes_flushed);
+               lcpu = cpu;
+               i++;
+       }
+#else
+       for_each_cpu(cpu) {
+               cnode = cpu_to_node(cpu);
+               node_set(cnode, nodes_flushed);
+               lcpu = cpu;
+               i++;
+       }
+#endif
+
+       if (i == 0)
+               return;
+
+       preempt_disable();
+
+       if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
+               do {
+                       ia64_ptcl(start, nbits << 2);
+                       start += (1UL << nbits);
+               } while (start < end);
+               ia64_srlz_i();
+               __get_cpu_var(ptcstats).ptc_l++;
+               preempt_enable();
+               return;
+       }
+
+#ifndef XEN
+       if (atomic_read(&mm->mm_users) == 1 && mymm) {
+#ifndef XEN  /* I hate Xen! */
+               flush_tlb_mm(mm);
+#else
+               flush_tlb_mask(mask_all);
+#endif
+               __get_cpu_var(ptcstats).change_rid++;
+               preempt_enable();
+               return;
+       }
+#endif
+
+       itc = ia64_get_itc();
+       nix = 0;
+       for_each_node_mask(cnode, nodes_flushed)
+               nasids[nix++] = cnodeid_to_nasid(cnode);
+
+#ifndef XEN
+       rr_value = (mm->context << 3) | REGION_NUMBER(start);
+#else
+       rr_value = REGION_NUMBER(start);
+#endif
+
+       shub1 = is_shub1();
+       if (shub1) {
+               data0 = (1UL << SH1_PTC_0_A_SHFT) |
+                       (nbits << SH1_PTC_0_PS_SHFT) |
+                       (rr_value << SH1_PTC_0_RID_SHFT) |
+                       (1UL << SH1_PTC_0_START_SHFT);
+#ifndef XEN
+               ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
+               ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
+#else
+               ptc0 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
+               ptc1 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
+#endif
+       } else {
+               data0 = (1UL << SH2_PTC_A_SHFT) |
+                       (nbits << SH2_PTC_PS_SHFT) |
+                       (1UL << SH2_PTC_START_SHFT);
+#ifndef XEN
+               ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + 
+#else
+               ptc0 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + 
+#endif
+                       (rr_value << SH2_PTC_RID_SHFT));
+               ptc1 = NULL;
+       }
+       
+
+       mynasid = get_nasid();
+       use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
+       max_active = max_active_pio(shub1);
+
+       itc = ia64_get_itc();
+       spin_lock_irqsave(PTC_LOCK(shub1), flags);
+       itc2 = ia64_get_itc();
+
+       __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
+       __get_cpu_var(ptcstats).shub_ptc_flushes++;
+       __get_cpu_var(ptcstats).nodes_flushed += nix;
+       if (!mymm)
+                __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
+
+       if (use_cpu_ptcga && !mymm) {
+               old_rr = ia64_get_rr(start);
+               ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
+               ia64_srlz_d();
+       }
+
+       wait_piowc();
+       do {
+               if (shub1)
+                       data1 = start | (1UL << SH1_PTC_1_START_SHFT);
+               else
+                       data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & 
SH2_PTC_ADDR_MASK);
+               deadlock = 0;
+               active = 0;
+               for (ibegin = 0, i = 0; i < nix; i++) {
+                       nasid = nasids[i];
+                       if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
+                               ia64_ptcga(start, nbits << 2);
+                               ia64_srlz_i();
+                       } else {
+                               ptc0 = CHANGE_NASID(nasid, ptc0);
+                               if (ptc1)
+                                       ptc1 = CHANGE_NASID(nasid, ptc1);
+                               pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, 
data1);
+                               active++;
+                       }
+                       if (active >= max_active || i == (nix - 1)) {
+                               if ((deadlock = wait_piowc())) {
+                                       sn2_ptc_deadlock_recovery(nasids, 
ibegin, i, mynasid, ptc0, data0, ptc1, data1);
+                                       if (reset_max_active_on_deadlock())
+                                               max_active = 1;
+                               }
+                               active = 0;
+                               ibegin = i + 1;
+                       }
+               }
+               start += (1UL << nbits);
+       } while (start < end);
+
+       itc2 = ia64_get_itc() - itc2;
+       __get_cpu_var(ptcstats).shub_itc_clocks += itc2;
+       if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
+               __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
+
+       if (old_rr) {
+               ia64_set_rr(start, old_rr);
+               ia64_srlz_d();
+       }
+
+       spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
+
+       preempt_enable();
+}
+
+/*
+ * sn2_ptc_deadlock_recovery
+ *
+ * Recover from PTC deadlocks conditions. Recovery requires stepping thru each 
+ * TLB flush transaction.  The recovery sequence is somewhat tricky & is
+ * coded in assembly language.
+ */
+
+void
+sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
+                         volatile unsigned long *ptc0, unsigned long data0,
+                         volatile unsigned long *ptc1, unsigned long data1)
+{
+       short nasid, i;
+       unsigned long *piows, zeroval, n;
+
+       __get_cpu_var(ptcstats).deadlocks++;
+
+       piows = (unsigned long *) pda->pio_write_status_addr;
+       zeroval = pda->pio_write_status_val;
+
+
+       for (i=ib; i <= ie; i++) {
+               nasid = nasids[i];
+               if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
+                       continue;
+               ptc0 = CHANGE_NASID(nasid, ptc0);
+               if (ptc1)
+                       ptc1 = CHANGE_NASID(nasid, ptc1);
+
+               n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, 
piows, zeroval);
+               __get_cpu_var(ptcstats).deadlocks2 += n;
+       }
+
+}
+
+/**
+ * sn_send_IPI_phys - send an IPI to a Nasid and slice
+ * @nasid: nasid to receive the interrupt (may be outside partition)
+ * @physid: physical cpuid to receive the interrupt.
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ *
+ * Sends an IPI (interprocessor interrupt) to the processor specified by
+ * @physid
+ *
+ * @delivery_mode can be one of the following
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
+{
+       long val;
+       unsigned long flags = 0;
+       volatile long *p;
+
+       p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
+       val = (1UL << SH_IPI_INT_SEND_SHFT) |
+           (physid << SH_IPI_INT_PID_SHFT) |
+           ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) |
+           ((long)vector << SH_IPI_INT_IDX_SHFT) |
+           (0x000feeUL << SH_IPI_INT_BASE_SHFT);
+
+       mb();
+       if (enable_shub_wars_1_1()) {
+               spin_lock_irqsave(&sn2_global_ptc_lock, flags);
+       }
+       pio_phys_write_mmr(p, val);
+       if (enable_shub_wars_1_1()) {
+               wait_piowc();
+               spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+       }
+
+}
+
+EXPORT_SYMBOL(sn_send_IPI_phys);
+
+/**
+ * sn2_send_IPI - send an IPI to a processor
+ * @cpuid: target of the IPI
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ * @redirect: redirect the IPI?
+ *
+ * Sends an IPI (InterProcessor Interrupt) to the processor specified by
+ * @cpuid.  @vector specifies the command to send, while @delivery_mode can 
+ * be one of the following
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
+{
+       long physid;
+       int nasid;
+
+       physid = cpu_physical_id(cpuid);
+#ifdef XEN
+       if (!sn_nodepda) {
+               ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
+       } else
+#endif
+       nasid = cpuid_to_nasid(cpuid);
+
+       /* the following is used only when starting cpus at boot time */
+       if (unlikely(nasid == -1))
+               ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
+
+       sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
+}
+
+#ifdef CONFIG_PROC_FS
+
+#define PTC_BASENAME   "sgi_sn/ptc_statistics"
+
+static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
+{
+       if (*offset < NR_CPUS)
+               return offset;
+       return NULL;
+}
+
+static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * 
offset)
+{
+       (*offset)++;
+       if (*offset < NR_CPUS)
+               return offset;
+       return NULL;
+}
+
+static void sn2_ptc_seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static int sn2_ptc_seq_show(struct seq_file *file, void *data)
+{
+       struct ptc_stats *stat;
+       int cpu;
+
+       cpu = *(loff_t *) data;
+
+       if (!cpu) {
+               seq_printf(file,
+                          "# cpu ptc_l newrid ptc_flushes nodes_flushed 
deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
+               seq_printf(file, "# ptctest %d\n", sn2_ptctest);
+       }
+
+       if (cpu < NR_CPUS && cpu_online(cpu)) {
+               stat = &per_cpu(ptcstats, cpu);
+               seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld 
%ld\n", cpu, stat->ptc_l,
+                               stat->change_rid, stat->shub_ptc_flushes, 
stat->nodes_flushed,
+                               stat->deadlocks,
+                               1000 * stat->lock_itc_clocks / 
per_cpu(cpu_info, cpu).cyc_per_usec,
+                               1000 * stat->shub_itc_clocks / 
per_cpu(cpu_info, cpu).cyc_per_usec,
+                               1000 * stat->shub_itc_clocks_max / 
per_cpu(cpu_info, cpu).cyc_per_usec,
+                               stat->shub_ptc_flushes_not_my_mm,
+                               stat->deadlocks2);
+       }
+       return 0;
+}
+
+static struct seq_operations sn2_ptc_seq_ops = {
+       .start = sn2_ptc_seq_start,
+       .next = sn2_ptc_seq_next,
+       .stop = sn2_ptc_seq_stop,
+       .show = sn2_ptc_seq_show
+};
+
+static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &sn2_ptc_seq_ops);
+}
+
+static struct file_operations proc_sn2_ptc_operations = {
+       .open = sn2_ptc_proc_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release,
+};
+
+static struct proc_dir_entry *proc_sn2_ptc;
+
+static int __init sn2_ptc_init(void)
+{
+       if (!ia64_platform_is("sn2"))
+               return 0;
+
+       if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
+               printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
+               return -EINVAL;
+       }
+       proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations;
+       spin_lock_init(&sn2_global_ptc_lock);
+       return 0;
+}
+
+static void __exit sn2_ptc_exit(void)
+{
+       remove_proc_entry(PTC_BASENAME, NULL);
+}
+
+module_init(sn2_ptc_init);
+module_exit(sn2_ptc_exit);
+#endif /* CONFIG_PROC_FS */
+
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/Makefile
--- a/xen/arch/ia64/linux/Makefile      Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux/Makefile      Fri Jan 19 14:48:57 2007 +0000
@@ -1,6 +1,9 @@ obj-y += bitop.o
+subdir-y += dig
+subdir-y += hp
+subdir-y += sn
+
 obj-y += bitop.o
 obj-y += clear_page.o
-obj-y += cmdline.o
 obj-y += copy_page_mck.o
 obj-y += efi_stub.o
 obj-y += extable.o
@@ -23,6 +26,7 @@ obj-y += __moddi3.o
 obj-y += __moddi3.o
 obj-y += __umoddi3.o
 obj-y += carta_random.o
+obj-y += io.o
 
 ## variants of divide/modulo
 ## see files in xen/arch/ia64/linux/lib (linux/arch/ia64/lib)
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/README.origin
--- a/xen/arch/ia64/linux/README.origin Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux/README.origin Fri Jan 19 14:48:57 2007 +0000
@@ -4,7 +4,6 @@ needs to be changed, move it to ../linux
 needs to be changed, move it to ../linux-xen and follow
 the instructions in the README there.
 
-cmdline.c              -> linux/lib/cmdline.c
 efi_stub.S             -> linux/arch/ia64/kernel/efi_stub.S
 extable.c              -> linux/arch/ia64/mm/extable.c
 hpsim.S                        -> linux/arch/ia64/hp/sim/hpsim.S
@@ -27,3 +26,6 @@ strlen.S              -> linux/arch/ia64/lib/strlen.
 
 # The files below are from Linux-2.6.16.33
 carta_random.S         -> linux/arch/ia64/lib/carta_random.S
+
+# The files below are from Linux-2.6.19
+io.c                   -> linux/arch/ia64/lib/io.c
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/cmdline.c
--- a/xen/arch/ia64/linux/cmdline.c     Thu Jan 18 15:18:07 2007 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-/*
- * linux/lib/cmdline.c
- * Helper functions generally used for parsing kernel command line
- * and module options.
- *
- * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- *
- * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <xen/lib.h>
-
-
-/**
- *     get_option - Parse integer from an option string
- *     @str: option string
- *     @pint: (output) integer value parsed from @str
- *
- *     Read an int from an option string; if available accept a subsequent
- *     comma as well.
- *
- *     Return values:
- *     0 : no int in string
- *     1 : int found, no subsequent comma
- *     2 : int found including a subsequent comma
- */
-
-int get_option (char **str, int *pint)
-{
-       char *cur = *str;
-
-       if (!cur || !(*cur))
-               return 0;
-       *pint = simple_strtol (cur, str, 0);
-       if (cur == *str)
-               return 0;
-       if (**str == ',') {
-               (*str)++;
-               return 2;
-       }
-
-       return 1;
-}
-
-/**
- *     get_options - Parse a string into a list of integers
- *     @str: String to be parsed
- *     @nints: size of integer array
- *     @ints: integer array
- *
- *     This function parses a string containing a comma-separated
- *     list of integers.  The parse halts when the array is
- *     full, or when no more numbers can be retrieved from the
- *     string.
- *
- *     Return value is the character in the string which caused
- *     the parse to end (typically a null terminator, if @str is
- *     completely parseable).
- */
- 
-char *get_options(const char *str, int nints, int *ints)
-{
-       int res, i = 1;
-
-       while (i < nints) {
-               res = get_option ((char **)&str, ints + i);
-               if (res == 0)
-                       break;
-               i++;
-               if (res == 1)
-                       break;
-       }
-       ints[0] = i - 1;
-       return (char *)str;
-}
-
-/**
- *     memparse - parse a string with mem suffixes into a number
- *     @ptr: Where parse begins
- *     @retptr: (output) Pointer to next char after parse completes
- *
- *     Parses a string into a number.  The number stored at @ptr is
- *     potentially suffixed with %K (for kilobytes, or 1024 bytes),
- *     %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
- *     1073741824).  If the number is suffixed with K, M, or G, then
- *     the return value is the number multiplied by one kilobyte, one
- *     megabyte, or one gigabyte, respectively.
- */
-
-unsigned long long memparse (char *ptr, char **retptr)
-{
-       unsigned long long ret = simple_strtoull (ptr, retptr, 0);
-
-       switch (**retptr) {
-       case 'G':
-       case 'g':
-               ret <<= 10;
-       case 'M':
-       case 'm':
-               ret <<= 10;
-       case 'K':
-       case 'k':
-               ret <<= 10;
-               (*retptr)++;
-       default:
-               break;
-       }
-       return ret;
-}
-
-
-EXPORT_SYMBOL(memparse);
-EXPORT_SYMBOL(get_option);
-EXPORT_SYMBOL(get_options);
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/dig/Makefile  Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,1 @@
+obj-y += machvec.o
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/README.origin
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/dig/README.origin     Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,7 @@
+Source files in this directory are identical copies of linux-2.6.19 files:
+
+NOTE: DO NOT commit changes to these files!   If a file
+needs to be changed, move it to ../linux-xen and follow
+the instructions in the README there.
+
+machvec.c              -> linux/arch/ia64/dig/machvec.c
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/machvec.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/dig/machvec.c Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME          dig
+#define MACHVEC_PLATFORM_HEADER                <asm/machvec_dig.h>
+#include <asm/machvec_init.h>
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/hp/Makefile   Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,1 @@
+subdir-y += zx1
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/hp/zx1/Makefile       Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,1 @@
+obj-y += hpzx1_machvec.o
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/README.origin
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/hp/zx1/README.origin  Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,7 @@
+Source files in this directory are identical copies of linux-2.6.19 files:
+
+NOTE: DO NOT commit changes to these files!   If a file
+needs to be changed, move it to ../linux-xen and follow
+the instructions in the README there.
+
+hpzx1_machvec.c                -> linux/arch/ia64/hp/zx1/hpzx1_machvec.c
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c        Fri Jan 19 14:48:57 
2007 +0000
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME          hpzx1
+#define MACHVEC_PLATFORM_HEADER                <asm/machvec_hpzx1.h>
+#include <asm/machvec_init.h>
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/io.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/io.c  Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,164 @@
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+
+/*
+ * Copy data from IO memory space to "real" memory space.
+ * This needs to be optimized.
+ */
+void memcpy_fromio(void *to, const volatile void __iomem *from, long count)
+{
+       char *dst = to;
+
+       while (count) {
+               count--;
+               *dst++ = readb(from++);
+       }
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+/*
+ * Copy data from "real" memory space to IO memory space.
+ * This needs to be optimized.
+ */
+void memcpy_toio(volatile void __iomem *to, const void *from, long count)
+{
+       const char *src = from;
+
+       while (count) {
+               count--;
+               writeb(*src++, to++);
+       }
+}
+EXPORT_SYMBOL(memcpy_toio);
+
+/*
+ * "memset" on IO memory space.
+ * This needs to be optimized.
+ */
+void memset_io(volatile void __iomem *dst, int c, long count)
+{
+       unsigned char ch = (char)(c & 0xff);
+
+       while (count) {
+               count--;
+               writeb(ch, dst);
+               dst++;
+       }
+}
+EXPORT_SYMBOL(memset_io);
+
+#ifdef CONFIG_IA64_GENERIC
+
+#undef __ia64_inb
+#undef __ia64_inw
+#undef __ia64_inl
+#undef __ia64_outb
+#undef __ia64_outw
+#undef __ia64_outl
+#undef __ia64_readb

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>