[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 13 Sep 2006 13:40:19 +0000
Delivery-date: Wed, 13 Sep 2006 06:41:17 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 1bab7d65171b762bb3cf1ae426bc6c403f847ebf
# Parent  4ba0982264290acfa208304b4e3343ec8c3ec903
# Parent  3e6325b73474b3764573178152503af27a914ab8
merge with xen-unstable.hg
---
 xen/arch/powerpc/htab.c                                         |   68 --
 .hgignore                                                       |    2 
 extras/mini-os/Makefile                                         |    3 
 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c               |    7 
 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c                 |   42 -
 linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c              |   56 +
 linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c              |    2 
 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c             |   11 
 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c               |   10 
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c            |    4 
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c                |    8 
 linux-2.6-xen-sparse/drivers/xen/blktap/common.h                |    1 
 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c             |   23 
 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c                |   16 
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c               |   10 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c            |   67 +-
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c |   12 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c         |   21 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c          |   34 -
 linux-2.6-xen-sparse/include/xen/xenbus.h                       |    6 
 tools/blktap/drivers/block-aio.c                                |   19 
 tools/blktap/drivers/block-qcow.c                               |   19 
 tools/blktap/drivers/tapdisk.c                                  |    1 
 tools/blktap/lib/xs_api.c                                       |   23 
 tools/libxc/ia64/xc_ia64_stubs.c                                |   16 
 tools/libxc/powerpc64/xc_linux_build.c                          |    4 
 tools/libxc/xenctrl.h                                           |    5 
 tools/python/xen/xend/FlatDeviceTree.py                         |  323 ++++++++++
 tools/python/xen/xend/XendCheckpoint.py                         |    6 
 tools/python/xen/xend/XendDomain.py                             |    3 
 tools/python/xen/xend/XendDomainInfo.py                         |   63 +
 tools/python/xen/xend/arch.py                                   |   32 
 tools/python/xen/xend/image.py                                  |  143 ++--
 tools/python/xen/xend/server/DevController.py                   |    6 
 tools/python/xen/xend/server/XMLRPCServer.py                    |    3 
 tools/python/xen/xend/server/blkif.py                           |   16 
 tools/python/xen/xm/migrate.py                                  |    3 
 tools/python/xen/xm/shutdown.py                                 |   49 +
 xen/arch/ia64/xen/dom0_ops.c                                    |    4 
 xen/arch/powerpc/Makefile                                       |   31 
 xen/arch/powerpc/Rules.mk                                       |    2 
 xen/arch/powerpc/backtrace.c                                    |  193 +++++
 xen/arch/powerpc/boot_of.c                                      |  208 ++++--
 xen/arch/powerpc/dart_u3.c                                      |    8 
 xen/arch/powerpc/dom0_ops.c                                     |   20 
 xen/arch/powerpc/domain.c                                       |   58 -
 xen/arch/powerpc/domain_build.c                                 |   60 +
 xen/arch/powerpc/exceptions.c                                   |    2 
 xen/arch/powerpc/exceptions.h                                   |    3 
 xen/arch/powerpc/external.c                                     |    3 
 xen/arch/powerpc/iommu.c                                        |   17 
 xen/arch/powerpc/memory.c                                       |  206 ++++++
 xen/arch/powerpc/mm.c                                           |  298 ++++++++-
 xen/arch/powerpc/mpic.c                                         |    6 
 xen/arch/powerpc/ofd_fixup.c                                    |  101 ---
 xen/arch/powerpc/ofd_fixup_memory.c                             |  107 +++
 xen/arch/powerpc/oftree.h                                       |    8 
 xen/arch/powerpc/papr/tce.c                                     |    6 
 xen/arch/powerpc/papr/xlate.c                                   |   46 +
 xen/arch/powerpc/powerpc64/exceptions.S                         |   37 +
 xen/arch/powerpc/powerpc64/ppc970.c                             |  112 ++-
 xen/arch/powerpc/setup.c                                        |  207 +++---
 xen/arch/powerpc/shadow.c                                       |  159 ++++
 xen/arch/powerpc/xen.lds.S                                      |   10 
 xen/arch/x86/hvm/io.c                                           |   10 
 xen/arch/x86/hvm/platform.c                                     |   32 
 xen/arch/x86/hvm/svm/intr.c                                     |   43 -
 xen/arch/x86/hvm/svm/svm.c                                      |    5 
 xen/arch/x86/hvm/vlapic.c                                       |   14 
 xen/arch/x86/hvm/vmx/io.c                                       |   13 
 xen/arch/x86/hvm/vmx/vmx.c                                      |   29 
 xen/arch/x86/mm/shadow/multi.c                                  |   66 --
 xen/arch/x86/physdev.c                                          |    5 
 xen/arch/x86/time.c                                             |    4 
 xen/arch/x86/traps.c                                            |    5 
 xen/arch/x86/x86_32/traps.c                                     |   46 +
 xen/arch/x86/x86_64/traps.c                                     |   43 -
 xen/common/perfc.c                                              |    4 
 xen/include/asm-ia64/mm.h                                       |    1 
 xen/include/asm-powerpc/config.h                                |    4 
 xen/include/asm-powerpc/current.h                               |    4 
 xen/include/asm-powerpc/domain.h                                |    7 
 xen/include/asm-powerpc/htab.h                                  |    4 
 xen/include/asm-powerpc/mm.h                                    |  183 +++--
 xen/include/asm-powerpc/powerpc64/procarea.h                    |    1 
 xen/include/asm-powerpc/processor.h                             |    7 
 xen/include/asm-powerpc/shadow.h                                |   16 
 xen/include/asm-powerpc/smp.h                                   |    4 
 xen/include/asm-powerpc/types.h                                 |   19 
 xen/include/asm-x86/mm.h                                        |    1 
 xen/include/asm-x86/page.h                                      |    7 
 xen/include/asm-x86/processor.h                                 |    7 
 xen/include/asm-x86/x86_32/page-2level.h                        |    3 
 xen/include/asm-x86/x86_32/page-3level.h                        |    2 
 xen/include/asm-x86/x86_32/page.h                               |    9 
 xen/include/asm-x86/x86_64/page.h                               |   11 
 xen/include/public/arch-ia64.h                                  |    3 
 xen/include/public/arch-powerpc.h                               |    3 
 xen/include/public/arch-x86_32.h                                |   17 
 xen/include/public/arch-x86_64.h                                |    3 
 xen/include/public/domctl.h                                     |   21 
 xen/include/public/sysctl.h                                     |   16 
 xen/include/public/xen.h                                        |    1 
 103 files changed, 2690 insertions(+), 1032 deletions(-)

diff -r 4ba098226429 -r 1bab7d65171b .hgignore
--- a/.hgignore Fri Sep 01 12:52:12 2006 -0600
+++ b/.hgignore Fri Sep 01 13:04:02 2006 -0600
@@ -203,6 +203,8 @@
 ^xen/arch/powerpc/firmware$
 ^xen/arch/powerpc/firmware_image$
 ^xen/arch/powerpc/xen\.lds$
+^xen/arch/powerpc/.xen-syms$
+^xen/arch/powerpc/xen-syms.S$
 ^unmodified_drivers/linux-2.6/\.tmp_versions
 ^unmodified_drivers/linux-2.6/.*\.cmd$
 ^unmodified_drivers/linux-2.6/.*\.ko$
diff -r 4ba098226429 -r 1bab7d65171b extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Fri Sep 01 12:52:12 2006 -0600
+++ b/extras/mini-os/Makefile   Fri Sep 01 13:04:02 2006 -0600
@@ -7,9 +7,12 @@ include $(XEN_ROOT)/Config.mk
 # Set TARGET_ARCH
 override TARGET_ARCH     := $(XEN_TARGET_ARCH)
 
+XEN_INTERFACE_VERSION := 0x00030203
+
 # NB. '-Wcast-qual' is nasty, so I omitted it.
 CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format
 CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline
+CFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION)
 
 ASFLAGS = -D__ASSEMBLY__
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Sep 01 13:04:02 2006 -0600
@@ -1380,8 +1380,10 @@ legacy_init_iomem_resources(struct e820e
                         *  so we try it repeatedly and let the resource manager
                         *  test it.
                         */
+#ifndef CONFIG_XEN
                        request_resource(res, code_resource);
                        request_resource(res, data_resource);
+#endif
 #ifdef CONFIG_KEXEC
                        request_resource(res, &crashk_res);
 #endif
@@ -1454,11 +1456,8 @@ static void __init register_memory(void)
        int           i;
 
        /* Nothing to do if not running in dom0. */
-       if (!is_initial_xendomain()) {
-               legacy_init_iomem_resources(e820.map, e820.nr_map,
-                                           &code_resource, &data_resource);
+       if (!is_initial_xendomain())
                return;
-       }
 
 #ifdef CONFIG_XEN
        machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Fri Sep 01 13:04:02 2006 -0600
@@ -22,15 +22,6 @@
 #define ISA_START_ADDRESS      0x0
 #define ISA_END_ADDRESS                0x100000
 
-#if 0 /* not PAE safe */
-/* These hacky macros avoid phys->machine translations. */
-#define __direct_pte(x) ((pte_t) { (x) } )
-#define __direct_mk_pte(page_nr,pgprot) \
-  __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-#define direct_mk_pte_phys(physpage, pgprot) \
-  __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
-#endif
-
 static int direct_remap_area_pte_fn(pte_t *pte, 
                                    struct page *pmd_page,
                                    unsigned long address, 
@@ -66,17 +57,16 @@ static int __direct_remap_pfn_range(stru
 
        for (i = 0; i < size; i += PAGE_SIZE) {
                if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) {
-                       /* Fill in the PTE pointers. */
+                       /* Flush a full batch after filling in the PTE ptrs. */
                        rc = apply_to_page_range(mm, start_address, 
                                                 address - start_address,
                                                 direct_remap_area_pte_fn, &w);
                        if (rc)
                                goto out;
-                       w = u;
                        rc = -EFAULT;
                        if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
                                goto out;
-                       v = u;
+                       v = w = u;
                        start_address = address;
                }
 
@@ -92,7 +82,7 @@ static int __direct_remap_pfn_range(stru
        }
 
        if (v != u) {
-               /* get the ptep's filled in */
+               /* Final batch. */
                rc = apply_to_page_range(mm, start_address,
                                         address - start_address,
                                         direct_remap_area_pte_fn, &w);
@@ -178,32 +168,6 @@ int touch_pte_range(struct mm_struct *mm
 } 
 
 EXPORT_SYMBOL(touch_pte_range);
-
-void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot)
-{
-       int error;
-       
-       struct vm_struct *vma;
-       vma = get_vm_area (vm_size, VM_IOREMAP);
-      
-       if (vma == NULL) {
-               printk ("ioremap.c,vm_map_xen_pages(): "
-                       "Failed to get VMA area\n");
-               return NULL;
-       }
-
-       error = direct_kernel_remap_pfn_range((unsigned long) vma->addr,
-                                             maddr >> PAGE_SHIFT, vm_size,
-                                             prot, DOMID_SELF );
-       if (error == 0) {
-               return vma->addr;
-       } else {
-               printk ("ioremap.c,vm_map_xen_pages(): "
-                       "Failed to map xen shared pages into kernel space\n");
-               return NULL;
-       }
-}
-EXPORT_SYMBOL(vm_map_xen_pages);
 
 /*
  * Does @address reside within a non-highmem page that is local to this virtual
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c
--- a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c        Fri Sep 01 13:04:02 2006 -0600
@@ -26,6 +26,7 @@
 #include <xen/evtchn.h>
 #include "op_counter.h"
 
+#include <xen/driver_util.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/xenoprof.h>
 #include <../../../drivers/oprofile/cpu_buffer.h>
@@ -33,8 +34,6 @@
 
 static int xenoprof_start(void);
 static void xenoprof_stop(void);
-
-void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);
 
 static int xenoprof_enabled = 0;
 static unsigned int num_events = 0;
@@ -373,9 +372,9 @@ static int xenoprof_set_passive(int * p_
 {
        int ret;
        int i, j;
-       int vm_size;
        int npages;
        struct xenoprof_buf *buf;
+       struct vm_struct *area;
        pgprot_t prot = __pgprot(_KERNPG_TABLE);
 
        if (!is_primary)
@@ -391,19 +390,29 @@ static int xenoprof_set_passive(int * p_
        for (i = 0; i < pdoms; i++) {
                passive_domains[i].domain_id = p_domains[i];
                passive_domains[i].max_samples = 2048;
-               ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, &passive_domains[i]);
+               ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive,
+                                            &passive_domains[i]);
                if (ret)
-                       return ret;
+                       goto out;
 
                npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1;
-               vm_size = npages * PAGE_SIZE;
-
-               p_shared_buffer[i] = (char *)vm_map_xen_pages(passive_domains[i].buf_maddr,
-                                                             vm_size, prot);
-               if (!p_shared_buffer[i]) {
+
+               area = alloc_vm_area(npages * PAGE_SIZE);
+               if (area == NULL) {
                        ret = -ENOMEM;
                        goto out;
                }
+
+               ret = direct_kernel_remap_pfn_range(
+                       (unsigned long)area->addr,
+                       passive_domains[i].buf_maddr >> PAGE_SHIFT,
+                       npages * PAGE_SIZE, prot, DOMID_SELF);
+               if (ret) {
+                       vunmap(area->addr);
+                       goto out;
+               }
+
+               p_shared_buffer[i] = area->addr;
 
                for (j = 0; j < passive_domains[i].nbuf; j++) {
                        buf = (struct xenoprof_buf *)
@@ -473,11 +482,9 @@ int __init oprofile_arch_init(struct opr
 int __init oprofile_arch_init(struct oprofile_operations * ops)
 {
        struct xenoprof_init init;
-       struct xenoprof_buf * buf;
-       int vm_size;
-       int npages;
-       int ret;
-       int i;
+       struct xenoprof_buf *buf;
+       int npages, ret, i;
+       struct vm_struct *area;
 
        init.max_samples = 16;
        ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
@@ -495,14 +502,23 @@ int __init oprofile_arch_init(struct opr
                        num_events = OP_MAX_COUNTER;
 
                npages = (init.bufsize * nbuf - 1) / PAGE_SIZE + 1;
-               vm_size = npages * PAGE_SIZE;
-
-               shared_buffer = (char *)vm_map_xen_pages(init.buf_maddr,
-                                                        vm_size, prot);
-               if (!shared_buffer) {
+
+               area = alloc_vm_area(npages * PAGE_SIZE);
+               if (area == NULL) {
                        ret = -ENOMEM;
                        goto out;
                }
+
+               ret = direct_kernel_remap_pfn_range(
+                       (unsigned long)area->addr,
+                       init.buf_maddr >> PAGE_SHIFT,
+                       npages * PAGE_SIZE, prot, DOMID_SELF);
+               if (ret) {
+                       vunmap(area->addr);
+                       goto out;
+               }
+
+               shared_buffer = area->addr;
 
                for (i=0; i< nbuf; i++) {
                        buf = (struct xenoprof_buf*) 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c        Fri Sep 01 13:04:02 2006 -0600
@@ -255,8 +255,10 @@ void __init e820_reserve_resources(struc
                         *  so we try it repeatedly and let the resource manager
                         *  test it.
                         */
+#ifndef CONFIG_XEN
                        request_resource(res, &code_resource);
                        request_resource(res, &data_resource);
+#endif
 #ifdef CONFIG_KEXEC
                        request_resource(res, &crashk_res);
 #endif
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Fri Sep 01 13:04:02 2006 -0600
@@ -846,7 +846,7 @@ void __init setup_arch(char **cmdline_p)
 
                if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                        /* Make sure we have a large enough P->M table. */
-                       phys_to_machine_mapping = alloc_bootmem(
+                       phys_to_machine_mapping = alloc_bootmem_pages(
                                end_pfn * sizeof(unsigned long));
                        memset(phys_to_machine_mapping, ~0,
                               end_pfn * sizeof(unsigned long));
@@ -863,7 +863,7 @@ void __init setup_arch(char **cmdline_p)
                         * list of frames that make up the p2m table. Used by
                          * save/restore.
                         */
-                       pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE);
+                       pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
                        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
                                virt_to_mfn(pfn_to_mfn_frame_list_list);
 
@@ -873,7 +873,7 @@ void __init setup_arch(char **cmdline_p)
                                        k++;
                                        BUG_ON(k>=fpp);
                                        pfn_to_mfn_frame_list[k] =
-                                               alloc_bootmem(PAGE_SIZE);
+                                               alloc_bootmem_pages(PAGE_SIZE);
                                        pfn_to_mfn_frame_list_list[k] =
                                                virt_to_mfn(pfn_to_mfn_frame_list[k]);
                                        j=0;
@@ -944,9 +944,10 @@ void __init setup_arch(char **cmdline_p)
                BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
 
                e820_reserve_resources(machine_e820, memmap.nr_entries);
-       } else
-#endif
+       }
+#else
        e820_reserve_resources(e820.map, e820.nr_map);
+#endif
 
        request_resource(&iomem_resource, &video_ram_resource);
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Sep 01 13:04:02 2006 -0600
@@ -301,11 +301,11 @@ static void frontend_changed(struct xenb
        struct backend_info *be = dev->dev.driver_data;
        int err;
 
-       DPRINTK("");
+       DPRINTK("%s", xenbus_strstate(frontend_state));
 
        switch (frontend_state) {
        case XenbusStateInitialising:
-               if (dev->state == XenbusStateClosing) {
+               if (dev->state == XenbusStateClosed) {
                        printk("%s: %s: prepare for reconnect\n",
                               __FUNCTION__, dev->nodename);
                        xenbus_switch_state(dev, XenbusStateInitWait);
@@ -331,8 +331,12 @@ static void frontend_changed(struct xenb
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateClosed:
+               xenbus_switch_state(dev, XenbusStateClosed);
+               if (xenbus_dev_is_online(dev))
+                       break;
+               /* fall through if not online */
        case XenbusStateUnknown:
-       case XenbusStateClosed:
                device_unregister(&dev->dev);
                break;
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Fri Sep 01 13:04:02 2006 -0600
@@ -273,7 +273,7 @@ static void backend_changed(struct xenbu
                        xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
 
                down(&bd->bd_sem);
-               if (info->users > 0)
+               if (info->users > 0 && system_state == SYSTEM_RUNNING)
                        xenbus_dev_error(dev, -EBUSY,
                                         "Device in use; refusing to close");
                else
@@ -360,7 +360,7 @@ static void blkfront_closing(struct xenb
 
        xlvbd_del(info);
 
-       xenbus_switch_state(dev, XenbusStateClosed);
+       xenbus_frontend_closed(dev);
 }
 
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri Sep 01 13:04:02 2006 -0600
@@ -114,8 +114,8 @@ typedef struct domid_translate {
 } domid_translate_t ;
 
 
-domid_translate_t  translate_domid[MAX_TAP_DEV];
-tap_blkif_t *tapfds[MAX_TAP_DEV];
+static domid_translate_t  translate_domid[MAX_TAP_DEV];
+static tap_blkif_t *tapfds[MAX_TAP_DEV];
 
 static int __init set_blkif_reqs(char *str)
 {
@@ -1118,7 +1118,7 @@ static int do_block_io_op(blkif_t *blkif
                               "ring does not exist!\n");
                        print_dbug = 0; /*We only print this message once*/
                }
-               return 1;
+               return 0;
        }
 
        info = tapfds[blkif->dev_num];
@@ -1127,7 +1127,7 @@ static int do_block_io_op(blkif_t *blkif
                        WPRINTK("Can't get UE info!\n");
                        print_dbug = 0;
                }
-               return 1;
+               return 0;
        }
 
        while (rc != rp) {
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h  Fri Sep 01 13:04:02 2006 -0600
@@ -91,6 +91,7 @@ void tap_blkif_free(blkif_t *blkif);
 void tap_blkif_free(blkif_t *blkif);
 int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, 
                  unsigned int evtchn);
+void tap_blkif_unmap(blkif_t *blkif);
 
 #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
 #define blkif_put(_b)                                  \
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c       Fri Sep 01 13:04:02 2006 -0600
@@ -135,20 +135,25 @@ int tap_blkif_map(blkif_t *blkif, unsign
        return 0;
 }
 
+void tap_blkif_unmap(blkif_t *blkif)
+{
+       if (blkif->irq) {
+               unbind_from_irqhandler(blkif->irq, blkif);
+               blkif->irq = 0;
+       }
+       if (blkif->blk_ring.sring) {
+               unmap_frontend_page(blkif);
+               free_vm_area(blkif->blk_ring_area);
+               blkif->blk_ring.sring = NULL;
+       }
+}
+
 void tap_blkif_free(blkif_t *blkif)
 {
        atomic_dec(&blkif->refcnt);
        wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
 
-       /* Already disconnected? */
-       if (blkif->irq)
-               unbind_from_irqhandler(blkif->irq, blkif);
-
-       if (blkif->blk_ring.sring) {
-               unmap_frontend_page(blkif);
-               free_vm_area(blkif->blk_ring_area);
-       }
-
+       tap_blkif_unmap(blkif);
        kmem_cache_free(blkif_cachep, blkif);
 }
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c  Fri Sep 01 13:04:02 2006 -0600
@@ -247,6 +247,11 @@ static void tap_frontend_changed(struct 
 
        switch (frontend_state) {
        case XenbusStateInitialising:
+               if (dev->state == XenbusStateClosed) {
+                       printk("%s: %s: prepare for reconnect\n",
+                              __FUNCTION__, dev->nodename);
+                       xenbus_switch_state(dev, XenbusStateInitWait);
+               }
                break;
 
        case XenbusStateInitialised:
@@ -264,11 +269,20 @@ static void tap_frontend_changed(struct 
                break;
 
        case XenbusStateClosing:
+               if (be->blkif->xenblkd) {
+                       kthread_stop(be->blkif->xenblkd);
+                       be->blkif->xenblkd = NULL;
+               }
+               tap_blkif_unmap(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateClosed:
+               xenbus_switch_state(dev, XenbusStateClosed);
+               if (xenbus_dev_is_online(dev))
+                       break;
+               /* fall through if not online */
        case XenbusStateUnknown:
-       case XenbusStateClosed:
                device_unregister(&dev->dev);
                break;
 
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 01 13:04:02 2006 -0600
@@ -228,13 +228,13 @@ static void frontend_changed(struct xenb
 {
        struct backend_info *be = dev->dev.driver_data;
 
-       DPRINTK("");
+       DPRINTK("%s", xenbus_strstate(frontend_state));
 
        be->frontend_state = frontend_state;
 
        switch (frontend_state) {
        case XenbusStateInitialising:
-               if (dev->state == XenbusStateClosing) {
+               if (dev->state == XenbusStateClosed) {
                        printk("%s: %s: prepare for reconnect\n",
                               __FUNCTION__, dev->nodename);
                        if (be->netif) {
@@ -260,8 +260,12 @@ static void frontend_changed(struct xenb
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateClosed:
+               xenbus_switch_state(dev, XenbusStateClosed);
+               if (xenbus_dev_is_online(dev))
+                       break;
+               /* fall through if not online */
        case XenbusStateUnknown:
-       case XenbusStateClosed:
                if (be->netif != NULL)
                        kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
                device_unregister(&dev->dev);
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Fri Sep 01 13:04:02 2006 -0600
@@ -193,6 +193,7 @@ static void netfront_closing(struct xenb
 
 static void end_access(int, void *);
 static void netif_disconnect_backend(struct netfront_info *);
+static int open_netdev(struct netfront_info *);
 static void close_netdev(struct netfront_info *);
 static void netif_free(struct netfront_info *);
 
@@ -263,15 +264,22 @@ static int __devinit netfront_probe(stru
        dev->dev.driver_data = info;
 
        err = talk_to_backend(dev, info);
-       if (err) {
-               xennet_sysfs_delif(info->netdev);
-               unregister_netdev(netdev);
-               free_netdev(netdev);
-               dev->dev.driver_data = NULL;
-               return err;
-       }
+       if (err)
+               goto fail_backend;
+
+       err = open_netdev(info);
+       if (err)
+               goto fail_open;
 
        return 0;
+
+ fail_open:
+       xennet_sysfs_delif(info->netdev);
+       unregister_netdev(netdev);
+ fail_backend:
+       free_netdev(netdev);
+       dev->dev.driver_data = NULL;
+       return err;
 }
 
 
@@ -478,7 +486,7 @@ static void backend_changed(struct xenbu
        struct netfront_info *np = dev->dev.driver_data;
        struct net_device *netdev = np->netdev;
 
-       DPRINTK("\n");
+       DPRINTK("%s\n", xenbus_strstate(backend_state));
 
        switch (backend_state) {
        case XenbusStateInitialising:
@@ -1887,27 +1895,9 @@ create_netdev(int handle, int copying_re
        SET_MODULE_OWNER(netdev);
        SET_NETDEV_DEV(netdev, &dev->dev);
 
-       err = register_netdev(netdev);
-       if (err) {
-               printk(KERN_WARNING "%s> register_netdev err=%d\n",
-                      __FUNCTION__, err);
-               goto exit_free_rx;
-       }
-
-       err = xennet_sysfs_addif(netdev);
-       if (err) {
-               /* This can be non-fatal: it only means no tuning parameters */
-               printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
-                      __FUNCTION__, err);
-       }
-
        np->netdev = netdev;
-
        return netdev;
 
-
- exit_free_rx:
-       gnttab_free_grant_references(np->gref_rx_head);
  exit_free_tx:
        gnttab_free_grant_references(np->gref_tx_head);
  exit:
@@ -1946,11 +1936,10 @@ static void netfront_closing(struct xenb
 {
        struct netfront_info *info = dev->dev.driver_data;
 
-       DPRINTK("netfront_closing: %s removed\n", dev->nodename);
+       DPRINTK("%s\n", dev->nodename);
 
        close_netdev(info);
-
-       xenbus_switch_state(dev, XenbusStateClosed);
+       xenbus_frontend_closed(dev);
 }
 
 
@@ -1966,6 +1955,26 @@ static int __devexit netfront_remove(str
        return 0;
 }
 
+
+static int open_netdev(struct netfront_info *info)
+{
+       int err;
+       
+       err = register_netdev(info->netdev);
+       if (err) {
+               printk(KERN_WARNING "%s: register_netdev err=%d\n",
+                      __FUNCTION__, err);
+               return err;
+       }
+
+       err = xennet_sysfs_addif(info->netdev);
+       if (err) {
+               /* This can be non-fatal: it only means no tuning parameters */
+               printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
+                      __FUNCTION__, err);
+       }
+       return 0;
+}
 
 static void close_netdev(struct netfront_info *info)
 {
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c   Fri Sep 01 13:04:02 2006 -0600
@@ -132,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
 
+int xenbus_dev_is_online(struct xenbus_device *dev)
+{
+       int rc, val;
+
+       rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
+       if (rc != 1)
+               val = 0; /* no online node present */
+
+       return val;
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
+
 MODULE_LICENSE("Dual BSD/GPL");
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Fri Sep 01 13:04:02 2006 -0600
@@ -41,6 +41,20 @@ extern char *kasprintf(const char *fmt, 
 #define DPRINTK(fmt, args...) \
     pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 
+char *xenbus_strstate(enum xenbus_state state)
+{
+       static char *name[] = {
+               [ XenbusStateUnknown      ] = "Unknown",
+               [ XenbusStateInitialising ] = "Initialising",
+               [ XenbusStateInitWait     ] = "InitWait",
+               [ XenbusStateInitialised  ] = "Initialised",
+               [ XenbusStateConnected    ] = "Connected",
+               [ XenbusStateClosing      ] = "Closing",
+               [ XenbusStateClosed       ] = "Closed",
+       };
+       return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+
 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
                      struct xenbus_watch *watch,
                      void (*callback)(struct xenbus_watch *,
@@ -124,6 +138,13 @@ int xenbus_switch_state(struct xenbus_de
 }
 EXPORT_SYMBOL_GPL(xenbus_switch_state);
 
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+       xenbus_switch_state(dev, XenbusStateClosed);
+       complete(&dev->down);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
 
 /**
  * Return the path to the error node for the given device, or NULL on failure.
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Sep 01 13:04:02 2006 -0600
@@ -73,6 +73,7 @@ static int xenbus_probe_backend(const ch
 
 static int xenbus_dev_probe(struct device *_dev);
 static int xenbus_dev_remove(struct device *_dev);
+static void xenbus_dev_shutdown(struct device *_dev);
 
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
@@ -192,6 +193,7 @@ static struct xen_bus_type xenbus_fronte
                .match    = xenbus_match,
                .probe    = xenbus_dev_probe,
                .remove   = xenbus_dev_remove,
+               .shutdown = xenbus_dev_shutdown,
        },
        .dev = {
                .bus_id = "xen",
@@ -246,6 +248,7 @@ static struct xen_bus_type xenbus_backen
                .match    = xenbus_match,
                .probe    = xenbus_dev_probe,
                .remove   = xenbus_dev_remove,
+//             .shutdown = xenbus_dev_shutdown,
                .uevent   = xenbus_uevent_backend,
        },
        .dev = {
@@ -316,8 +319,9 @@ static void otherend_changed(struct xenb
 
        state = xenbus_read_driver_state(dev->otherend);
 
-       DPRINTK("state is %d, %s, %s",
-               state, dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+       DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
+               dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+
        if (drv->otherend_changed)
                drv->otherend_changed(dev, state);
 }
@@ -348,7 +352,7 @@ static int xenbus_dev_probe(struct devic
        const struct xenbus_device_id *id;
        int err;
 
-       DPRINTK("");
+       DPRINTK("%s", dev->nodename);
 
        if (!drv->probe) {
                err = -ENODEV;
@@ -393,7 +397,7 @@ static int xenbus_dev_remove(struct devi
        struct xenbus_device *dev = to_xenbus_device(_dev);
        struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
 
-       DPRINTK("");
+       DPRINTK("%s", dev->nodename);
 
        free_otherend_watch(dev);
        free_otherend_details(dev);
@@ -403,6 +407,27 @@ static int xenbus_dev_remove(struct devi
 
        xenbus_switch_state(dev, XenbusStateClosed);
        return 0;
+}
+
+static void xenbus_dev_shutdown(struct device *_dev)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+       unsigned long timeout = 5*HZ;
+
+       DPRINTK("%s", dev->nodename);
+
+       get_device(&dev->dev);
+       if (dev->state != XenbusStateConnected) {
+               printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
+                      dev->nodename, xenbus_strstate(dev->state));
+               goto out;
+       }
+       xenbus_switch_state(dev, XenbusStateClosing);
+       timeout = wait_for_completion_timeout(&dev->down, timeout);
+       if (!timeout)
+               printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
+ out:
+       put_device(&dev->dev);
 }
 
 static int xenbus_register_driver_common(struct xenbus_driver *drv,
@@ -587,6 +612,7 @@ static int xenbus_probe_node(struct xen_
        tmpstring += strlen(tmpstring) + 1;
        strcpy(tmpstring, type);
        xendev->devicetype = tmpstring;
+       init_completion(&xendev->down);
 
        xendev->dev.parent = &bus->dev;
        xendev->dev.bus = &bus->bus;
diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Fri Sep 01 12:52:12 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Fri Sep 01 13:04:02 2006 -0600
@@ -37,6 +37,7 @@
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/mutex.h>
+#include <linux/completion.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/xenbus.h>
@@ -74,6 +75,7 @@ struct xenbus_device {
        struct xenbus_watch otherend_watch;
        struct device dev;
        enum xenbus_state state;
+       struct completion down;
 };
 
 static inline struct xenbus_device *to_xenbus_device(struct device *dev)
@@ -297,4 +299,8 @@ void xenbus_dev_fatal(struct xenbus_devi
 
 int __init xenbus_dev_init(void);
 
+char *xenbus_strstate(enum xenbus_state state);
+int xenbus_dev_is_online(struct xenbus_device *dev);
+int xenbus_frontend_closed(struct xenbus_device *dev);
+
 #endif /* _XEN_XENBUS_H */
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/block-aio.c  Fri Sep 01 13:04:02 2006 -0600
@@ -52,7 +52,7 @@
  */
 #define REQUEST_ASYNC_FD 1
 
-#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
 
 struct pending_aio {
        td_callback_t cb;
@@ -146,7 +146,7 @@ int tdaio_open (struct td_state *s, cons
        struct tdaio_state *prv = (struct tdaio_state *)s->private;
        s->private = prv;
 
-       DPRINTF("XXX: block-aio open('%s')", name);
+       DPRINTF("block-aio open('%s')", name);
        /* Initialize AIO */
        prv->iocb_free_count = MAX_AIO_REQS;
        prv->iocb_queued     = 0;
@@ -156,9 +156,18 @@ int tdaio_open (struct td_state *s, cons
 
        if (prv->poll_fd < 0) {
                ret = prv->poll_fd;
-               DPRINTF("Couldn't get fd for AIO poll support.  This is "
-                       "probably because your kernel does not have the "
-                       "aio-poll patch applied.\n");
+                if (ret == -EAGAIN) {
+                        DPRINTF("Couldn't setup AIO context.  If you are "
+                                "trying to concurrently use a large number "
+                                "of blktap-based disks, you may need to "
+                                "increase the system-wide aio request limit. "
+                                "(e.g. 'echo 1048576 > /proc/sys/fs/"
+                                "aio-max-nr')\n");
+                } else {
+                        DPRINTF("Couldn't get fd for AIO poll support.  This "
+                                "is probably because your kernel does not "
+                                "have the aio-poll patch applied.\n");
+                }
                goto done;
        }
 
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/block-qcow.c Fri Sep 01 13:04:02 2006 -0600
@@ -51,7 +51,7 @@
 /******AIO DEFINES******/
 #define REQUEST_ASYNC_FD 1
 #define MAX_QCOW_IDS  0xFFFF
-#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
+#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
 
 struct pending_aio {
         td_callback_t cb;
@@ -176,10 +176,21 @@ static int init_aio_state(struct td_stat
         s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
         s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx);
 
-        if (s->poll_fd < 0) {
-                DPRINTF("Retrieving Async poll fd failed\n");
+       if (s->poll_fd < 0) {
+                if (s->poll_fd == -EAGAIN) {
+                        DPRINTF("Couldn't setup AIO context.  If you are "
+                                "trying to concurrently use a large number "
+                                "of blktap-based disks, you may need to "
+                                "increase the system-wide aio request limit. "
+                                "(e.g. 'echo 1048576 > /proc/sys/fs/"
+                                "aio-max-nr')\n");
+                } else {
+                        DPRINTF("Couldn't get fd for AIO poll support.  This "
+                                "is probably because your kernel does not "
+                                "have the aio-poll patch applied.\n");
+                }
                goto fail;
-        }
+       }
 
         for (i=0;i<MAX_AIO_REQS;i++)
                 s->iocb_free[i] = &s->iocb_list[i];
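
As a rough, standalone illustration of the limit that the new -EAGAIN messages in
block-aio.c and block-qcow.c above point at (not part of this changeset), the Python
sketch below reads the two /proc counters involved and reports whether another
io_setup() of a given size is likely to fail; the helper names and the sample request
count are invented for illustration.

# aio_limit_check.py -- illustrative sketch only, not part of this changeset.
# Reads the system-wide Linux AIO counters referenced by the new error
# messages in block-aio.c / block-qcow.c above.

def read_proc_int(path):
    # Return the integer stored in a /proc/sys entry such as fs.aio-max-nr.
    f = open(path)
    try:
        return int(f.read().strip())
    finally:
        f.close()

def aio_headroom(requests_needed):
    # Report whether another io_setup() asking for 'requests_needed' events
    # would exceed the system-wide limit (the kernel returns -EAGAIN if so).
    max_nr = read_proc_int("/proc/sys/fs/aio-max-nr")
    in_use = read_proc_int("/proc/sys/fs/aio-nr")
    print("aio-nr=%d aio-max-nr=%d" % (in_use, max_nr))
    if in_use + requests_needed > max_nr:
        print("io_setup() would likely fail; raise fs.aio-max-nr")
    else:
        print("enough headroom for %d more requests" % requests_needed)

if __name__ == "__main__":
    aio_headroom(2048)   # sample value; MAX_AIO_REQS depends on build constants
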
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/tapdisk.c
--- a/tools/blktap/drivers/tapdisk.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/drivers/tapdisk.c    Fri Sep 01 13:04:02 2006 -0600
@@ -110,6 +110,7 @@ static void unmap_disk(struct td_state *
        free(s->fd_entry);
        free(s->blkif);
        free(s->ring_info);
+        free(s->private);
        free(s);
 
        return;
diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/lib/xs_api.c
--- a/tools/blktap/lib/xs_api.c Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/blktap/lib/xs_api.c Fri Sep 01 13:04:02 2006 -0600
@@ -204,7 +204,7 @@ int convert_dev_name_to_num(char *name) 
 int convert_dev_name_to_num(char *name) {
        char *p_sd, *p_hd, *p_xvd, *p_plx, *p, *alpha,*ptr;
        int majors[10] = {3,22,33,34,56,57,88,89,90,91};
-       int maj,i;
+       int maj,i,ret = 0;
 
        asprintf(&p_sd,"/dev/sd");
        asprintf(&p_hd,"/dev/hd");
@@ -221,7 +221,7 @@ int convert_dev_name_to_num(char *name) 
                        *ptr++;
                }
                *p++;
-               return BASE_DEV_VAL + (16*i) + atoi(p);
+               ret = BASE_DEV_VAL + (16*i) + atoi(p);
        } else if (strstr(name, p_hd) != NULL) {
                p = name + strlen(p_hd);
                for (i = 0, ptr = alpha; i < strlen(alpha); i++) {
@@ -229,7 +229,7 @@ int convert_dev_name_to_num(char *name) 
                        *ptr++;
                }
                *p++;
-               return (majors[i/2]*256) + atoi(p);
+               ret = (majors[i/2]*256) + atoi(p);
 
        } else if (strstr(name, p_xvd) != NULL) {
                p = name + strlen(p_xvd);
@@ -238,17 +238,24 @@ int convert_dev_name_to_num(char *name) 
                        *ptr++;
                }
                *p++;
-               return (202*256) + (16*i) + atoi(p);
+               ret = (202*256) + (16*i) + atoi(p);
 
        } else if (strstr(name, p_plx) != NULL) {
                p = name + strlen(p_plx);
-               return atoi(p);
+               ret = atoi(p);
 
        } else {
                DPRINTF("Unknown device type, setting to default.\n");
-               return BASE_DEV_VAL;
-       }
-       return 0;
+               ret = BASE_DEV_VAL;
+       }
+
+        free(p_sd);
+        free(p_hd);
+        free(p_xvd);
+        free(p_plx);
+        free(alpha);
+        
+       return ret;
 }
 
 /**
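
For readers following the convert_dev_name_to_num() cleanup above, here is a small
standalone sketch (not part of the changeset) of the arithmetic for the /dev/xvd*
branch only; the function and constant names below are invented for illustration.

# xvd_name_to_num.py -- illustrative sketch, not part of this changeset.
# Mirrors only the /dev/xvd* branch of convert_dev_name_to_num(); the real
# C function also handles /dev/sd*, /dev/hd* and blktap "plx" names.

XVD_MAJOR = 202   # major number the C code uses for /dev/xvd* devices

def xvd_name_to_num(name):
    # '/dev/xvdb3' -> 202*256 + 16*1 + 3 = 51731
    assert name.startswith("/dev/xvd")
    rest = name[len("/dev/xvd"):]
    disk_index = ord(rest[0]) - ord('a')   # 'a' -> 0, 'b' -> 1, ...
    partition = 0
    if len(rest) > 1:
        partition = int(rest[1:])          # atoi() in the C version
    return (XVD_MAJOR * 256) + (16 * disk_index) + partition

if __name__ == "__main__":
    print(xvd_name_to_num("/dev/xvdb3"))   # 51731
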
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/ia64/xc_ia64_stubs.c
--- a/tools/libxc/ia64/xc_ia64_stubs.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/ia64/xc_ia64_stubs.c  Fri Sep 01 13:04:02 2006 -0600
@@ -36,7 +36,6 @@ xc_ia64_get_pfn_list(int xc_handle, uint
     struct xen_domctl domctl;
     int num_pfns,ret;
     unsigned int __start_page, __nr_pages;
-    unsigned long max_pfns;
     xen_pfn_t *__pfn_buf;
 
     __start_page = start_page;
@@ -44,27 +43,22 @@ xc_ia64_get_pfn_list(int xc_handle, uint
     __pfn_buf = pfn_buf;
   
     while (__nr_pages) {
-        max_pfns = ((unsigned long)__start_page << 32) | __nr_pages;
         domctl.cmd = XEN_DOMCTL_getmemlist;
-        domctl.domain   = (domid_t)domid;
-        domctl.u.getmemlist.max_pfns = max_pfns;
+        domctl.domain = (domid_t)domid;
+        domctl.u.getmemlist.max_pfns = __nr_pages;
+        domctl.u.getmemlist.start_pfn =__start_page;
         domctl.u.getmemlist.num_pfns = 0;
         set_xen_guest_handle(domctl.u.getmemlist.buffer, __pfn_buf);
 
-        if ((max_pfns != -1UL)
-            && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) {
+        if (mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) {
             PERROR("Could not lock pfn list buffer");
             return -1;
         }
 
         ret = do_domctl(xc_handle, &domctl);
 
-        if (max_pfns != -1UL)
-            (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
+        (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
 
-        if (max_pfns == -1UL)
-            return 0;
-        
         num_pfns = domctl.u.getmemlist.num_pfns;
         __start_page += num_pfns;
         __nr_pages -= num_pfns;
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/powerpc64/xc_linux_build.c
--- a/tools/libxc/powerpc64/xc_linux_build.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/powerpc64/xc_linux_build.c    Fri Sep 01 13:04:02 2006 -0600
@@ -309,7 +309,7 @@ static unsigned long create_start_info(s
     si->store_evtchn = store_evtchn;
     si->console.domU.mfn = si->nr_pages - 3;
     si->console.domU.evtchn = console_evtchn;
-    si_addr = eomem - (PAGE_SIZE * 4);
+    si_addr = (si->nr_pages - 4) << PAGE_SHIFT;
 
     return si_addr;
 }
@@ -388,7 +388,7 @@ int xc_linux_build(int xc_handle,
     }
 
     si_addr = create_start_info(&si, console_evtchn, store_evtchn);
-    *console_mfn = page_array[si.console_mfn];
+    *console_mfn = page_array[si.console.domU.mfn];
     *store_mfn = page_array[si.store_mfn];
     
     if (install_image(xc_handle, domid, page_array, &si, si_addr,
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/libxc/xenctrl.h     Fri Sep 01 13:04:02 2006 -0600
@@ -8,6 +8,11 @@
 
 #ifndef XENCTRL_H
 #define XENCTRL_H
+
+/* Tell the Xen public headers we are a user-space tools build. */
+#ifndef __XEN_TOOLS__
+#define __XEN_TOOLS__ 1
+#endif
 
 #include <stddef.h>
 #include <stdint.h>
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py   Fri Sep 01 13:04:02 2006 -0600
@@ -161,10 +161,12 @@ def restore(xd, fd):
         if handler.store_mfn is None or handler.console_mfn is None:
             raise XendError('Could not read store/console MFN')
 
+        #Block until src closes connection
+        os.read(fd, 1)
         dominfo.unpause()
-
+        
         dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
-
+        
         return dominfo
     except:
         dominfo.destroy()
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Fri Sep 01 13:04:02 2006 -0600
@@ -431,7 +431,8 @@ class XendDomain:
         sock.send("receive\n")
         sock.recv(80)
         XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst)
-
+        dominfo.testDeviceComplete()
+        sock.close()
 
     def domain_save(self, domid, dst):
         """Start saving a domain to file.
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Sep 01 13:04:02 2006 -0600
@@ -30,7 +30,6 @@ import time
 import time
 import threading
 import os
-import math
 
 import xen.lowlevel.xc
 from xen.util import asserts
@@ -703,6 +702,9 @@ class XendDomainInfo:
                 if security[idx][0] == 'ssidref':
                     to_store['security/ssidref'] = str(security[idx][1])
 
+        if not self.readVm('xend/restart_count'):
+            to_store['xend/restart_count'] = str(0)
+
         log.debug("Storing VM details: %s", to_store)
 
         self.writeVm(to_store)
@@ -823,6 +825,9 @@ class XendDomainInfo:
 
     def setResume(self, state):
         self.info['resume'] = state
+
+    def getRestartCount(self):
+        return self.readVm('xend/restart_count')
 
     def refreshShutdown(self, xeninfo = None):
         # If set at the end of this method, a restart is required, with the
@@ -1280,34 +1285,28 @@ class XendDomainInfo:
                 for v in range(0, self.info['max_vcpu_id']+1):
                     xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
 
-            # set domain maxmem in KiB
-            xc.domain_setmaxmem(self.domid, self.info['maxmem'] * 1024)
-
-            m = self.image.getDomainMemory(self.info['memory'] * 1024)
+            # set memory limit
+            maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024)
+            xc.domain_setmaxmem(self.domid, maxmem)
+
+            mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024)
 
             # get the domain's shadow memory requirement
-            sm = int(math.ceil(self.image.getDomainShadowMemory(m) / 1024.0))
-            if self.info['shadow_memory'] > sm:
-                sm = self.info['shadow_memory']
+            shadow_kb = self.image.getRequiredShadowMemory(mem_kb)
+            shadow_kb_req = self.info['shadow_memory'] * 1024
+            if shadow_kb_req > shadow_kb:
+                shadow_kb = shadow_kb_req
+            shadow_mb = (shadow_kb + 1023) / 1024
 
             # Make sure there's enough RAM available for the domain
-            balloon.free(m + sm * 1024)
+            balloon.free(mem_kb + shadow_mb * 1024)
 
             # Set up the shadow memory
-            sm = xc.shadow_mem_control(self.domid, mb=sm)
-            self.info['shadow_memory'] = sm
-
-            init_reservation = self.info['memory'] * 1024
-            if os.uname()[4] in ('ia64', 'ppc64'):
-                # Workaround for architectures that don't yet support
-                # ballooning.
-                init_reservation = m
-                # Following line from xiantao.zhang@xxxxxxxxx
-                # Needed for IA64 until supports ballooning -- okay for PPC64?
-                xc.domain_setmaxmem(self.domid, m)
-
-            xc.domain_memory_increase_reservation(self.domid, init_reservation,
-                                                  0, 0)
+            shadow_cur = xc.shadow_mem_control(self.domid, shadow_mb)
+            self.info['shadow_memory'] = shadow_cur
+
+            # initial memory allocation
+            xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0)
 
             self.createChannels()
 
@@ -1495,6 +1494,21 @@ class XendDomainInfo:
             if rc != 0:
                 raise XendError("Device of type '%s' refuses migration." % n)
 
+    def testDeviceComplete(self):
+        """ For Block IO migration safety we must ensure that
+        the device has shutdown correctly, i.e. all blocks are
+        flushed to disk
+        """
+        while True:
+            test = 0
+            for i in self.getDeviceController('vbd').deviceIDs():
+                test = 1
+                log.info("Dev %s still active, looping...", i)
+                time.sleep(0.1)
+                
+            if test == 0:
+                break
+
     def migrateDevices(self, network, dst, step, domName=''):
         """Notify the devices about migration
         """
@@ -1615,6 +1629,9 @@ class XendDomainInfo:
             try:
                 new_dom = XendDomain.instance().domain_create(config)
                 new_dom.unpause()
+                rst_cnt = self.readVm('xend/restart_count')
+                rst_cnt = int(rst_cnt) + 1
+                self.writeVm('xend/restart_count', str(rst_cnt))
                 new_dom.removeVm(RESTART_IN_PROGRESS)
             except:
                 if new_dom:
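
The memory sizing change above replaces getDomainMemory/getDomainShadowMemory with
getRequiredMemory/getRequiredShadowMemory and rounds the shadow allocation up to whole
megabytes. A minimal standalone sketch of that rounding follows (not part of the
changeset; the function name is invented).

# shadow_sizing.py -- illustrative sketch, not part of this changeset.
# Reproduces the rounding introduced in XendDomainInfo.py above: take the
# larger of the image handler's shadow estimate and the configured value,
# then round up to whole MiB before calling xc.shadow_mem_control().

def shadow_mb_needed(shadow_kb_computed, shadow_memory_mb_config):
    shadow_kb_req = shadow_memory_mb_config * 1024
    shadow_kb = max(shadow_kb_computed, shadow_kb_req)
    return (shadow_kb + 1023) // 1024      # round up to whole MiB

if __name__ == "__main__":
    # e.g. the image handler asks for 1536 KiB while the config requests 1 MB:
    # the computed value wins and rounds up to 2 MB.
    print(shadow_mb_needed(1536, 1))       # -> 2
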
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/image.py    Fri Sep 01 13:04:02 2006 -0600
@@ -27,6 +27,8 @@ from xen.xend.XendLogging import log
 from xen.xend.XendLogging import log
 from xen.xend.server.netif import randomMAC
 from xen.xend.xenstore.xswatch import xswatch
+from xen.xend import arch
+from xen.xend import FlatDeviceTree
 
 
 xc = xen.lowlevel.xc.xc()
@@ -141,19 +143,10 @@ class ImageHandler:
             raise VmError('Building domain failed: ostype=%s dom=%d err=%s'
                           % (self.ostype, self.vm.getDomid(), str(result)))
 
-
-    def getDomainMemory(self, mem_kb):
-        """@return The memory required, in KiB, by the domain to store the
-        given amount, also in KiB."""
-        if os.uname()[4] != 'ia64':
-            # A little extra because auto-ballooning is broken w.r.t. HVM
-            # guests. Also, slack is necessary for live migration since that
-            # uses shadow page tables.
-            if 'hvm' in xc.xeninfo()['xen_caps']:
-                mem_kb += 4*1024;
+    def getRequiredMemory(self, mem_kb):
         return mem_kb
 
-    def getDomainShadowMemory(self, mem_kb):
+    def getRequiredShadowMemory(self, mem_kb):
         """@return The minimum shadow memory required, in KiB, for a domain 
         with mem_kb KiB of RAM."""
         # PV domains don't need any shadow memory
@@ -197,9 +190,39 @@ class LinuxImageHandler(ImageHandler):
                               ramdisk        = self.ramdisk,
                               features       = self.vm.getFeatures())
 
+class PPC_LinuxImageHandler(LinuxImageHandler):
+
+    ostype = "linux"
+
+    def configure(self, imageConfig, deviceConfig):
+        LinuxImageHandler.configure(self, imageConfig, deviceConfig)
+        self.imageConfig = imageConfig
+
+    def buildDomain(self):
+        store_evtchn = self.vm.getStorePort()
+        console_evtchn = self.vm.getConsolePort()
+
+        log.debug("dom            = %d", self.vm.getDomid())
+        log.debug("image          = %s", self.kernel)
+        log.debug("store_evtchn   = %d", store_evtchn)
+        log.debug("console_evtchn = %d", console_evtchn)
+        log.debug("cmdline        = %s", self.cmdline)
+        log.debug("ramdisk        = %s", self.ramdisk)
+        log.debug("vcpus          = %d", self.vm.getVCpuCount())
+        log.debug("features       = %s", self.vm.getFeatures())
+
+        devtree = FlatDeviceTree.build(self)
+
+        return xc.linux_build(dom            = self.vm.getDomid(),
+                              image          = self.kernel,
+                              store_evtchn   = store_evtchn,
+                              console_evtchn = console_evtchn,
+                              cmdline        = self.cmdline,
+                              ramdisk        = self.ramdisk,
+                              features       = self.vm.getFeatures(),
+                              arch_args      = devtree.to_bin())
+
 class HVMImageHandler(ImageHandler):
-
-    ostype = "hvm"
 
     def configure(self, imageConfig, deviceConfig):
         ImageHandler.configure(self, imageConfig, deviceConfig)
@@ -282,7 +305,7 @@ class HVMImageHandler(ImageHandler):
         for (name, info) in deviceConfig:
             if name == 'vbd':
                 uname = sxp.child_value(info, 'uname')
-                if 'file:' in uname:
+                if uname is not None and 'file:' in uname:
                     (_, vbdparam) = string.split(uname, ':', 1)
                     if not os.path.isfile(vbdparam):
                         raise VmError('Disk image does not exist: %s' %
@@ -355,32 +378,6 @@ class HVMImageHandler(ImageHandler):
         os.waitpid(self.pid, 0)
         self.pid = 0
 
-    def getDomainMemory(self, mem_kb):
-        """@see ImageHandler.getDomainMemory"""
-        if os.uname()[4] == 'ia64':
-            page_kb = 16
-            # ROM size for guest firmware, ioreq page and xenstore page
-            extra_pages = 1024 + 2
-        else:
-            page_kb = 4
-            # This was derived emperically:
-            #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
-            #   + 4 to avoid low-memory condition
-            extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
-            extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
-        return mem_kb + extra_pages * page_kb
-
-    def getDomainShadowMemory(self, mem_kb):
-        """@return The minimum shadow memory required, in KiB, for a domain 
-        with mem_kb KiB of RAM."""
-        if os.uname()[4] in ('ia64', 'ppc64'):
-            # Explicit shadow memory is not a concept 
-            return 0
-        else:
-            # 1MB per vcpu plus 4Kib/Mib of RAM.  This is higher than 
-            # the minimum that Xen would allocate if no value were given.
-            return 1024 * self.vm.getVCpuCount() + mem_kb / 256
-
     def register_shutdown_watch(self):
         """ add xen store watch on control/shutdown """
         self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", \
@@ -417,15 +414,51 @@ class HVMImageHandler(ImageHandler):
 
         return 1 # Keep watching
 
-"""Table of image handler classes for virtual machine images.  Indexed by
-image type.
-"""
-imageHandlerClasses = {}
-
-
-for h in LinuxImageHandler, HVMImageHandler:
-    imageHandlerClasses[h.ostype] = h
-
+class IA64_HVM_ImageHandler(HVMImageHandler):
+
+    ostype = "hvm"
+
+    def getRequiredMemory(self, mem_kb):
+        page_kb = 16
+        # ROM size for guest firmware, ioreq page and xenstore page
+        extra_pages = 1024 + 2
+        return mem_kb + extra_pages * page_kb
+
+    def getRequiredShadowMemory(self, mem_kb):
+        # Explicit shadow memory is not a concept on ia64
+        return 0
+
+class X86_HVM_ImageHandler(HVMImageHandler):
+
+    ostype = "hvm"
+
+    def getRequiredMemory(self, mem_kb):
+        page_kb = 4
+        # This was derived empirically:
+        #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
+        #   + 4 to avoid low-memory condition
+        extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
+        extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
+        return mem_kb + extra_pages * page_kb
+
+    def getRequiredShadowMemory(self, mem_kb):
+        # 1MB per vcpu plus 4KiB per MiB of RAM.  This is higher than
+        # the minimum that Xen would allocate if no value were given.
+        return 1024 * self.vm.getVCpuCount() + mem_kb / 256
+
+_handlers = {
+    "powerpc": {
+        "linux": PPC_LinuxImageHandler,
+    },
+    "ia64": {
+        "linux": LinuxImageHandler,
+        "hvm": IA64_HVM_ImageHandler,
+    },
+    "x86": {
+        "linux": LinuxImageHandler,
+        "hvm": X86_HVM_ImageHandler,
+    },
+}
 
 def findImageHandlerClass(image):
     """Find the image handler class for an image config.
@@ -433,10 +466,10 @@ def findImageHandlerClass(image):
     @param image config
     @return ImageHandler subclass or None
     """
-    ty = sxp.name(image)
-    if ty is None:
+    type = sxp.name(image)
+    if type is None:
         raise VmError('missing image type')
-    imageClass = imageHandlerClasses.get(ty)
-    if imageClass is None:
-        raise VmError('unknown image type: ' + ty)
-    return imageClass
+    try:
+        return _handlers[arch.type][type]
+    except KeyError:
+        raise VmError('unknown image type: ' + type)
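
As a rough illustration (not part of the changeset), the x86 HVM overhead formula and the new two-level handler table above work out as follows. The standalone function is a sketch mirroring X86_HVM_ImageHandler.getRequiredMemory(), not the class itself, and the string class names in the lookup table are placeholders.

    import math

    def x86_hvm_required_memory(mem_kb):
        # Mirrors X86_HVM_ImageHandler.getRequiredMemory() above: roughly
        # 2.4 MB of overhead per 1024 MB of RAM plus a 12 MB constant,
        # rounded up to whole 4 KiB pages.
        page_kb = 4
        extra_mb = (2.4 / 1024) * (mem_kb / 1024.0) + 12
        extra_pages = int(math.ceil(extra_mb * 1024 / page_kb))
        return mem_kb + extra_pages * page_kb

    # A 512 MiB HVM guest gets ~13.2 MB of slack: 524288 KiB -> 537808 KiB.
    print(x86_hvm_required_memory(512 * 1024))

    # Handler lookup is now keyed by architecture and then image type; a
    # missing entry becomes the "unknown image type" VmError.
    _handlers = {"x86": {"linux": "LinuxImageHandler", "hvm": "X86_HVM_ImageHandler"}}
    print(_handlers["x86"]["hvm"])
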
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/server/DevController.py     Fri Sep 01 13:04:02 2006 -0600
@@ -207,6 +207,9 @@ class DevController:
 
         devid = int(devid)
 
+        # Modify online status /before/ updating state (latter is watched by
+        # drivers, so this ordering avoids a race).
+        self.writeBackend(devid, 'online', "0")
         self.writeBackend(devid, 'state', str(xenbusState['Closing']))
 
 
@@ -406,7 +409,8 @@ class DevController:
             'domain' : self.vm.getName(),
             'frontend' : frontpath,
             'frontend-id' : "%i" % self.vm.getDomid(),
-            'state' : str(xenbusState['Initialising'])
+            'state' : str(xenbusState['Initialising']),
+            'online' : "1"
             })
 
         return (backpath, frontpath)
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/XMLRPCServer.py
--- a/tools/python/xen/xend/server/XMLRPCServer.py      Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/server/XMLRPCServer.py      Fri Sep 01 13:04:02 2006 -0600
@@ -78,7 +78,8 @@ methods = ['device_create', 'device_conf
 methods = ['device_create', 'device_configure', 'destroyDevice',
            'getDeviceSxprs',
            'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
-           'send_sysrq', 'getVCPUInfo', 'waitForDevices']
+           'send_sysrq', 'getVCPUInfo', 'waitForDevices',
+           'getRestartCount']
 
 exclude = ['domain_create', 'domain_restore']
 
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xend/server/blkif.py     Fri Sep 01 13:04:02 2006 -0600
@@ -52,10 +52,18 @@ class BlkifController(DevController):
         except ValueError:
             dev_type = "disk"
 
-        try:
-            (typ, params) = string.split(uname, ':', 1)
-        except ValueError:
-            (typ, params) = ("", "")
+        if uname is None:
+            if dev_type == 'cdrom':
+                (typ, params) = ("", "")
+            else:
+                raise VmError(
+                    'Block device must have physical details specified')
+        else:
+            try:
+                (typ, params) = string.split(uname, ':', 1)
+            except ValueError:
+                (typ, params) = ("", "")
+
         back = { 'dev'    : dev,
                  'type'   : typ,
                  'params' : params,
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py    Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xm/migrate.py    Fri Sep 01 13:04:02 2006 -0600
@@ -57,7 +57,8 @@ def main(argv):
         opts.usage()
         return
     if len(args) != 2:
-        opts.err('Invalid arguments: ' + str(args))
+        opts.usage()
+        sys.exit(1)
     dom = args[0]
     dst = args[1]
     server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.resource, opts.vals.port)
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xm/shutdown.py
--- a/tools/python/xen/xm/shutdown.py   Fri Sep 01 12:52:12 2006 -0600
+++ b/tools/python/xen/xm/shutdown.py   Fri Sep 01 13:04:02 2006 -0600
@@ -48,21 +48,48 @@ gopts.opt('reboot', short='R',
           fn=set_true, default=0,
           use='Shutdown and reboot.')
 
+def wait_reboot(opts, doms, rcs):
+    while doms:
+        alive = server.xend.domains(0)
+        reboot = []
+        for d in doms:
+            if d in alive:
+                rc = server.xend.domain.getRestartCount(d)
+                if rc == rcs[d]: continue
+                reboot.append(d)
+            else:
+                opts.info("Domain %s destroyed for failed in rebooting" % d)
+                doms.remove(d)
+        for d in reboot:
+            opts.info("Domain %s rebooted" % d)
+            doms.remove(d)
+        time.sleep(1)
+    opts.info("All domains rebooted")
+
+def wait_shutdown(opts, doms):
+    while doms:
+        alive = server.xend.domains(0)
+        dead = []
+        for d in doms:
+            if d in alive: continue
+            dead.append(d)
+        for d in dead:
+            opts.info("Domain %s terminated" % d)
+            doms.remove(d)
+        time.sleep(1)
+    opts.info("All domains terminated")
+
 def shutdown(opts, doms, mode, wait):
+    rcs = {}
     for d in doms:
+        rcs[d] = server.xend.domain.getRestartCount(d)
         server.xend.domain.shutdown(d, mode)
+
     if wait:
-        while doms:
-            alive = server.xend.domains(0)
-            dead = []
-            for d in doms:
-                if d in alive: continue
-                dead.append(d)
-            for d in dead:
-                opts.info("Domain %s terminated" % d)
-                doms.remove(d)
-            time.sleep(1)
-        opts.info("All domains terminated")
+        if mode == 'reboot':
+            wait_reboot(opts, doms, rcs)
+        else:
+            wait_shutdown(opts, doms)
 
 def shutdown_mode(opts):
     if opts.vals.halt and opts.vals.reboot:
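
As a rough illustration (not part of the changeset), 'xm shutdown -R --wait' now detects a completed reboot by comparing restart counts instead of waiting for the domain to disappear. The helper below is a sketch with illustrative callables, assuming getRestartCount() returns the integer stored under xend/restart_count.

    import time

    def wait_reboot_sketch(doms, get_restart_count, list_alive, poll=1.0):
        # Snapshot each domain's restart count first (the patch takes this
        # snapshot in shutdown() before issuing the shutdown request).
        baseline = dict((d, get_restart_count(d)) for d in doms)
        pending = set(doms)
        while pending:
            alive = set(list_alive())
            for d in list(pending):
                if d not in alive:
                    print("Domain %s destroyed rather than rebooted" % d)
                    pending.discard(d)
                elif get_restart_count(d) != baseline[d]:
                    print("Domain %s rebooted" % d)
                    pending.discard(d)
            if pending:
                time.sleep(poll)
        print("All domains rebooted")

The same baseline-then-poll pattern is what shutdown() and wait_reboot() above implement, with server.xend.domains(0) supplying the list of live domains.
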
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/ia64/xen/dom0_ops.c      Fri Sep 01 13:04:02 2006 -0600
@@ -40,8 +40,8 @@ long arch_do_domctl(xen_domctl_t *op, XE
     {
         unsigned long i;
         struct domain *d = find_domain_by_id(op->domain);
-        unsigned long start_page = op->u.getmemlist.max_pfns >> 32;
-        unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff;
+        unsigned long start_page = op->u.getmemlist.start_pfn;
+        unsigned long nr_pages = op->u.getmemlist.max_pfns;
         unsigned long mfn;
 
         if ( d == NULL ) {
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/Makefile
--- a/xen/arch/powerpc/Makefile Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/Makefile Fri Sep 01 13:04:02 2006 -0600
@@ -6,6 +6,7 @@ subdir-y += papr
 subdir-y += papr
 
 obj-y += audit.o
+obj-y += backtrace.o
 obj-y += bitops.o
 obj-y += boot_of.o
 obj-y += dart.o
@@ -19,19 +20,21 @@ obj-y += external.o
 obj-y += external.o
 obj-y += float.o
 obj-y += hcalls.o
-obj-y += htab.o
 obj-y += iommu.o
 obj-y += irq.o
 obj-y += mambo.o
+obj-y += memory.o
 obj-y += mm.o
 obj-y += mpic.o
 obj-y += mpic_init.o
 obj-y += of-devtree.o
 obj-y += of-devwalk.o
 obj-y += ofd_fixup.o
+obj-y += ofd_fixup_memory.o
 obj-y += physdev.o
 obj-y += rtas.o
 obj-y += setup.o
+obj-y += shadow.o
 obj-y += smp.o
 obj-y += time.o
 obj-y += usercopy.o
@@ -47,6 +50,7 @@ obj-y += elf32.o
 # These are extra warnings like for the arch/ppc directory but may not
 # allow the rest of the tree to build.
 PPC_C_WARNINGS += -Wundef -Wmissing-prototypes -Wmissing-declarations
+PPC_C_WARNINGS += -Wshadow
 CFLAGS += $(PPC_C_WARNINGS)
 
 LINK=0x400000
@@ -91,8 +95,27 @@ start.o: boot/start.S
 start.o: boot/start.S
        $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
 
-$(TARGET)-syms: start.o $(ALL_OBJS) xen.lds
-       $(CC) $(CFLAGS) $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds 
start.o $(ALL_OBJS) -o $@
+TARGET_OPTS = $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds
+TARGET_OPTS += start.o $(ALL_OBJS)
+
+.xen-syms: start.o $(ALL_OBJS) xen.lds
+       $(CC) $(CFLAGS) $(TARGET_OPTS) -o $@
+
+NM=$(CROSS_COMPILE)nm
+new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
+
+ifeq ($(new_nm),y)
+NM             := $(NM) --synthetic
+endif
+
+xen-syms.S: .xen-syms
+       $(NM) -n $^ | $(BASEDIR)/tools/symbols > $@
+
+xen-syms.o: xen-syms.S
+       $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+
+$(TARGET)-syms: start.o $(ALL_OBJS) xen-syms.o xen.lds
+       $(CC) $(CFLAGS) $(TARGET_OPTS) xen-syms.o -o $@
 
 $(TARGET).bin: $(TARGET)-syms
        $(CROSS_COMPILE)objcopy --output-target=binary $< $@
@@ -122,4 +145,4 @@ dom0.bin: $(DOM0_IMAGE)
 
 clean::
        $(MAKE) -f $(BASEDIR)/Rules.mk -C of_handler clean
-       rm -f firmware firmware_image dom0.bin
+       rm -f firmware firmware_image dom0.bin .xen-syms
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/Rules.mk
--- a/xen/arch/powerpc/Rules.mk Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/Rules.mk Fri Sep 01 13:04:02 2006 -0600
@@ -4,7 +4,7 @@ LD := $(CROSS_COMPILE)ld
 LD := $(CROSS_COMPILE)ld
 
 # These are goodess that applies to all source.
-C_WARNINGS := -Wpointer-arith -Wredundant-decls
+C_WARNINGS := -Wredundant-decls
 
 # _no_ common code can have packed data structures or we are in touble.
 C_WARNINGS += -Wpacked
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/boot_of.c
--- a/xen/arch/powerpc/boot_of.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/boot_of.c        Fri Sep 01 13:04:02 2006 -0600
@@ -26,10 +26,14 @@
 #include <xen/spinlock.h>
 #include <xen/serial.h>
 #include <xen/time.h>
+#include <xen/sched.h>
 #include <asm/page.h>
 #include <asm/io.h>
 #include "exceptions.h"
 #include "of-devtree.h"
+
+/* Secondary processors use this for handshaking with main processor.  */
+volatile unsigned int __spin_ack;
 
 static ulong of_vec;
 static ulong of_msr;
@@ -322,17 +326,18 @@ static void __init of_test(const char *o
     }
 }
 
-static int __init of_claim(void * virt, u32 size)
+static int __init of_claim(u32 virt, u32 size, u32 align)
 {
     int rets[1] = { OF_FAILURE };
     
-    of_call("claim", 3, 1, rets, virt, size, 0/*align*/);
+    of_call("claim", 3, 1, rets, virt, size, align);
     if (rets[0] == OF_FAILURE) {
-        DBG("%s 0x%p 0x%08x -> FAIL\n", __func__, virt, size);
+        DBG("%s 0x%08x 0x%08x  0x%08x -> FAIL\n", __func__, virt, size, align);
         return OF_FAILURE;
     }
 
-    DBG("%s 0x%p 0x%08x -> 0x%x\n", __func__, virt, size, rets[0]);
+    DBG("%s 0x%08x 0x%08x  0x%08x -> 0x%08x\n", __func__, virt, size, align,
+        rets[0]);
     return rets[0];
 }
 
@@ -683,32 +688,53 @@ static int boot_of_fixup_chosen(void *me
 }
 
 static ulong space_base;
-static ulong find_space(u32 size, ulong align, multiboot_info_t *mbi)
+
+/*
+ * The following function is necessary because we cannot depend on all
+ * FW to actually allocate us any space, so we look for it _hoping_
+ * that it will at least fail if we try to claim something that
+ * belongs to FW.  This hope does not seem to hold on some versions
+ * of PIBS.
+ */
+static ulong find_space(u32 size, u32 align, multiboot_info_t *mbi)
 {
     memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr);
     ulong eomem = ((u64)map->length_high << 32) | (u64)map->length_low;
     ulong base;
 
-    of_printf("%s base=0x%016lx  eomem=0x%016lx  size=0x%08x  align=0x%lx\n",
+    if (size == 0)
+        return 0;
+
+    if (align == 0)
+        of_panic("cannot call %s() with align of 0\n", __func__);
+
+#ifdef BROKEN_CLAIM_WORKAROUND
+    {
+        static int broken_claim;
+        if (!broken_claim) {
+            /* just try the claim and let the FW choose the address */
+            base = of_claim(0, size, align);
+            if (base != OF_FAILURE)
+                return base;
+            of_printf("%s: Firmware does not allocate memory for you\n",
+                      __func__);
+            broken_claim = 1;
+        }
+    }
+#endif
+
+    of_printf("%s base=0x%016lx  eomem=0x%016lx  size=0x%08x  align=0x%x\n",
                     __func__, space_base, eomem, size, align);
     base = ALIGN_UP(space_base, PAGE_SIZE);
-    if ((base + size) >= 0x4000000) return 0;
-    if (base + size > eomem) of_panic("not enough RAM\n");
-
-    if (size == 0) return base;
-    if (of_claim((void*)base, size) != OF_FAILURE) {
-        space_base = base + size;
-        return base;
-    } else {
-        for(base += 0x100000; (base+size) < 0x4000000; base += 0x100000) {
-            of_printf("Trying 0x%016lx\n", base);
-            if (of_claim((void*)base, size) != OF_FAILURE) {
-                space_base = base + size;
-                return base;
-            }
-        }
-        return 0;
-    }
+
+    while ((base + size) < rma_size(cpu_default_rma_order_pages())) {
+        if (of_claim(base, size, 0) != OF_FAILURE) {
+            space_base = base + size;
+            return base;
+        }
+        base += (PAGE_SIZE >  align) ? PAGE_SIZE : align;
+    }
+    of_panic("Cannot find memory in the RMA\n");
 }
 
 /* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges
@@ -834,9 +860,8 @@ static void boot_of_module(ulong r3, ulo
     static module_t mods[3];
     void *oftree;
     ulong oftree_sz = 48 * PAGE_SIZE;
-    char *mod0_start;
+    ulong mod0_start;
     ulong mod0_size;
-    ulong mod0;
     static const char sepr[] = " -- ";
     extern char dom0_start[] __attribute__ ((weak));
     extern char dom0_size[] __attribute__ ((weak));
@@ -844,59 +869,48 @@ static void boot_of_module(ulong r3, ulo
 
     if ((r3 > 0) && (r4 > 0)) {
         /* was it handed to us in registers ? */
-        mod0_start = (void *)r3;
+        mod0_start = r3;
         mod0_size = r4;
+            of_printf("%s: Dom0 was loaded and found using r3/r4:"
+                      "0x%lx[size 0x%lx]\n",
+                      __func__, mod0_start, mod0_size);
     } else {
         /* see if it is in the boot params */
         p = strstr((char *)((ulong)mbi->cmdline), "dom0_start=");
         if ( p != NULL) {
             p += 11;
-            mod0_start = (char *)simple_strtoul(p, NULL, 0);
+            mod0_start = simple_strtoul(p, NULL, 0);
 
             p = strstr((char *)((ulong)mbi->cmdline), "dom0_size=");
             p += 10;
             mod0_size = simple_strtoul(p, NULL, 0);
-
-            of_printf("mod0: %o %c %c %c\n",
-                      mod0_start[0],
-                      mod0_start[1],
-                      mod0_start[2],
-                      mod0_start[3]);
-
+            of_printf("%s: Dom0 was loaded and found using cmdline:"
+                      "0x%lx[size 0x%lx]\n",
+                      __func__, mod0_start, mod0_size);
         } else if ( ((ulong)dom0_start != 0) && ((ulong)dom0_size != 0) ) {
             /* was it linked in ? */
         
-            mod0_start = dom0_start;
+            mod0_start = (ulong)dom0_start;
             mod0_size = (ulong)dom0_size;
-            of_printf("%s: linked in module copied after _end "
-                      "(start 0x%p size 0x%lx)\n",
+            of_printf("%s: Dom0 is linked in: 0x%lx[size 0x%lx]\n",
                       __func__, mod0_start, mod0_size);
         } else {
-            mod0_start = _end;
+            mod0_start = (ulong)_end;
             mod0_size = 0;
-        }
+            of_printf("%s: FYI Dom0 is unknown, will be caught later\n",
+                      __func__);
+        }
+    }
+
+    if (mod0_size > 0) {
+        const char *c = (const char *)mod0_start;
+
+        of_printf("mod0: %o %c %c %c\n", c[0], c[1], c[2], c[3]);
     }
 
     space_base = (ulong)_end;
-    mod0 = find_space(mod0_size, PAGE_SIZE, mbi);
-
-    /* three cases
-     * 1) mod0_size is not 0 and the image can be copied
-     * 2) mod0_size is not 0 and the image cannot be copied
-     * 3) mod0_size is 0
-     */
-    if (mod0_size > 0) {
-        if (mod0 != 0) {
-            memcpy((void *)mod0, mod0_start, mod0_size);
-            mods[0].mod_start = mod0;
-            mods[0].mod_end = mod0 + mod0_size;
-        } else {
-            of_panic("No space to copy mod0\n");
-        }
-    } else {
-        mods[0].mod_start = mod0;
-        mods[0].mod_end = mod0;
-    }
+    mods[0].mod_start = mod0_start;
+    mods[0].mod_end = mod0_start + mod0_size;
 
     of_printf("%s: mod[0] @ 0x%016x[0x%x]\n", __func__,
               mods[0].mod_start, mods[0].mod_end);
@@ -909,15 +923,22 @@ static void boot_of_module(ulong r3, ulo
 
     /* snapshot the tree */
     oftree = (void*)find_space(oftree_sz, PAGE_SIZE, mbi);
-    if (oftree == 0) of_panic("Could not allocate OFD tree\n");
+    if (oftree == 0)
+        of_panic("Could not allocate OFD tree\n");
 
     of_printf("creating oftree\n");
     of_test("package-to-path");
-    ofd_create(oftree, oftree_sz);
+    oftree = ofd_create(oftree, oftree_sz);
     pkg_save(oftree);
+
+    if (ofd_size(oftree) > oftree_sz)
+         of_panic("Could not fit all of native devtree\n");
 
     boot_of_fixup_refs(oftree);
     boot_of_fixup_chosen(oftree);
+
+    if (ofd_size(oftree) > oftree_sz)
+         of_panic("Could not fit all devtree fixups\n");
 
     ofd_walk(oftree, OFD_ROOT, /* add_hype_props */ NULL, 2);
 
@@ -937,7 +958,7 @@ static int __init boot_of_cpus(void)
 static int __init boot_of_cpus(void)
 {
     int cpus;
-    int cpu;
+    int cpu, bootcpu, logical;
     int result;
     u32 cpu_clock[2];
 
@@ -962,10 +983,68 @@ static int __init boot_of_cpus(void)
     cpu_khz /= 1000;
     of_printf("OF: clock-frequency = %ld KHz\n", cpu_khz);
 
-    /* FIXME: should not depend on the boot CPU bring the first child */
+    /* Look up which CPU we are running on right now.  */
+    result = of_getprop(bof_chosen, "cpu", &bootcpu, sizeof (bootcpu));
+    if (result == OF_FAILURE)
+        of_panic("Failed to look up boot cpu\n");
+
     cpu = of_getpeer(cpu);
-    while (cpu > 0) {
-        of_start_cpu(cpu, (ulong)spin_start, 0);
+
+    /* We want a contiguous logical cpu number space.  */
+    cpu_set(0, cpu_present_map);
+    cpu_set(0, cpu_online_map);
+    cpu_set(0, cpu_possible_map);
+
+    /* Spin up all CPUS, even if there are more than NR_CPUS, because
+     * Open Firmware has them spinning on cache lines which will
+     * eventually be scrubbed, which could lead to random CPU activation.
+     */
+    for (logical = 1; cpu > 0; logical++) {
+        unsigned int cpuid, ping, pong;
+        unsigned long now, then, timeout;
+
+        if (cpu == bootcpu) {
+            of_printf("skipping boot cpu!\n");
+            continue;
+        }
+
+        result = of_getprop(cpu, "reg", &cpuid, sizeof(cpuid));
+        if (result == OF_FAILURE)
+            of_panic("cpuid lookup failed\n");
+
+        of_printf("spinning up secondary processor #%d: ", logical);
+
+        __spin_ack = ~0x0;
+        ping = __spin_ack;
+        pong = __spin_ack;
+        of_printf("ping = 0x%x: ", ping);
+
+        mb();
+        result = of_start_cpu(cpu, (ulong)spin_start, logical);
+        if (result == OF_FAILURE)
+            of_panic("start cpu failed\n");
+
+        /* We will give the secondary processor five seconds to reply.  */
+        then = mftb();
+        timeout = then + (5 * timebase_freq);
+
+        do {
+            now = mftb();
+            if (now >= timeout) {
+                of_printf("BROKEN: ");
+                break;
+            }
+
+            mb();
+            pong = __spin_ack;
+        } while (pong == ping);
+        of_printf("pong = 0x%x\n", pong);
+
+        if (pong != ping) {
+            cpu_set(logical, cpu_present_map);
+            cpu_set(logical, cpu_possible_map);
+        }
+
         cpu = of_getpeer(cpu);
     }
     return 1;
@@ -1013,6 +1092,7 @@ multiboot_info_t __init *boot_of_init(
     boot_of_rtas();
 
     /* end of OF */
+    of_printf("Quiescing Open Firmware ...\n");
     of_call("quiesce", 0, 0, NULL);
 
     return &mbi;
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/dart_u3.c
--- a/xen/arch/powerpc/dart_u3.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/dart_u3.c        Fri Sep 01 13:04:02 2006 -0600
@@ -55,10 +55,10 @@ static void u3_inv_all(void)
         dc.reg.dc_invtlb = 1;
         out_32(dart_ctl_reg, dc.dc_word);
 
-    do {
-        dc.dc_word = in_32(dart_ctl_reg);
-        r++;
-    } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l)));
+        do {
+            dc.dc_word = in_32(dart_ctl_reg);
+            r++;
+        } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l)));
 
         if (r == (1 << l)) {
             if (l < 4) {
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/dom0_ops.c
--- a/xen/arch/powerpc/dom0_ops.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/dom0_ops.c       Fri Sep 01 13:04:02 2006 -0600
@@ -23,16 +23,20 @@
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <xen/guest_access.h>
+#include <xen/shadow.h>
 #include <public/xen.h>
 #include <public/domctl.h>
 #include <public/sysctl.h>
 
+void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *);
 void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c)
 { 
     memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
     /* XXX fill in rest of vcpu_guest_context_t */
 }
 
+long arch_do_domctl(struct xen_domctl *domctl,
+                    XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
 long arch_do_domctl(struct xen_domctl *domctl,
                     XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
 {
@@ -75,6 +79,19 @@ long arch_do_domctl(struct xen_domctl *d
         }
     }
     break;
+    case XEN_DOMCTL_shadow_op:
+    {
+        struct domain *d;
+        ret = -ESRCH;
+        d = find_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl);
+            put_domain(d);
+            copy_to_guest(u_domctl, domctl, 1);
+        } 
+    }
+    break;
 
     default:
         ret = -ENOSYS;
@@ -84,6 +101,8 @@ long arch_do_domctl(struct xen_domctl *d
     return ret;
 }
 
+long arch_do_sysctl(struct xen_sysctl *sysctl,
+                    XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl);
 long arch_do_sysctl(struct xen_sysctl *sysctl,
                     XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
 {
@@ -109,6 +128,7 @@ long arch_do_sysctl(struct xen_sysctl *s
     break;
 
     default:
+        printk("%s: unsupported sysctl: 0x%x\n", __func__, (sysctl->cmd));
         ret = -ENOSYS;
         break;
     }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/domain.c Fri Sep 01 13:04:02 2006 -0600
@@ -27,6 +27,8 @@
 #include <xen/domain.h>
 #include <xen/console.h>
 #include <xen/shutdown.h>
+#include <xen/shadow.h>
+#include <xen/mm.h>
 #include <asm/htab.h>
 #include <asm/current.h>
 #include <asm/hcalls.h>
@@ -75,7 +77,8 @@ int arch_domain_create(struct domain *d)
 {
     unsigned long rma_base;
     unsigned long rma_sz;
-    uint htab_order;
+    uint rma_order_pages;
+    int rc;
 
     if (d->domain_id == IDLE_DOMAIN_ID) {
         d->shared_info = (void *)alloc_xenheap_page();
@@ -84,44 +87,31 @@ int arch_domain_create(struct domain *d)
         return 0;
     }
 
-    d->arch.rma_order = cpu_rma_order();
-    rma_sz = rma_size(d->arch.rma_order);
-
     /* allocate the real mode area */
-    d->max_pages = 1UL << d->arch.rma_order;
+    rma_order_pages = cpu_default_rma_order_pages();
+    d->max_pages = 1UL << rma_order_pages;
     d->tot_pages = 0;
-    d->arch.rma_page = alloc_domheap_pages(d, d->arch.rma_order, 0);
-    if (NULL == d->arch.rma_page)
-        return 1;
+
+    rc = allocate_rma(d, rma_order_pages);
+    if (rc)
+        return rc;
     rma_base = page_to_maddr(d->arch.rma_page);
-
-    BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */
-
-    printk("clearing RMO: 0x%lx[0x%lx]\n", rma_base, rma_sz);
-    memset((void *)rma_base, 0, rma_sz);
+    rma_sz = rma_size(rma_order_pages);
 
     d->shared_info = (shared_info_t *)
         (rma_addr(&d->arch, RMA_SHARED_INFO) + rma_base);
 
-    d->arch.large_page_sizes = 1;
-    d->arch.large_page_shift[0] = 24; /* 16 M for 970s */
-
-    /* FIXME: we need to the the maximum addressible memory for this
-     * domain to calculate this correctly. It should probably be set
-     * by the managment tools */
-    htab_order = d->arch.rma_order - 6; /* (1/64) */
-    if (test_bit(_DOMF_privileged, &d->domain_flags)) {
-        /* bump the htab size of privleged domains */
-        ++htab_order;
-    }
-    htab_alloc(d, htab_order);
+    d->arch.large_page_sizes = cpu_large_page_orders(
+        d->arch.large_page_order, ARRAY_SIZE(d->arch.large_page_order));
+
+    INIT_LIST_HEAD(&d->arch.extent_list);
 
     return 0;
 }
 
 void arch_domain_destroy(struct domain *d)
 {
-    htab_free(d);
+    shadow_teardown(d);
 }
 
 void machine_halt(void)
@@ -162,6 +152,16 @@ int arch_set_info_guest(struct vcpu *v, 
 int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c)
 { 
     memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
+
+    printf("Domain[%d].%d: initializing\n",
+           v->domain->domain_id, v->vcpu_id);
+
+    if (v->domain->arch.htab.order == 0)
+        panic("Page table never allocated for Domain: %d\n",
+              v->domain->domain_id);
+    if (v->domain->arch.rma_order == 0)
+        panic("RMA never allocated for Domain: %d\n",
+              v->domain->domain_id);
 
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
 
@@ -253,17 +253,19 @@ void continue_running(struct vcpu *same)
 void continue_running(struct vcpu *same)
 {
     /* nothing to do */
+    return;
 }
 
 void sync_vcpu_execstate(struct vcpu *v)
 {
-    /* XXX for now, for domain destruction, make this non-fatal */
-    printf("%s: called\n", __func__);
+    /* do nothing */
+    return;
 }
 
 void domain_relinquish_resources(struct domain *d)
 {
     free_domheap_pages(d->arch.rma_page, d->arch.rma_order);
+    free_extents(d);
 }
 
 void arch_dump_domain_info(struct domain *d)
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/domain_build.c
--- a/xen/arch/powerpc/domain_build.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/domain_build.c   Fri Sep 01 13:04:02 2006 -0600
@@ -25,6 +25,7 @@
 #include <xen/init.h>
 #include <xen/ctype.h>
 #include <xen/iocap.h>
+#include <xen/shadow.h>
 #include <xen/version.h>
 #include <asm/processor.h>
 #include <asm/papr.h>
@@ -34,17 +35,21 @@ extern int loadelfimage_32(struct domain
 extern int loadelfimage_32(struct domain_setup_info *dsi);
 
 /* opt_dom0_mem: memory allocated to domain 0. */
-static unsigned int opt_dom0_mem;
+static unsigned int dom0_nrpages;
 static void parse_dom0_mem(char *s)
 {
-    unsigned long long bytes = parse_size_and_unit(s);
-    /* If no unit is specified we default to kB units, not bytes. */
-    if (isdigit(s[strlen(s)-1]))
-        opt_dom0_mem = (unsigned int)bytes;
-    else
-        opt_dom0_mem = (unsigned int)(bytes >> 10);
+    unsigned long long bytes;
+
+    bytes = parse_size_and_unit(s);
+    dom0_nrpages = bytes >> PAGE_SHIFT;
 }
 custom_param("dom0_mem", parse_dom0_mem);
+
+static unsigned int opt_dom0_max_vcpus;
+integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
+
+static unsigned int opt_dom0_shadow;
+boolean_param("dom0_shadow", opt_dom0_shadow);
 
 int elf_sanity_check(Elf_Ehdr *ehdr)
 {
@@ -105,11 +110,13 @@ int construct_dom0(struct domain *d,
     struct domain_setup_info dsi;
     ulong dst;
     u64 *ofh_tree;
+    uint rma_nrpages = 1 << d->arch.rma_order;
     ulong rma_sz = rma_size(d->arch.rma_order);
     ulong rma = page_to_maddr(d->arch.rma_page);
     start_info_t *si;
     ulong eomem;
     int am64 = 1;
+    int preempt = 0;
     ulong msr;
     ulong pc;
     ulong r2;
@@ -118,13 +125,18 @@ int construct_dom0(struct domain *d,
     BUG_ON(d->domain_id != 0);
     BUG_ON(d->vcpu[0] == NULL);
 
+    if (image_len == 0)
+        panic("No Dom0 image supplied\n");
+
     cpu_init_vcpu(v);
 
     memset(&dsi, 0, sizeof(struct domain_setup_info));
     dsi.image_addr = image_start;
     dsi.image_len  = image_len;
 
+    printk("Trying Dom0 as 64bit ELF\n");
     if ((rc = parseelfimage(&dsi)) != 0) {
+        printk("Trying Dom0 as 32bit ELF\n");
         if ((rc = parseelfimage_32(&dsi)) != 0)
             return rc;
         am64 = 0;
@@ -141,7 +153,33 @@ int construct_dom0(struct domain *d,
 
     /* By default DOM0 is allocated all available memory. */
     d->max_pages = ~0U;
-    d->tot_pages = 1UL << d->arch.rma_order;
+
+    /* default is the max(1/16th of memory, CONFIG_MIN_DOM0_PAGES) */
+    if (dom0_nrpages == 0) {
+        dom0_nrpages = total_pages >> 4;
+
+        if (dom0_nrpages < CONFIG_MIN_DOM0_PAGES)
+            dom0_nrpages = CONFIG_MIN_DOM0_PAGES;
+    }
+
+    /* make sure we are at least as big as the RMA */
+    if (dom0_nrpages > rma_nrpages)
+        dom0_nrpages = allocate_extents(d, dom0_nrpages, rma_nrpages);
+
+    ASSERT(d->tot_pages == dom0_nrpages);
+    ASSERT(d->tot_pages >= rma_nrpages);
+
+    if (opt_dom0_shadow == 0) {
+        /* 1/64 of memory  */
+        opt_dom0_shadow = (d->tot_pages >> 6) >> (20 - PAGE_SHIFT);
+    }
+
+    do {
+        shadow_set_allocation(d, opt_dom0_shadow, &preempt);
+    } while (preempt);
+    if (shadow_get_allocation(d) == 0)
+        panic("shadow allocation failed 0x%x < 0x%x\n",
+              shadow_get_allocation(d), opt_dom0_shadow);
 
     ASSERT( image_len < rma_sz );
 
@@ -156,10 +194,6 @@ int construct_dom0(struct domain *d,
     printk("shared_info: 0x%lx,%p\n", si->shared_info, d->shared_info);
 
     eomem = si->shared_info;
-
-    /* allow dom0 to access all of system RAM */
-    d->arch.logical_base_pfn = 128 << (20 - PAGE_SHIFT); /* 128 MB */
-    d->arch.logical_end_pfn = max_page;
 
     /* number of pages accessible */
     si->nr_pages = rma_sz >> PAGE_SHIFT;
@@ -265,7 +299,7 @@ int construct_dom0(struct domain *d,
 
     printk("DOM: pc = 0x%lx, r2 = 0x%lx\n", pc, r2);
 
-    ofd_dom0_fixup(d, *ofh_tree + rma, si, dst - rma);
+    ofd_dom0_fixup(d, *ofh_tree + rma, si);
 
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
 
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/exceptions.c
--- a/xen/arch/powerpc/exceptions.c     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/exceptions.c     Fri Sep 01 13:04:02 2006 -0600
@@ -82,6 +82,8 @@ void program_exception(struct cpu_user_r
     show_registers(regs);
     printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr());
     printk("hid4 0x%016lx\n", regs->hid4);
+    printk("---[ backtrace ]---\n");
+    show_backtrace(regs->gprs[1], regs->lr, regs->pc);
     panic("%s: 0x%lx\n", __func__, cookie);
 #endif /* CRASH_DEBUG */
 }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/exceptions.h
--- a/xen/arch/powerpc/exceptions.h     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/exceptions.h     Fri Sep 01 13:04:02 2006 -0600
@@ -51,7 +51,4 @@ extern char exception_vectors[];
 extern char exception_vectors[];
 extern char exception_vectors_end[];
 extern int spin_start[];
-extern int firmware_image_start[0];
-extern int firmware_image_size[0];
-
 #endif
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/external.c       Fri Sep 01 13:04:02 2006 -0600
@@ -175,8 +175,7 @@ void init_IRQ(void)
 
 void ack_APIC_irq(void)
 {
-    printk("%s: EOI the whole MPIC?\n", __func__);
-    for (;;);
+    panic("%s: EOI the whole MPIC?\n", __func__);
 }
 
 void ack_bad_irq(unsigned int irq)
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/iommu.c
--- a/xen/arch/powerpc/iommu.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/iommu.c  Fri Sep 01 13:04:02 2006 -0600
@@ -52,17 +52,14 @@ int iommu_put(u32 buid, ulong ioba, unio
 
         pfn = tce.tce_bits.tce_rpn;
         mfn = pfn2mfn(d, pfn, &mtype);
-        if (mtype != 0) {
-            panic("we don't do non-RMO memory yet\n");
+        if (mfn > 0) {
+#ifdef DEBUG
+            printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
+                   ioba, pfn, mfn);
+#endif
+            tce.tce_bits.tce_rpn = mfn;
+            return iommu_phbs[buid].iommu_put(ioba, tce);
         }
-
-#ifdef DEBUG
-        printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
-               ioba, pfn, mfn);
-#endif
-        tce.tce_bits.tce_rpn = mfn;
-
-        return iommu_phbs[buid].iommu_put(ioba, tce);
     }
     return -1;
 }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/mm.c     Fri Sep 01 13:04:02 2006 -0600
@@ -13,9 +13,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #include <xen/config.h>
@@ -23,9 +24,18 @@
 #include <xen/shadow.h>
 #include <xen/kernel.h>
 #include <xen/sched.h>
+#include <xen/perfc.h>
 #include <asm/misc.h>
 #include <asm/init.h>
 #include <asm/page.h>
+
+#ifdef VERBOSE
+#define MEM_LOG(_f, _a...)                                  \
+  printk("DOM%u: (file=mm.c, line=%d) " _f "\n",            \
+         current->domain->domain_id , __LINE__ , ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
 
 /* Frame table and its size in pages. */
 struct page_info *frame_table;
@@ -53,16 +63,128 @@ int steal_page(struct domain *d, struct 
     return 1;
 }
 
-
-int get_page_type(struct page_info *page, u32 type)
-{
-    panic("%s called\n", __func__);
-    return 1;
-}
-
 void put_page_type(struct page_info *page)
 {
-    panic("%s called\n", __func__);
+    unsigned long nx, x, y = page->u.inuse.type_info;
+
+    do {
+        x  = y;
+        nx = x - 1;
+
+        ASSERT((x & PGT_count_mask) != 0);
+
+        /*
+         * The page should always be validated while a reference is held. The 
+         * exception is during domain destruction, when we forcibly invalidate 
+         * page-table pages if we detect a referential loop.
+         * See domain.c:relinquish_list().
+         */
+        ASSERT((x & PGT_validated) || 
+               test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
+
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            /* Record TLB information for flush later. */
+            page->tlbflush_timestamp = tlbflush_current_time();
+        }
+        else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == 
+                           (PGT_pinned | 1)) )
+        {
+            /* Page is now only pinned. Make the back pointer mutable again. */
+            nx |= PGT_va_mutable;
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+}
+
+
+int get_page_type(struct page_info *page, unsigned long type)
+{
+    unsigned long nx, x, y = page->u.inuse.type_info;
+
+ again:
+    do {
+        x  = y;
+        nx = x + 1;
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
+            return 0;
+        }
+        else if ( unlikely((x & PGT_count_mask) == 0) )
+        {
+            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
+            {
+                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
+                {
+                    /*
+                     * On type change we check to flush stale TLB
+                     * entries. This may be unnecessary (e.g., page
+                     * was GDT/LDT) but those circumstances should be
+                     * very rare.
+                     */
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
+                    tlbflush_filter(mask, page->tlbflush_timestamp);
+
+                    if ( unlikely(!cpus_empty(mask)) )
+                    {
+                        perfc_incrc(need_flush_tlb_flush);
+                        flush_tlb_mask(mask);
+                    }
+                }
+
+                /* We lose existing type, back pointer, and validity. */
+                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
+                nx |= type;
+
+                /* No special validation needed for writable pages. */
+                /* Page tables and GDT/LDT need to be scanned for validity. */
+                if ( type == PGT_writable_page )
+                    nx |= PGT_validated;
+            }
+        }
+        else
+        {
+            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+            {
+                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+                {
+                    return 0;
+                }
+                else if ( (x & PGT_va_mask) == PGT_va_mutable )
+                {
+                    /* The va backpointer is mutable, hence we update it. */
+                    nx &= ~PGT_va_mask;
+                    nx |= type; /* we know the actual type is correct */
+                }
+                else if ( (type & PGT_va_mask) != PGT_va_mutable )
+                {
+                    ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));
+
+                    /* This table is possibly mapped at multiple locations. */
+                    nx &= ~PGT_va_mask;
+                    nx |= PGT_va_unknown;
+                }
+            }
+            if ( unlikely(!(x & PGT_validated)) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                    cpu_relax();
+                goto again;
+            }
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+
+    if ( unlikely(!(nx & PGT_validated)) )
+    {
+        /* No one else is updating simultaneously. */
+        __set_bit(_PGT_validated, &page->u.inuse.type_info);
+    }
+
+    return 1;
 }
 
 void __init init_frametable(void)
@@ -107,44 +229,148 @@ extern void copy_page(void *dp, void *sp
     }
 }
 
+static int mfn_in_hole(ulong mfn)
+{
+    /* totally cheating */
+    if (mfn >= (0xf0000000UL >> PAGE_SHIFT) &&
+        mfn < (((1UL << 32) - 1) >> PAGE_SHIFT))
+        return 1;
+
+    return 0;
+}
+
+static uint add_extent(struct domain *d, struct page_info *pg, uint order)
+{
+    struct page_extents *pe;
+
+    pe = xmalloc(struct page_extents);
+    if (pe == NULL)
+        return 0;
+
+    pe->pg = pg;
+    pe->order = order;
+    pe->pfn = page_to_mfn(pg);
+
+    list_add_tail(&pe->pe_list, &d->arch.extent_list);
+
+    return pe->pfn;
+}
+
+void free_extents(struct domain *d)
+{
+    /* we just need to free the memory behind list */
+    struct list_head *list;
+    struct list_head *ent;
+    struct list_head *next;
+
+    list = &d->arch.extent_list;
+    ent = list->next;
+
+    while (ent != list) {
+        next = ent->next;
+        xfree(ent);
+        ent = next;
+    }
+}
+
+uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages)
+{
+    uint ext_order;
+    uint ext_nrpages;
+    uint total_nrpages;
+    struct page_info *pg;
+
+    ext_order = cpu_extent_order();
+    ext_nrpages = 1 << ext_order;
+
+    total_nrpages = rma_nrpages;
+
+    /* We only allocate in nr_extsz chunks, so if the request is not a
+     * multiple of the extent size you get more than you asked for. */
+    while (total_nrpages < nrpages) {
+        pg = alloc_domheap_pages(d, ext_order, 0);
+        if (pg == NULL)
+            return total_nrpages;
+
+        if (add_extent(d, pg, ext_order) == 0) {
+            free_domheap_pages(pg, ext_order);
+            return total_nrpages;
+        }
+        total_nrpages += ext_nrpages;
+    }
+
+    return total_nrpages;
+}
+        
+int allocate_rma(struct domain *d, unsigned int order_pages)
+{
+    ulong rma_base;
+    ulong rma_sz = rma_size(order_pages);
+
+    d->arch.rma_page = alloc_domheap_pages(d, order_pages, 0);
+    if (d->arch.rma_page == NULL) {
+        DPRINTK("Could not allocate order_pages=%d RMA for domain %u\n",
+                order_pages, d->domain_id);
+        return -ENOMEM;
+    }
+    d->arch.rma_order = order_pages;
+
+    rma_base = page_to_maddr(d->arch.rma_page);
+    BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */
+
+    /* XXX */
+    printk("clearing RMA: 0x%lx[0x%lx]\n", rma_base, rma_sz);
+    memset((void *)rma_base, 0, rma_sz);
+
+    return 0;
+}
+
 ulong pfn2mfn(struct domain *d, long pfn, int *type)
 {
     ulong rma_base_mfn = page_to_mfn(d->arch.rma_page);
     ulong rma_size_mfn = 1UL << d->arch.rma_order;
-    ulong mfn;
-    int t;
+    struct page_extents *pe;
 
     if (pfn < rma_size_mfn) {
-        mfn = pfn + rma_base_mfn;
-        t = PFN_TYPE_RMA;
-    } else if (pfn >= d->arch.logical_base_pfn &&
-               pfn < d->arch.logical_end_pfn) {
-        if (test_bit(_DOMF_privileged, &d->domain_flags)) {
-            /* This hack allows dom0 to map all memory, necessary to
-             * initialize domU state. */
-            mfn = pfn;
-        } else {
-            panic("we do not handle the logical area yet\n");
-            mfn = 0;
-        }
-
-        t = PFN_TYPE_LOGICAL;
-    } else {
-        /* don't know */
-        mfn = pfn;
-        t = PFN_TYPE_IO;
-    }
-
-    if (type != NULL)
-        *type = t;
-
-    return mfn;
+        if (type)
+            *type = PFN_TYPE_RMA;
+        return pfn + rma_base_mfn;
+    }
+
+    if (test_bit(_DOMF_privileged, &d->domain_flags) &&
+        mfn_in_hole(pfn)) {
+        if (type)
+            *type = PFN_TYPE_IO;
+        return pfn;
+    }
+
+    /* quick tests first */
+    list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
+        uint end_pfn = pe->pfn + (1 << pe->order);
+
+        if (pfn >= pe->pfn && pfn < end_pfn) {
+            if (type)
+                *type = PFN_TYPE_LOGICAL;
+            return page_to_mfn(pe->pg) + (pfn - pe->pfn);
+        }
+    }
+
+    /* This hack allows dom0 to map all memory, necessary to
+     * initialize domU state. */
+    if (test_bit(_DOMF_privileged, &d->domain_flags)) {
+        if (type)
+            *type = PFN_TYPE_REMOTE;
+        return pfn;
+    }
+
+    BUG();
+    return 0;
 }
 
 void guest_physmap_add_page(
     struct domain *d, unsigned long gpfn, unsigned long mfn)
 {
-    panic("%s\n", __func__);
+    printk("%s(%d, 0x%lx, 0x%lx)\n", __func__, d->domain_id, gpfn, mfn);
 }
 void guest_physmap_remove_page(
     struct domain *d, unsigned long gpfn, unsigned long mfn)
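
As a rough illustration (not part of the changeset), the new pfn2mfn() lookup order above can be summarised in Python. Extent, the io_hole bounds and pfn2mfn_sketch are illustrative stand-ins for the C structures, not part of the hypervisor code.

    from collections import namedtuple

    Extent = namedtuple('Extent', 'pfn base_mfn order')   # stand-in for page_extents

    def pfn2mfn_sketch(pfn, rma_base_mfn, rma_size_mfn, extents,
                       privileged=False, io_hole=(0xf0000, 0xfffff)):
        # 1. Low pfns map 1:1 onto the real mode area (RMA).
        if pfn < rma_size_mfn:
            return 'RMA', rma_base_mfn + pfn
        # 2. Privileged domains may reference the 32-bit I/O hole directly
        #    (bounds mirror mfn_in_hole() above).
        if privileged and io_hole[0] <= pfn < io_hole[1]:
            return 'IO', pfn
        # 3. Otherwise search the extents handed out by allocate_extents().
        for ext in extents:
            if ext.pfn <= pfn < ext.pfn + (1 << ext.order):
                return 'LOGICAL', ext.base_mfn + (pfn - ext.pfn)
        # 4. Dom0 fallback: treat the pfn as a frame belonging to another domain.
        if privileged:
            return 'REMOTE', pfn
        raise ValueError('unmapped pfn 0x%x' % pfn)

    print(pfn2mfn_sketch(0x10, rma_base_mfn=0x4000, rma_size_mfn=0x1000, extents=[]))
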
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/mpic.c
--- a/xen/arch/powerpc/mpic.c   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/mpic.c   Fri Sep 01 13:04:02 2006 -0600
@@ -498,10 +498,10 @@ static void mpic_enable_irq(unsigned int
 
 #ifdef CONFIG_MPIC_BROKEN_U3
        if (mpic->flags & MPIC_BROKEN_U3) {
-               unsigned int src = irq - mpic->irq_offset;
-               if (mpic_is_ht_interrupt(mpic, src) &&
+               unsigned int bsrc = irq - mpic->irq_offset;
+               if (mpic_is_ht_interrupt(mpic, bsrc) &&
                    (irq_desc[irq].status & IRQ_LEVEL))
-                       mpic_ht_end_irq(mpic, src);
+                       mpic_ht_end_irq(mpic, bsrc);
        }
 #endif /* CONFIG_MPIC_BROKEN_U3 */
 }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/ofd_fixup.c
--- a/xen/arch/powerpc/ofd_fixup.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/ofd_fixup.c      Fri Sep 01 13:04:02 2006 -0600
@@ -13,7 +13,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
@@ -24,6 +24,7 @@
 #include <xen/version.h>
 #include <public/xen.h>
 #include "of-devtree.h"
+#include "oftree.h"
 
 #undef RTAS
 
@@ -316,91 +317,6 @@ static ofdn_t ofd_rtas_props(void *m)
 }
 #endif
 
-struct mem_reg {
-    u64 addr;
-    u64 sz;
-};
-
-static ofdn_t ofd_memory_chunk_create(void *m, ofdn_t p,
-        const char *ppath,
-        const char *name,
-        const char *dt,
-        ulong start, ulong size)
-{
-    struct mem_reg reg;
-    char path[128];
-    ulong l;
-    u32 v;
-    ofdn_t n;
-    ulong nl = strlen(name) + 1;
-    ulong dtl = strlen(dt) + 1;
-
-    l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start);
-    n = ofd_node_add(m, p, path, l + 1);
-    ofd_prop_add(m, n, "name", name, nl);
-
-    v = 1;
-    ofd_prop_add(m, n, "#address-cells", &v, sizeof (v));
-    v = 0;
-    ofd_prop_add(m, n, "#size-cells", &v, sizeof (v));
-
-    ofd_prop_add(m, n, "device_type", dt, dtl);
-
-    /* physical addresses usable without regard to OF */
-    reg.addr = start;
-    reg.sz = size;
-    ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
-
-    return n;
-}
-
-static ofdn_t ofd_memory_props(void *m, struct domain *d, ulong eoload)
-{
-    ofdn_t n = -1;
-    ulong start = 0;
-    static char name[] = "memory";
-    ulong mem_size = rma_size(d->arch.rma_order);
-    ulong chunk_size = rma_size(d->arch.rma_order);
-
-    /* Remove all old memory props */
-    do {
-        ofdn_t old;
-
-        old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type",
-                                    name, sizeof(name));
-        if (old <= 0) break;
-
-        ofd_node_prune(m, old);
-    } while (1);
-
-    while (start < mem_size) {
-        ulong size = (mem_size < chunk_size) ? mem_size : chunk_size;
-
-        n = ofd_memory_chunk_create(m, OFD_ROOT, "", "memory", "memory",
-                start, size);
-
-        if (start == 0) {
-            /* We are processing the first and RMA chunk */
-
-            /* free list of physical addresses available after OF and
-             * client program have been accounted for */
-            struct mem_reg avail[] = {
-                /* 0 til OF @ 32MiB - 16KiB stack */
-                { .addr = 0, .sz = ((32 << 20) - (16 << 10)) },
-                /* end of loaded material to the end the chunk - 1 page */
-                { .addr = eoload, .sz = chunk_size - eoload - PAGE_SIZE },
-                /* the last page is reserved for xen_start_info */
-            };
-            ofd_prop_add(m, n, "available", &avail,
-                    sizeof (avail));
-        }
-
-        start += size;
-        mem_size -= size;
-    }
-    return n;
-}
-
 static ofdn_t ofd_xen_props(void *m, struct domain *d, start_info_t *si)
 {
     ofdn_t n;
@@ -440,9 +356,8 @@ static ofdn_t ofd_xen_props(void *m, str
     }
     return n;
 }
-extern int ofd_dom0_fixup(
-    struct domain *d, ulong oftree, start_info_t *si, ulong dst);
-int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si, ulong eoload)
+
+int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si)
 {
     void *m;
     const ofdn_t n = OFD_ROOT;
@@ -470,8 +385,8 @@ int ofd_dom0_fixup(struct domain *d, ulo
     printk("Add /chosen props\n");
     ofd_chosen_props(m, (char *)si->cmd_line);
 
-    printk("fix /memory@0 props\n");
-    ofd_memory_props(m, d, eoload);
+    printk("fix /memory props\n");
+    ofd_memory_props(m, d);
 
     printk("fix /xen props\n");
     ofd_xen_props(m, d, si);
@@ -497,8 +412,8 @@ int ofd_dom0_fixup(struct domain *d, ulo
     r = ofd_prop_add(m, n, "ibm,partition-no", &did, sizeof(did));
     ASSERT( r > 0 );
 
-    const char dom0[] = "dom0";
-    r = ofd_prop_add(m, n, "ibm,partition-name", dom0, sizeof (dom0));
+    const char d0[] = "dom0";
+    r = ofd_prop_add(m, n, "ibm,partition-name", d0, sizeof (d0));
     ASSERT( r > 0 );
 
 
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/oftree.h
--- a/xen/arch/powerpc/oftree.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/oftree.h Fri Sep 01 13:04:02 2006 -0600
@@ -20,14 +20,18 @@
 
 #ifndef _OFTREE_H
 #define _OFTREE_H
+#include <xen/multiboot.h>
 
 extern ulong oftree;
 extern ulong oftree_len;
+extern ulong oftree_end;
 
-extern int ofd_dom0_fixup(
-    struct domain *d, ulong oftree, start_info_t *si, ulong dst);
+extern int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si);
+extern void ofd_memory_props(void *m, struct domain *d);
 
 extern int firmware_image_start[0];
 extern int firmware_image_size[0];
 
+extern void memory_init(module_t *mod, int mcount);
+
 #endif  /* #ifndef _OFTREE_H */
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/papr/tce.c
--- a/xen/arch/powerpc/papr/tce.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/papr/tce.c       Fri Sep 01 13:04:02 2006 -0600
@@ -47,7 +47,7 @@ static void h_put_tce(struct cpu_user_re
         regs->gprs[3] = H_Success;
     }
 }
-    
+
 static void h_get_tce(struct cpu_user_regs *regs)
 {
     u32 liobn = regs->gprs[4];
@@ -57,7 +57,7 @@ static void h_get_tce(struct cpu_user_re
     printk("%s: liobn: 0x%x ioba: 0x%lx \n", __func__, liobn, ioba);
 #endif
     regs->gprs[3] = H_Function;
-    for(;;) ;
+    BUG();
 }
 
 static void h_stuff_tce(struct cpu_user_regs *regs)
@@ -76,7 +76,7 @@ static void h_stuff_tce(struct cpu_user_
             count);
 #endif
     regs->gprs[3] = H_Function;
-    for(;;);
+    BUG();
 }
    
 __init_papr_hcall(H_PUT_TCE, h_put_tce);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/papr/xlate.c
--- a/xen/arch/powerpc/papr/xlate.c     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/papr/xlate.c     Fri Sep 01 13:04:02 2006 -0600
@@ -30,12 +30,6 @@
 #include <asm/papr.h>
 #include <asm/hcalls.h>
 
-static void not_yet(struct cpu_user_regs *regs)
-{
-    printk("not implemented yet: 0x%lx\n", regs->gprs[3]);
-    for (;;);
-}
-
 #ifdef USE_PTE_INSERT
 static inline void pte_insert(union pte volatile *pte,
         ulong vsid, ulong rpn, ulong lrpn)
@@ -160,13 +154,13 @@ static void h_enter(struct cpu_user_regs
         }
 
         /* get correct pgshift value */
-        pgshift = d->arch.large_page_shift[lp_size];
+        pgshift = d->arch.large_page_order[lp_size] + PAGE_SHIFT;
     }
 
     /* get the correct logical RPN in terms of 4K pages need to mask
      * off lp bits and unused arpn bits if this is a large page */
 
-    lpn = ~0ULL << (pgshift - 12);
+    lpn = ~0ULL << (pgshift - PAGE_SHIFT);
     lpn = pte.bits.rpn & lpn;
 
     rpn = pfn2mfn(d, lpn, &mtype);
@@ -493,8 +487,42 @@ static void h_remove(struct cpu_user_reg
     pte_tlbie(&lpte, ptex);
 }
 
+static void h_read(struct cpu_user_regs *regs)
+{
+    ulong flags = regs->gprs[4];
+    ulong ptex = regs->gprs[5];
+    struct vcpu *v = get_current();
+    struct domain *d = v->domain;
+    struct domain_htab *htab = &d->arch.htab;
+    union pte volatile *pte;
+
+    if (flags & H_READ_4)
+        ptex &= ~0x3UL;
+
+    if (ptex > (1UL << htab->log_num_ptes)) {
+        regs->gprs[3] = H_Parameter;
+        printk("%s: bad ptex: 0x%lx\n", __func__, ptex);
+        return;
+    }
+    pte = &htab->map[ptex];
+    regs->gprs[4] = pte[0].words.vsid;
+    regs->gprs[5] = pte[0].words.rpn;
+
+    if (!(flags & H_READ_4)) {
+        /* dump another 3 PTEs */
+        regs->gprs[6] = pte[1].words.vsid;
+        regs->gprs[7] = pte[1].words.rpn;
+        regs->gprs[8] = pte[2].words.vsid;
+        regs->gprs[9] = pte[2].words.rpn;
+        regs->gprs[10] = pte[3].words.vsid;
+        regs->gprs[11] = pte[3].words.rpn;
+    }
+
+    regs->gprs[3] = H_Success;
+}
+
 __init_papr_hcall(H_ENTER, h_enter);
-__init_papr_hcall(H_READ, not_yet);
+__init_papr_hcall(H_READ, h_read);
 __init_papr_hcall(H_REMOVE, h_remove);
 __init_papr_hcall(H_CLEAR_MOD, h_clear_mod);
 __init_papr_hcall(H_CLEAR_REF, h_clear_ref);
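
The h_enter change above now derives pgshift from the domain's large_page_order table plus PAGE_SHIFT and masks the guest-supplied RPN down to a large-page boundary before translating it. A minimal standalone sketch of that masking step; PAGE_SHIFT, the order value, and the sample RPN below are assumptions for illustration only:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12          /* assumed 4K base pages */

/* Mask a logical RPN (counted in 4K pages) down to the start of its
 * enclosing large page, mirroring lpn = ~0ULL << (pgshift - PAGE_SHIFT). */
static uint64_t large_page_base_lpn(uint64_t rpn, unsigned int large_page_order)
{
    unsigned int pgshift = large_page_order + PAGE_SHIFT;
    uint64_t mask = ~0ULL << (pgshift - PAGE_SHIFT);   /* clear the low lp bits */
    return rpn & mask;
}

int main(void)
{
    /* A 16M large page is order 12 in 4K units (16M / 4K = 4096 pages). */
    uint64_t lpn = large_page_base_lpn(0x12345, 12);
    printf("base lpn = 0x%llx\n", (unsigned long long)lpn);   /* 0x12000 */
    return 0;
}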
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/powerpc64/exceptions.S
--- a/xen/arch/powerpc/powerpc64/exceptions.S   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/powerpc64/exceptions.S   Fri Sep 01 13:04:02 2006 -0600
@@ -514,6 +514,43 @@ _GLOBAL(sleep)
     mtmsrd r3
     blr
 
+/* The primary processor issues a firmware call to spin us up at this
+ * address, passing our CPU number in r3.  We only need a function
+ * entry point instead of a descriptor since this is never called from
+ * C code.
+ */    
     .globl spin_start
 spin_start:
+    /* Write our processor number as an acknowledgment that we're alive.  */
+    LOADADDR(r14, __spin_ack)
+    stw r3, 0(r14)
+    sync
+    /* If NR_CPUS is too small, we should just spin forever.  */
+    LOADADDR(r15, NR_CPUS)
+    cmpd r3, r15
+    blt 2f     
     b .
+    /* Find our index in the array of processor_area struct pointers.  */
+2:  LOADADDR(r14, global_cpu_table)
+    muli r15, r3, 8
+    add r14, r14, r15
+    /* Spin until the pointer for our processor goes valid.  */
+1:  ld r15, 0(r14)
+    cmpldi r15, 0
+    beq 1b
+    /* Dereference the pointer and load our stack pointer.  */
+    isync
+    ld r1, PAREA_stack(r15)
+    li r14, STACK_FRAME_OVERHEAD
+    sub r1, r1, r14
+    /* Load up the TOC and entry point for the C function to be called.  */
+    LOADADDR(r14, secondary_cpu_init)
+    ld r2, 8(r14)
+    ld r11, 0(r14)
+    mtctr r11
+    /* Warning: why do we need this synchronizing instruction on 970FX?  */
+    isync
+    /* Jump into C code now.  */
+    bctrl
+    nop
+    b .
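
The new spin_start path parks each secondary processor until the boot processor publishes a processor_area pointer for it in global_cpu_table, then switches onto the published stack and enters C code. A rough C rendering of that handshake, runnable single-threaded for illustration; the struct layout, the NR_CPUS value, and the stub secondary_cpu_init are assumptions, not taken from the patch:

#include <stdio.h>
#include <stddef.h>

/* Illustrative types only; the real definitions live in Xen's headers. */
struct processor_area { unsigned int whoami; void *hyp_stack_base; };

#define NR_CPUS 16
static volatile struct processor_area *volatile global_cpu_table[NR_CPUS];

static int secondary_cpu_init(int cpuid, unsigned long r4)
{
    printf("cpu %d entered C code\n", cpuid);
    return 0;
}

/* C rendering of the spin_start handshake above: wait until the boot
 * processor publishes our processor_area, then call into C.  (The real
 * assembly also switches onto pa->hyp_stack_base before the call.) */
static void spin_start_c(int cpuid)
{
    volatile struct processor_area *pa;

    if (cpuid >= NR_CPUS)
        for (;;)
            ;                        /* no slot for this CPU: park forever */

    while ((pa = global_cpu_table[cpuid]) == NULL)
        ;                            /* wait for init_parea() on the boot CPU */

    secondary_cpu_init(cpuid, 0);
}

int main(void)
{
    static struct processor_area pa1 = { .whoami = 1 };
    global_cpu_table[1] = &pa1;      /* what init_parea() does on the boot CPU */
    spin_start_c(1);                 /* what the secondary does after the kick */
    return 0;
}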
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/powerpc64/ppc970.c
--- a/xen/arch/powerpc/powerpc64/ppc970.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/powerpc64/ppc970.c       Fri Sep 01 13:04:02 2006 -0600
@@ -13,9 +13,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
  */
 
 #include <xen/config.h>
@@ -31,25 +32,68 @@
 
 #undef SERIALIZE
 
-unsigned int cpu_rma_order(void)
+extern volatile struct processor_area * volatile global_cpu_table[];
+
+struct rma_settings {
+    int order;
+    int rmlr0;
+    int rmlr12;
+};
+
+static struct rma_settings rma_orders[] = {
+    { .order = 26, .rmlr0 = 0, .rmlr12 = 3, }, /*  64 MB */
+    { .order = 27, .rmlr0 = 1, .rmlr12 = 3, }, /* 128 MB */
+    { .order = 28, .rmlr0 = 1, .rmlr12 = 0, }, /* 256 MB */
+    { .order = 30, .rmlr0 = 0, .rmlr12 = 2, }, /*   1 GB */
+    { .order = 34, .rmlr0 = 0, .rmlr12 = 1, }, /*  16 GB */
+    { .order = 38, .rmlr0 = 0, .rmlr12 = 0, }, /* 256 GB */
+};
+
+static uint log_large_page_sizes[] = {
+    4 + 20, /* (1 << 4) == 16M */
+};
+
+static struct rma_settings *cpu_find_rma(unsigned int order)
 {
-    /* XXX what about non-HV mode? */
-    uint rma_log_size = 6 + 20; /* 64M */
-    return rma_log_size - PAGE_SHIFT;
+    int i;
+    for (i = 0; i < ARRAY_SIZE(rma_orders); i++) {
+        if (rma_orders[i].order == order)
+            return &rma_orders[i];
+    }
+    return NULL;
 }
 
-void cpu_initialize(void)
+unsigned int cpu_default_rma_order_pages(void)
 {
-    ulong stack;
+    return rma_orders[0].order - PAGE_SHIFT;
+}
 
-    parea = xmalloc(struct processor_area);
+unsigned int cpu_large_page_orders(uint *sizes, uint max)
+{
+    uint i = 0;
+
+    while (i < max && i < ARRAY_SIZE(log_large_page_sizes)) {
+        sizes[i] = log_large_page_sizes[i] - PAGE_SHIFT;
+        ++i;
+    }
+
+    return i;
+}
+
+unsigned int cpu_extent_order(void)
+{
+    return log_large_page_sizes[0] - PAGE_SHIFT;
+}
+
+void cpu_initialize(int cpuid)
+{
+    ulong r1, r2;
+    __asm__ __volatile__ ("mr %0, 1" : "=r" (r1));
+    __asm__ __volatile__ ("mr %0, 2" : "=r" (r2));
+
+    /* This is SMP safe because the compiler must use r13 for it.  */
+    parea = global_cpu_table[cpuid];
     ASSERT(parea != NULL);
-
-    stack = (ulong)alloc_xenheap_pages(STACK_ORDER);
-
-    ASSERT(stack != 0);
-    parea->hyp_stack_base = (void *)(stack + STACK_SIZE);
-    printk("stack is here: %p\n", parea->hyp_stack_base);
 
     mthsprg0((ulong)parea); /* now ready for exceptions */
 
@@ -79,7 +123,10 @@ void cpu_initialize(void)
     s |= 1UL << (63-3);     /* ser-gp */
     hid0.word |= s;
 #endif
-    printk("hid0: 0x%016lx\n", hid0.word);
+
+    printk("CPU #%d: Hello World! SP = %lx TOC = %lx HID0 = %lx\n", 
+           smp_processor_id(), r1, r2, hid0.word);
+
     mthid0(hid0.word);
 
     union hid1 hid1;
@@ -115,45 +162,22 @@ void cpu_init_vcpu(struct vcpu *v)
 {
     struct domain *d = v->domain;
     union hid4 hid4;
-    ulong rma_base = page_to_maddr(d->arch.rma_page);
-    ulong rma_size = rma_size(d->arch.rma_order);
+    struct rma_settings *rma_settings;
 
     hid4.word = mfhid4();
 
     hid4.bits.lpes0 = 0; /* exceptions set MSR_HV=1 */
     hid4.bits.lpes1 = 1; /* RMA applies */
 
-    hid4.bits.rmor = rma_base >> 26;
+    hid4.bits.rmor = page_to_maddr(d->arch.rma_page) >> 26;
 
     hid4.bits.lpid01 = d->domain_id & 3;
     hid4.bits.lpid25 = (d->domain_id >> 2) & 0xf;
 
-    switch (rma_size) {
-        case 256ULL << 30:  /* 256 GB */
-            hid4.bits.rmlr0 = 0;
-            hid4.bits.rmlr12 = 0;
-            break;
-        case 16ULL << 30:   /* 16 GB */
-            hid4.bits.rmlr0 = 0;
-            hid4.bits.rmlr12 = 1;
-            break;
-        case 1ULL << 30:    /* 1 GB */
-            hid4.bits.rmlr0 = 0;
-            hid4.bits.rmlr12 = 2;
-            break;
-        case 64ULL << 20:   /* 64 MB */
-            hid4.bits.rmlr0 = 0;
-            hid4.bits.rmlr12 = 3;
-            break;
-        case 256ULL << 20:  /* 256 MB */
-            hid4.bits.rmlr0 = 1;
-            hid4.bits.rmlr12 = 0;
-            break;
-        case 128ULL << 20:  /* 128 MB */
-            hid4.bits.rmlr0 = 1;
-            hid4.bits.rmlr12 = 3;
-            break;
-    }
+    rma_settings = cpu_find_rma(d->arch.rma_order + PAGE_SHIFT);
+    ASSERT(rma_settings != NULL);
+    hid4.bits.rmlr0 = rma_settings->rmlr0;
+    hid4.bits.rmlr12 = rma_settings->rmlr12;
 
     v->arch.cpu.hid4.word = hid4.word;
 }
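
With this change cpu_init_vcpu() looks up the HID4 RMLR encoding from the rma_orders[] table instead of switching on the RMA size in bytes. A self-contained sketch of that lookup; PAGE_SHIFT and the example rma_order are illustrative assumptions:

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12   /* assumed 4K pages */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct rma_settings { int order; int rmlr0; int rmlr12; };

static struct rma_settings rma_orders[] = {
    { 26, 0, 3 }, /*  64 MB */
    { 27, 1, 3 }, /* 128 MB */
    { 28, 1, 0 }, /* 256 MB */
    { 30, 0, 2 }, /*   1 GB */
    { 34, 0, 1 }, /*  16 GB */
    { 38, 0, 0 }, /* 256 GB */
};

/* Find the RMLR encoding for an RMA of 2^order bytes, as cpu_find_rma does. */
static struct rma_settings *cpu_find_rma(unsigned int order)
{
    unsigned int i;
    for (i = 0; i < ARRAY_SIZE(rma_orders); i++)
        if (rma_orders[i].order == (int)order)
            return &rma_orders[i];
    return NULL;
}

int main(void)
{
    /* A domain with rma_order 14 (in pages) has a 2^(14+12) = 64 MB RMA. */
    struct rma_settings *s = cpu_find_rma(14 + PAGE_SHIFT);
    if (s)
        printf("rmlr0=%d rmlr12=%d\n", s->rmlr0, s->rmlr12);
    return 0;
}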
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/setup.c
--- a/xen/arch/powerpc/setup.c  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/setup.c  Fri Sep 01 13:04:02 2006 -0600
@@ -43,9 +43,9 @@
 #include <asm/percpu.h>
 #include "exceptions.h"
 #include "of-devtree.h"
+#include "oftree.h"
 
 #define DEBUG
-unsigned long xenheap_phys_end;
 
 /* opt_noht: If true, Hyperthreading is ignored. */
 int opt_noht = 0;
@@ -53,6 +53,14 @@ boolean_param("noht", opt_noht);
 
 int opt_earlygdb = 0;
 boolean_param("earlygdb", opt_earlygdb);
+
+/* opt_nosmp: If true, secondary processors are ignored. */
+static int opt_nosmp = 0;
+boolean_param("nosmp", opt_nosmp);
+
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus);
 
 u32 tlbflush_clock = 1U;
 DEFINE_PER_CPU(u32, tlbflush_time);
@@ -61,9 +69,12 @@ unsigned long wait_init_idle;
 unsigned long wait_init_idle;
 ulong oftree;
 ulong oftree_len;
+ulong oftree_end;
 
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 cpumask_t cpu_online_map; /* missing ifdef in schedule.c */
+cpumask_t cpu_present_map;
+cpumask_t cpu_possible_map;
 
 /* XXX get this from ISA node in device tree */
 ulong isa_io_base;
@@ -74,6 +85,8 @@ extern void idle_loop(void);
 
 /* move us to a header file */
 extern void initialize_keytable(void);
+
+volatile struct processor_area * volatile global_cpu_table[NR_CPUS];
 
 int is_kernel_text(unsigned long addr)
 {
@@ -169,6 +182,21 @@ static void __init start_of_day(void)
 
     percpu_free_unused_areas();
 
+    {
+        /* FIXME: Xen assumes that an online CPU is a schedulable
+         * CPU, but we are not there yet. Remove this fragment when
+         * scheduling secondary processors actually works. */
+        int cpuid;
+
+        printk("WARNING!: Taking all secondary CPUs offline\n");
+
+        for_each_online_cpu(cpuid) {
+            if (cpuid == 0)
+                continue;
+            cpu_clear(cpuid, cpu_online_map);
+        }
+    }
+
     initialize_keytable();
     /* Register another key that will allow for the Hardware Probe
      * to be contacted; this works with RiscWatch probes and should
@@ -193,17 +221,60 @@ void startup_cpu_idle_loop(void)
     reset_stack_and_jump(idle_loop);
 }
 
+static void init_parea(int cpuid)
+{
+    /* Be careful not to shadow the global variable.  */
+    volatile struct processor_area *pa;
+    void *stack;
+
+    pa = xmalloc(struct processor_area);
+    if (pa == NULL)
+        panic("%s: failed to allocate parea for cpu #%d\n", __func__, cpuid);
+
+    stack = alloc_xenheap_pages(STACK_ORDER);
+    if (stack == NULL)
+        panic("%s: failed to allocate stack (order %d) for cpu #%d\n", 
+              __func__, STACK_ORDER, cpuid);
+
+    pa->whoami = cpuid;
+    pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE);
+
+    /* This store has the effect of invoking secondary_cpu_init.  */
+    global_cpu_table[cpuid] = pa;
+    mb();
+}
+
+static int kick_secondary_cpus(int maxcpus)
+{
+    int cpuid;
+
+    for_each_present_cpu(cpuid) {
+        if (cpuid == 0)
+            continue;
+        if (cpuid >= maxcpus)
+            break;
+        init_parea(cpuid);
+        cpu_set(cpuid, cpu_online_map);
+        cpu_set(cpuid, cpu_possible_map);
+    }
+
+    return 0;
+}
+
+/* This is the first C code that secondary processors invoke.  */
+int secondary_cpu_init(int cpuid, unsigned long r4);
+int secondary_cpu_init(int cpuid, unsigned long r4)
+{
+    cpu_initialize(cpuid);
+    while(1);
+}
+
 static void __init __start_xen(multiboot_info_t *mbi)
 {
     char *cmdline;
     module_t *mod = (module_t *)((ulong)mbi->mods_addr);
-    ulong heap_start;
-    ulong modules_start, modules_size;
-    ulong eomem = 0;
-    ulong heap_size = 0;
-    ulong bytes = 0;
-    ulong freemem = (ulong)_end;
-    ulong oftree_end;
+    ulong dom0_start, dom0_len;
+    ulong initrd_start, initrd_len;
 
     memcpy(0, exception_vectors, exception_vectors_end - exception_vectors);
     synchronize_caches(0, exception_vectors_end - exception_vectors);
@@ -226,6 +297,9 @@ static void __init __start_xen(multiboot
     console_start_sync();
 #endif
 
+    /* we give the first RMA to the hypervisor */
+    xenheap_phys_end = rma_size(cpu_default_rma_order_pages());
+
     /* Check that we have at least one Multiboot module. */
     if (!(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0)) {
         panic("FATAL ERROR: Require at least one Multiboot module.\n");
@@ -234,10 +308,6 @@ static void __init __start_xen(multiboot
     if (!(mbi->flags & MBI_MEMMAP)) {
         panic("FATAL ERROR: Bootloader provided no memory information.\n");
     }
-
-    /* mark the begining of images */
-    modules_start = mod[0].mod_start;
-    modules_size = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
 
     /* OF dev tree is the last module */
     oftree = mod[mbi->mods_count-1].mod_start;
@@ -249,71 +319,7 @@ static void __init __start_xen(multiboot
     mod[mbi->mods_count-1].mod_end = 0;
     --mbi->mods_count;
 
-    printk("Physical RAM map:\n");
-
-    /* lets find out how much memory there is */
-    while (bytes < mbi->mmap_length) {
-        u64 end;
-        u64 addr;
-        u64 size;
-
-        memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr + bytes);
-        addr = ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
-        size = ((u64)map->length_high << 32) | (u64)map->length_low;
-        end = addr + size;
-
-        printk(" %016lx - %016lx (usable)\n", addr, end);
-
-        if (addr > eomem) {
-            printk("found a hole skipping remainder of memory at:\n"
-                   " %016lx and beyond\n", addr);
-            break;
-        }
-        if (end > eomem) {
-            eomem = end;
-        }
-        bytes += map->size + 4;
-    }
-
-    printk("System RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
-
-    /* top of memory */
-    max_page = PFN_DOWN(ALIGN_DOWN(eomem, PAGE_SIZE));
-    total_pages = max_page;
-
-    /* Architecturally the first 4 pages are exception hendlers, we
-     * will also be copying down some code there */
-    heap_start = init_boot_allocator(4 << PAGE_SHIFT);
-
-    /* we give the first RMA to the hypervisor */
-    xenheap_phys_end = rma_size(cpu_rma_order());
-
-    /* allow everything else to be allocated */
-    init_boot_pages(xenheap_phys_end, eomem);
-    init_frametable();
-    end_boot_allocator();
-
-    /* Add memory between the beginning of the heap and the beginning
-     * of out text */
-    init_xenheap_pages(heap_start, (ulong)_start);
-
-    /* move the modules to just after _end */
-    if (modules_start) {
-        printk("modules at: %016lx - %016lx\n", modules_start,
-                modules_start + modules_size);
-        freemem = ALIGN_UP(freemem, PAGE_SIZE);
-        memmove((void *)freemem, (void *)modules_start, modules_size);
-
-        oftree -= modules_start - freemem;
-        modules_start = freemem;
-        freemem += modules_size;
-        printk("  moved to: %016lx - %016lx\n", modules_start,
-                modules_start + modules_size);
-    }
-
-    /* the rest of the xenheap, starting at the end of modules */
-    init_xenheap_pages(freemem, xenheap_phys_end);
-
+    memory_init(mod, mbi->mods_count);
 
 #ifdef OF_DEBUG
     printk("ofdump:\n");
@@ -321,13 +327,10 @@ static void __init __start_xen(multiboot
     ofd_walk((void *)oftree, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL);
 #endif
 
-    heap_size = xenheap_phys_end - heap_start;
-
-    printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
-
     percpu_init_areas();
 
-    cpu_initialize();
+    init_parea(0);
+    cpu_initialize(0);
 
 #ifdef CONFIG_GDB
     initialise_gdb();
@@ -335,6 +338,14 @@ static void __init __start_xen(multiboot
         debugger_trap_immediate();
 #endif
 
+    /* Deal with secondary processors.  */
+    if (opt_nosmp) {
+        printk("nosmp: leaving secondary processors spinning forever\n");
+    } else {
+        printk("spinning up at most %d total processors ...\n", max_cpus);
+        kick_secondary_cpus(max_cpus);
+    }
+
     start_of_day();
 
     /* Create initial domain 0. */
@@ -353,22 +364,26 @@ static void __init __start_xen(multiboot
     /* Scrub RAM that is still free and so may go to an unprivileged domain. */
     scrub_heap_pages();
 
-    /*
-     * We're going to setup domain0 using the module(s) that we
-     * stashed safely above our heap. The second module, if present,
-     * is an initrd ramdisk.  The last module is the OF devtree.
-     */
-    if (construct_dom0(dom0,
-                       modules_start, 
-                       mod[0].mod_end-mod[0].mod_start,
-                       (mbi->mods_count == 1) ? 0 :
-                       modules_start + 
-                       (mod[1].mod_start-mod[0].mod_start),
-                       (mbi->mods_count == 1) ? 0 :
-                       mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
+    dom0_start = mod[0].mod_start;
+    dom0_len = mod[0].mod_end - mod[0].mod_start;
+    if (mbi->mods_count > 1) {
+        initrd_start = mod[1].mod_start;
+        initrd_len = mod[1].mod_end - mod[1].mod_start;
+    } else {
+        initrd_start = 0;
+        initrd_len = 0;
+    }
+    if (construct_dom0(dom0, dom0_start, dom0_len,
+                       initrd_start, initrd_len,
                        cmdline) != 0) {
         panic("Could not set up DOM0 guest OS\n");
     }
+
+    init_xenheap_pages(ALIGN_UP(dom0_start, PAGE_SIZE),
+                 ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE));
+    if (initrd_start)
+        init_xenheap_pages(ALIGN_UP(initrd_start, PAGE_SIZE),
+                     ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE));
 
     init_trace_bufs();
 
@@ -407,6 +422,8 @@ void arch_get_xen_caps(xen_capabilities_
 void arch_get_xen_caps(xen_capabilities_info_t info)
 {
 }
+
+
 
 /*
  * Local variables:
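
Once construct_dom0() has consumed the kernel and initrd images, the hunk above hands the module memory back to the Xen heap, but only the whole pages that lie inside each image range (ALIGN_UP on the start, ALIGN_DOWN on the end). A small sketch of that alignment rule; PAGE_SIZE, the macro definitions, and the sample addresses are assumptions:

#include <stdio.h>

#define PAGE_SIZE 4096UL   /* assumed */
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

/* Only whole pages strictly inside [start, start + len) can safely be
 * returned to the heap once the image has been copied out. */
static void reclaim_image(unsigned long start, unsigned long len)
{
    unsigned long first = ALIGN_UP(start, PAGE_SIZE);
    unsigned long last  = ALIGN_DOWN(start + len, PAGE_SIZE);

    if (first < last)
        printf("would free [%#lx, %#lx)\n", first, last);
    /* in Xen this would be init_xenheap_pages(first, last) */
}

int main(void)
{
    reclaim_image(0x1000800, 0x300000);   /* hypothetical module placement */
    return 0;
}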
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/xen.lds.S
--- a/xen/arch/powerpc/xen.lds.S        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/powerpc/xen.lds.S        Fri Sep 01 13:04:02 2006 -0600
@@ -10,11 +10,15 @@ SEARCH_DIR("=/usr/local/lib64"); SEARCH_
 SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib");
 /* Do we need any of these for elf?
    __DYNAMIC = 0;    */
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS;
+}   
 SECTIONS
 {
   /* Read-only sections, merged into text segment: */
   PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + SIZEOF_HEADERS;
-  .interp         : { *(.interp) }
+  .interp         : { *(.interp) } :text
   .hash           : { *(.hash) }
   .dynsym         : { *(.dynsym) }
   .dynstr         : { *(.dynstr) }
@@ -103,7 +107,7 @@ SECTIONS
   PROVIDE (__fini_array_end = .);
   .data           :
   {
-    *(.data .data.* .gnu.linkonce.d.*)
+    *(.data .gnu.linkonce.d.*)
     SORT(CONSTRUCTORS)
   }
 
@@ -121,7 +125,7 @@ SECTIONS
   __inithcall_end = .;
 
   __per_cpu_start = .;
-  .data.percpu : { *(.data.percpu) } :text
+  .data.percpu : { *(.data.percpu) }
   __per_cpu_data_end = .;
   . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
   . = ALIGN(STACK_SIZE);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/io.c     Fri Sep 01 13:04:02 2006 -0600
@@ -646,9 +646,13 @@ static void hvm_mmio_assist(struct cpu_u
         break;
 
     case INSTR_BT:
-        index = operand_index(src);
-        value = get_reg_value(size, index, 0, regs);
-
+        if ( src & REGISTER )
+        {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+        }
+        else if ( src & IMMEDIATE )
+            value = mmio_opp->immediate;
         if (p->u.data & (1 << (value & ((1 << 5) - 1))))
             regs->eflags |= X86_EFLAGS_CF;
         else
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/platform.c       Fri Sep 01 13:04:02 2006 -0600
@@ -652,6 +652,23 @@ static int hvm_decode(int realmode, unsi
         instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
         return DECODE_success;
 
+    case 0xBA:
+        if (((opcode[1] >> 3) & 7) == 4) /* BT $imm8, m16/32/64 */
+        {
+            instr->instr = INSTR_BT;
+            GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+            instr->immediate =
+                    (signed char)get_immediate(realmode, opcode+1, BYTE);
+            instr->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
+            instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+            return DECODE_success;
+        }
+        else
+        {
+            printf("0f %x, This opcode subtype isn't handled yet\n", *opcode);
+            return DECODE_failure;
+        }
+
     default:
         printf("0f %x, This opcode isn't handled yet\n", *opcode);
         return DECODE_failure;
@@ -1002,10 +1019,17 @@ void handle_mmio(unsigned long va, unsig
             mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */
             mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */
 
-            index = operand_index(mmio_inst.operand[0]);
-            size = operand_size(mmio_inst.operand[0]);
-            value = get_reg_value(size, index, 0, regs);
-
+            if ( mmio_inst.operand[0] & REGISTER )
+            { 
+                index = operand_index(mmio_inst.operand[0]);
+                size = operand_size(mmio_inst.operand[0]);
+                value = get_reg_value(size, index, 0, regs);
+            }
+            else if ( mmio_inst.operand[0] & IMMEDIATE )
+            {
+                mmio_opp->immediate = mmio_inst.immediate;
+                value = mmio_inst.immediate;
+            } 
             send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1,
                           mmio_inst.op_size, 0, IOREQ_READ, 0);
             break;
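
The new 0xBA case accepts only the /4 form, selected by the reg field of the ModRM byte that follows the 0F BA escape. For reference, a tiny sketch of how that field distinguishes the group-8 bit-test instructions; the example ModRM byte is hypothetical:

#include <stdio.h>

/* 0x0F 0xBA is "group 8": the reg field of the following ModRM byte picks
 * BT (/4), BTS (/5), BTR (/6) or BTC (/7). */
static const char *group8_name(unsigned char modrm)
{
    switch ((modrm >> 3) & 7) {
    case 4: return "bt";
    case 5: return "bts";
    case 6: return "btr";
    case 7: return "btc";
    default: return "invalid";
    }
}

int main(void)
{
    /* 0F BA 25 ... ib : ModRM 0x25 has reg field 4, i.e. BT imm8, m32 */
    printf("%s\n", group8_name(0x25));
    return 0;
}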
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Fri Sep 01 13:04:02 2006 -0600
@@ -79,22 +79,22 @@ asmlinkage void svm_intr_assist(void)
     ASSERT(vmcb);
 
     /* Check if an Injection is active */
-       /* Previous Interrupt delivery caused this Intercept? */
-       if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) {
-           v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
+    /* Previous Interrupt delivery caused this Intercept? */
+    if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) {
+        v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
 //           printk("Injecting PF#: saving IRQ from ExitInfo\n");
-           vmcb->exitintinfo.bytes = 0;
-           re_injecting = 1;
-       }
+        vmcb->exitintinfo.bytes = 0;
+        re_injecting = 1;
+    }
 
     /* Guest's interrupts masked? */
     rflags = vmcb->rflags;
     if (irq_masked(rflags)) {
         HVM_DBG_LOG(DBG_LEVEL_1, "Guest IRQs masked: rflags: %lx", rflags);
-       /* bail out, we won't be injecting an interrupt this time */
-       return;
+        /* bail out, we won't be injecting an interrupt this time */
+        return;
     }
-  
+    
     /* Previous interrupt still pending? */
     if (vmcb->vintr.fields.irq) {
 //        printk("Re-injecting IRQ from Vintr\n");
@@ -115,27 +115,24 @@ asmlinkage void svm_intr_assist(void)
       if ( v->vcpu_id == 0 )
          hvm_pic_assist(v);
 
+
+      if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+          pic_set_irq(pic, pt->irq, 0);
+          pic_set_irq(pic, pt->irq, 1);
+      }
+
       callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ];
-
-      /* Before we deal with PIT interrupts, let's check for
-         interrupts set by the device model or paravirtualised event
-         channel interrupts.
-      */
-      if ( cpu_has_pending_irq(v) ) {
-           intr_vector = cpu_get_interrupt(v, &intr_type);
-      }
-      else  if ( callback_irq != 0 && local_events_need_delivery() ) {
+      if ( callback_irq != 0 &&
+           local_events_need_delivery() ) {
           /*inject para-device call back irq*/
           v->vcpu_info->evtchn_upcall_mask = 1;
           pic_set_irq(pic, callback_irq, 0);
           pic_set_irq(pic, callback_irq, 1);
-          intr_vector = callback_irq;
       }
-      else  if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
-          pic_set_irq(pic, pt->irq, 0);
-          pic_set_irq(pic, pt->irq, 1);
+
+      if ( cpu_has_pending_irq(v) )
           intr_vector = cpu_get_interrupt(v, &intr_type);
-      }
+
     }
 
     /* have we got an interrupt to inject? */
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Fri Sep 01 13:04:02 2006 -0600
@@ -243,6 +243,7 @@ static void svm_store_cpu_guest_regs(
     {
         /* Returning the guest's regs */
         crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
+        crs[2] = v->arch.hvm_svm.cpu_cr2;
         crs[3] = v->arch.hvm_svm.cpu_cr3;
         crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
     }
@@ -2793,9 +2794,7 @@ asmlinkage void svm_vmexit_handler(struc
         break;
 
     case VMEXIT_INTR:
-        raise_softirq(SCHEDULE_SOFTIRQ);
-        break;
-
+        break;
 
     case VMEXIT_INVD:
         svm_vmexit_do_invd(vmcb);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vlapic.c Fri Sep 01 13:04:02 2006 -0600
@@ -919,6 +919,20 @@ int cpu_has_apic_interrupt(struct vcpu* 
     return 0;
 }
 
+/* check to see if there is a pending interrupt */
+int cpu_has_pending_irq(struct vcpu *v)
+{
+    struct hvm_domain *plat = &v->domain->arch.hvm_domain;
+
+    /* APIC */
+    if ( cpu_has_apic_interrupt(v) ) return 1;
+    
+    /* PIC */
+    if ( !vlapic_accept_pic_intr(v) ) return 0;
+
+    return plat->interrupt_request;
+}
+
 void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode)
 {
     struct vlapic *vlapic = VLAPIC(v);
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/io.c Fri Sep 01 13:04:02 2006 -0600
@@ -68,19 +68,6 @@ static inline int is_interruptibility_st
     return interruptibility;
 }
 
-/* check to see if there is pending interrupt  */
-int cpu_has_pending_irq(struct vcpu *v)
-{
-    struct hvm_domain *plat = &v->domain->arch.hvm_domain;
-
-    /* APIC */
-    if ( cpu_has_apic_interrupt(v) ) return 1;
-    
-    /* PIC */
-    if ( !vlapic_accept_pic_intr(v) ) return 0;
-
-    return plat->interrupt_request;
-}
 
 asmlinkage void vmx_intr_assist(void)
 {
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Sep 01 13:04:02 2006 -0600
@@ -46,6 +46,8 @@
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
 
+extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
+
 static DEFINE_PER_CPU(unsigned long, trace_values[5]);
 #define TRACE_VMEXIT(index,value) this_cpu(trace_values)[index]=value
 
@@ -518,6 +520,7 @@ static void vmx_store_cpu_guest_regs(
     if ( crs != NULL )
     {
         __vmread(CR0_READ_SHADOW, &crs[0]);
+        crs[2] = v->arch.hvm_vmx.cpu_cr2;
         __vmread(GUEST_CR3, &crs[3]);
         __vmread(CR4_READ_SHADOW, &crs[4]);
     }
@@ -953,8 +956,6 @@ static void vmx_vmexit_do_cpuid(struct c
                      bitmaskof(X86_FEATURE_MWAIT) );
 
             edx &= ~( bitmaskof(X86_FEATURE_HT)   |
-                     bitmaskof(X86_FEATURE_MCA)   |
-                     bitmaskof(X86_FEATURE_MCE)   |
                      bitmaskof(X86_FEATURE_ACPI)  |
                      bitmaskof(X86_FEATURE_ACC) );
         }
@@ -1615,6 +1616,7 @@ static int mov_to_cr(int gp, int cr, str
     unsigned long value;
     unsigned long old_cr;
     struct vcpu *v = current;
+    struct vlapic *vlapic = VLAPIC(v);
 
     switch ( gp ) {
     CASE_GET_REG(EAX, eax);
@@ -1758,6 +1760,12 @@ static int mov_to_cr(int gp, int cr, str
             shadow_update_paging_modes(v);
         break;
     }
+    case 8:
+    {
+        vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+        vlapic_update_ppr(vlapic);
+        break;
+    }
     default:
         printk("invalid cr: %d\n", gp);
         __hvm_bug(regs);
@@ -1771,13 +1779,20 @@ static int mov_to_cr(int gp, int cr, str
  */
 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
 {
-    unsigned long value;
+    unsigned long value = 0;
     struct vcpu *v = current;
-
-    if ( cr != 3 )
+    struct vlapic *vlapic = VLAPIC(v);
+
+    if ( cr != 3 && cr != 8)
         __hvm_bug(regs);
 
-    value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
+    if ( cr == 3 )
+        value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
+    else if ( cr == 8 )
+    {
+        value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+        value = (value & 0xF0) >> 4;
+    }
 
     switch ( gp ) {
     CASE_SET_REG(EAX, eax);
@@ -1888,7 +1903,7 @@ static inline void vmx_do_msr_read(struc
         }
 
         rdmsr_safe(regs->ecx, regs->eax, regs->edx);
-        break;
+        return;
     }
 
     regs->eax = msr_content & 0xFFFFFFFF;
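
The mov_to_cr/mov_from_cr hunks add CR8 handling by converting between the guest's 4-bit CR8 value and the vlapic TASKPRI register, which holds the priority class in bits 7:4. A minimal sketch of the two conversions used above:

#include <stdint.h>
#include <stdio.h>

/* CR8 carries the 4-bit task priority; the local APIC TPR register keeps
 * the same value in bits 7:4.  These helpers mirror the shifts above. */
static uint32_t cr8_to_tpr(unsigned long cr8)
{
    return (uint32_t)((cr8 & 0x0F) << 4);
}

static unsigned long tpr_to_cr8(uint32_t tpr)
{
    return (tpr & 0xF0) >> 4;
}

int main(void)
{
    printf("cr8=9 -> tpr=%#x -> cr8=%lu\n",
           cr8_to_tpr(9), tpr_to_cr8(cr8_to_tpr(9)));
    return 0;
}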
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c    Fri Sep 01 13:04:02 2006 -0600
@@ -2861,11 +2861,11 @@ static int sh_page_fault(struct vcpu *v,
     //      bunch of 4K maps.
     //
 
+    shadow_lock(d);
+
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
                    v->domain->domain_id, v->vcpu_id, va, regs->error_code);
     
-    shadow_lock(d);
-
     shadow_audit_tables(v);
                    
     if ( guest_walk_tables(v, va, &gw, 1) != 0 )
@@ -3291,12 +3291,6 @@ sh_update_linear_entries(struct vcpu *v)
         {
             ml3e = __linear_l3_table;
             l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
-#if GUEST_PAGING_LEVELS == 2
-            /* Shadow l3 tables are made up by update_cr3 */
-            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
-#else
-            sl3e = v->arch.shadow_vtable;
-#endif
         }
         else 
         {   
@@ -3306,13 +3300,15 @@ sh_update_linear_entries(struct vcpu *v)
             l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
             ml3e = sh_map_domain_page(l3mfn);
             sh_unmap_domain_page(ml4e);
+        }
+
 #if GUEST_PAGING_LEVELS == 2
-            /* Shadow l3 tables are made up by update_cr3 */
-            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+        /* Shadow l3 tables are made up by update_cr3 */
+        sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
 #else
-            sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table));
-#endif
-        }
+        /* Always safe to use shadow_vtable, because it's globally mapped */
+        sl3e = v->arch.shadow_vtable;
+#endif
 
         for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
         {
@@ -3324,12 +3320,7 @@ sh_update_linear_entries(struct vcpu *v)
         }
 
         if ( v != current ) 
-        {
             sh_unmap_domain_page(ml3e);
-#if GUEST_PAGING_LEVELS != 2
-            sh_unmap_domain_page(sl3e);
-#endif
-        }
     }
 
 #elif CONFIG_PAGING_LEVELS == 3
@@ -3361,31 +3352,10 @@ sh_update_linear_entries(struct vcpu *v)
         
 #else /* GUEST_PAGING_LEVELS == 3 */
         
-        /* Use local vcpu's mappings if we can; otherwise make new mappings */
-        if ( v == current ) 
-        {
-            shadow_l3e = v->arch.shadow_vtable;
-            if ( !shadow_mode_external(d) )
-                guest_l3e = v->arch.guest_vtable;
-        }
-        else 
-        {
-            mfn_t smfn;
-            int idx;
-            
-            /* Map the shadow l3 */
-            smfn = pagetable_get_mfn(v->arch.shadow_table);
-            idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable));
-            shadow_l3e = sh_map_domain_page(smfn);
-            shadow_l3e += idx;
-            if ( !shadow_mode_external(d) )
-            {
-                /* Also the guest l3 */
-                mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table); 
-                guest_l3e = sh_map_domain_page(gmfn);
-                guest_l3e += guest_index(v->arch.guest_vtable);
-            }
-        }
+        /* Always safe to use *_vtable, because they're globally mapped */
+        shadow_l3e = v->arch.shadow_vtable;
+        guest_l3e = v->arch.guest_vtable;
+
 #endif /* GUEST_PAGING_LEVELS */
         
         /* Choose where to write the entries, using linear maps if possible */
@@ -3443,14 +3413,6 @@ sh_update_linear_entries(struct vcpu *v)
         if ( v != current || !shadow_mode_external(d) )
             sh_unmap_domain_page(l2e);
         
-#if GUEST_PAGING_LEVELS == 3
-        if ( v != current) 
-        {
-            sh_unmap_domain_page(shadow_l3e);
-            if ( !shadow_mode_external(d) )
-                sh_unmap_domain_page(guest_l3e);
-        }
-#endif
     }
 
 #elif CONFIG_PAGING_LEVELS == 2
@@ -3601,7 +3563,7 @@ sh_detach_old_tables(struct vcpu *v)
          v->arch.shadow_vtable )
     {
         // Q: why does this need to use (un)map_domain_page_*global* ?
-        //
+        /* A: so sh_update_linear_entries can operate on other vcpus */
         sh_unmap_domain_page_global(v->arch.shadow_vtable);
         v->arch.shadow_vtable = NULL;
     }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/physdev.c    Fri Sep 01 13:04:02 2006 -0600
@@ -96,10 +96,11 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
         if ( !IS_PRIV(current->domain) )
             break;
 
+        irq = irq_op.irq;
         ret = -EINVAL;
-        if ( (irq = irq_op.irq) >= NR_IRQS )
+        if ( (irq < 0) || (irq >= NR_IRQS) )
             break;
-        
+
         irq_op.vector = assign_irq_vector(irq);
         ret = copy_to_guest(arg, &irq_op, 1) ? -EFAULT : 0;
         break;
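
The physdev change copies the guest-supplied irq into a local variable before validating it and adds a lower-bound test; with only the upper-bound check a negative value could slip through. A trivial sketch of the corrected range check; the NR_IRQS value here is an assumption:

#include <stdio.h>

#define NR_IRQS 256   /* illustrative value */

/* Validate a guest-supplied irq number.  Checking only the upper bound,
 * as the old code did, would accept negative values. */
static int irq_is_valid(int irq)
{
    return (irq >= 0) && (irq < NR_IRQS);
}

int main(void)
{
    printf("%d %d %d\n", irq_is_valid(-1), irq_is_valid(10), irq_is_valid(256));
    return 0;
}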
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/time.c       Fri Sep 01 13:04:02 2006 -0600
@@ -676,7 +676,7 @@ static inline void __update_vcpu_system_
     struct vcpu_time_info *u;
 
     t = &this_cpu(cpu_time);
-    u = &v->domain->shared_info->vcpu_info[v->vcpu_id].time;
+    u = &v->vcpu_info->time;
 
     version_update_begin(&u->version);
 
@@ -690,7 +690,7 @@ static inline void __update_vcpu_system_
 
 void update_vcpu_system_time(struct vcpu *v)
 {
-    if ( v->domain->shared_info->vcpu_info[v->vcpu_id].time.tsc_timestamp != 
+    if ( v->vcpu_info->time.tsc_timestamp !=
          this_cpu(cpu_time).local_tsc_stamp )
         __update_vcpu_system_time(v);
 }
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/traps.c      Fri Sep 01 13:04:02 2006 -0600
@@ -339,7 +339,6 @@ asmlinkage void fatal_trap(int trapnr, s
 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
 {
     int cpu = smp_processor_id();
-    unsigned long cr2;
     static char *trapstr[] = { 
         "divide error", "debug", "nmi", "bkpt", "overflow", "bounds", 
         "invalid opcode", "device not available", "double fault", 
@@ -356,7 +355,7 @@ asmlinkage void fatal_trap(int trapnr, s
 
     if ( trapnr == TRAP_page_fault )
     {
-        __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
+        unsigned long cr2 = read_cr2();
         printk("Faulting linear address: %p\n", _p(cr2));
         show_page_walk(cr2);
     }
@@ -911,7 +910,7 @@ asmlinkage int do_page_fault(struct cpu_
 
     ASSERT(!in_irq());
 
-    __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
+    addr = read_cr2();
 
     DEBUGGER_trap_entry(TRAP_page_fault, regs);
 
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Fri Sep 01 13:04:02 2006 -0600
@@ -21,11 +21,28 @@
 /* All CPUs have their own IDT to allow int80 direct trap. */
 idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
 
+static void print_xen_info(void)
+{
+    char taint_str[TAINT_STRING_MAX_LEN];
+    char debug = 'n', *arch = "x86_32";
+
+#ifndef NDEBUG
+    debug = 'y';
+#endif
+
+#ifdef CONFIG_X86_PAE
+    arch = "x86_32p";
+#endif
+
+    printk("----[ Xen-%d.%d%s  %s  debug=%c  %s ]----\n",
+           xen_major_version(), xen_minor_version(), xen_extra_version(),
+           arch, debug, print_tainted(taint_str));
+}
+
 void show_registers(struct cpu_user_regs *regs)
 {
     struct cpu_user_regs fault_regs = *regs;
     unsigned long fault_crs[8];
-    char taint_str[TAINT_STRING_MAX_LEN];
     const char *context;
 
     if ( hvm_guest(current) && guest_mode(regs) )
@@ -35,25 +52,29 @@ void show_registers(struct cpu_user_regs
     }
     else
     {
-        context = guest_mode(regs) ? "guest" : "hypervisor";
-
         if ( !guest_mode(regs) )
         {
+            context = "hypervisor";
             fault_regs.esp = (unsigned long)&regs->esp;
             fault_regs.ss = read_segment_register(ss);
             fault_regs.ds = read_segment_register(ds);
             fault_regs.es = read_segment_register(es);
             fault_regs.fs = read_segment_register(fs);
             fault_regs.gs = read_segment_register(gs);
+            fault_crs[2] = read_cr2();
+        }
+        else
+        {
+            context = "guest";
+            fault_crs[2] = current->vcpu_info->arch.cr2;
         }
 
         fault_crs[0] = read_cr0();
         fault_crs[3] = read_cr3();
-    }
-
-    printk("----[ Xen-%d.%d%s    %s ]----\n",
-           xen_major_version(), xen_minor_version(), xen_extra_version(),
-           print_tainted(taint_str));
+        fault_crs[4] = read_cr4();
+    }
+
+    print_xen_info();
     printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
            smp_processor_id(), fault_regs.cs, fault_regs.eip);
     if ( !guest_mode(regs) )
@@ -63,7 +84,8 @@ void show_registers(struct cpu_user_regs
            fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx);
     printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08x\n",
            fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp);
-    printk("cr0: %08lx   cr3: %08lx\n", fault_crs[0], fault_crs[3]);
+    printk("cr0: %08lx   cr4: %08lx   cr3: %08lx   cr2: %08lx\n",
+           fault_crs[0], fault_crs[4], fault_crs[3], fault_crs[2]);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
            "ss: %04x   cs: %04x\n",
            fault_regs.ds, fault_regs.es, fault_regs.fs,
@@ -125,7 +147,6 @@ asmlinkage void do_double_fault(void)
 {
     struct tss_struct *tss = &doublefault_tss;
     unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
-    char taint_str[TAINT_STRING_MAX_LEN];
 
     watchdog_disable();
 
@@ -133,9 +154,8 @@ asmlinkage void do_double_fault(void)
 
     /* Find information saved during fault and dump it to the console. */
     tss = &init_tss[cpu];
-    printk("*** DOUBLE FAULT: Xen-%d.%d%s    %s\n",
-           xen_major_version(), xen_minor_version(), xen_extra_version(),
-           print_tainted(taint_str));
+    printk("*** DOUBLE FAULT ***\n");
+    print_xen_info();
     printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
            cpu, tss->cs, tss->eip);
     print_symbol(" %s\n", tss->eip);
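
show_registers() and the fault paths now read the control registers through read_cr2()/read_cr4() helpers instead of open-coded asm at each call site. A plausible sketch of what such wrappers look like on x86; this is an assumption, the real definitions live in Xen's processor headers, not in this changeset:

/* Plausible definitions for the read_crN() helpers used above (assumed;
 * not part of this changeset).  These are privileged reads, so they only
 * make sense inside the hypervisor. */
static inline unsigned long read_cr2(void)
{
    unsigned long cr2;
    __asm__ __volatile__ ("mov %%cr2, %0" : "=r" (cr2));
    return cr2;
}

static inline unsigned long read_cr4(void)
{
    unsigned long cr4;
    __asm__ __volatile__ ("mov %%cr4, %0" : "=r" (cr4));
    return cr4;
}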
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Fri Sep 01 13:04:02 2006 -0600
@@ -21,11 +21,24 @@
 
 #include <public/callback.h>
 
+static void print_xen_info(void)
+{
+    char taint_str[TAINT_STRING_MAX_LEN];
+    char debug = 'n';
+
+#ifndef NDEBUG
+    debug = 'y';
+#endif
+
+    printk("----[ Xen-%d.%d%s  x86_64  debug=%c  %s ]----\n",
+           xen_major_version(), xen_minor_version(), xen_extra_version(),
+           debug, print_tainted(taint_str));
+}
+
 void show_registers(struct cpu_user_regs *regs)
 {
     struct cpu_user_regs fault_regs = *regs;
     unsigned long fault_crs[8];
-    char taint_str[TAINT_STRING_MAX_LEN];
     const char *context;
 
     if ( hvm_guest(current) && guest_mode(regs) )
@@ -35,18 +48,27 @@ void show_registers(struct cpu_user_regs
     }
     else
     {
-        context = guest_mode(regs) ? "guest" : "hypervisor";
+        if ( guest_mode(regs) )
+        {
+            context = "guest";
+            fault_crs[2] = current->vcpu_info->arch.cr2;
+        }
+        else
+        {
+            context = "hypervisor";
+            fault_crs[2] = read_cr2();
+        }
+
         fault_crs[0] = read_cr0();
         fault_crs[3] = read_cr3();
+        fault_crs[4] = read_cr4();
         fault_regs.ds = read_segment_register(ds);
         fault_regs.es = read_segment_register(es);
         fault_regs.fs = read_segment_register(fs);
         fault_regs.gs = read_segment_register(gs);
     }
 
-    printk("----[ Xen-%d.%d%s    %s ]----\n",
-           xen_major_version(), xen_minor_version(), xen_extra_version(),
-           print_tainted(taint_str));
+    print_xen_info();
     printk("CPU:    %d\nRIP:    %04x:[<%016lx>]",
            smp_processor_id(), fault_regs.cs, fault_regs.rip);
     if ( !guest_mode(regs) )
@@ -62,8 +84,9 @@ void show_registers(struct cpu_user_regs
            fault_regs.r9,  fault_regs.r10, fault_regs.r11);
     printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
            fault_regs.r12, fault_regs.r13, fault_regs.r14);
-    printk("r15: %016lx   cr0: %016lx   cr3: %016lx\n",
-           fault_regs.r15, fault_crs[0], fault_crs[3]);
+    printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
+           fault_regs.r15, fault_crs[0], fault_crs[4]);
+    printk("cr3: %016lx   cr2: %016lx\n", fault_crs[3], fault_crs[2]);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
            "ss: %04x   cs: %04x\n",
            fault_regs.ds, fault_regs.es, fault_regs.fs,
@@ -121,7 +144,6 @@ asmlinkage void do_double_fault(struct c
 asmlinkage void do_double_fault(struct cpu_user_regs *regs)
 {
     unsigned int cpu, tr;
-    char taint_str[TAINT_STRING_MAX_LEN];
 
     asm ( "str %0" : "=r" (tr) );
     cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
@@ -131,9 +153,8 @@ asmlinkage void do_double_fault(struct c
     console_force_unlock();
 
     /* Find information saved during fault and dump it to the console. */
-    printk("*** DOUBLE FAULT: Xen-%d.%d%s    %s\n",
-           xen_major_version(), xen_minor_version(), xen_extra_version(),
-           print_tainted(taint_str));
+    printk("*** DOUBLE FAULT ***\n");
+    print_xen_info();
     printk("CPU:    %d\nRIP:    %04x:[<%016lx>]",
            cpu, regs->cs, regs->rip);
     print_symbol(" %s", regs->rip);
diff -r 4ba098226429 -r 1bab7d65171b xen/common/perfc.c
--- a/xen/common/perfc.c        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/common/perfc.c        Fri Sep 01 13:04:02 2006 -0600
@@ -136,8 +136,8 @@ static xen_sysctl_perfc_val_t *perfc_val
 static xen_sysctl_perfc_val_t *perfc_vals;
 static int               perfc_nbr_vals;
 static int               perfc_init = 0;
-static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc,
-                           XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val)
+static int perfc_copy_info(XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc,
+                           XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val)
 {
     unsigned int i, j;
     unsigned int v = 0;
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-ia64/mm.h Fri Sep 01 13:04:02 2006 -0600
@@ -451,7 +451,6 @@ extern u64 translate_domain_pte(u64 ptev
 
 #define INVALID_M2P_ENTRY        (~0UL)
 #define VALID_M2P(_e)            (!((_e) & (1UL<<63)))
-#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
 
 #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
 #define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/config.h
--- a/xen/include/asm-powerpc/config.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/config.h  Fri Sep 01 13:04:02 2006 -0600
@@ -47,11 +47,13 @@ extern char __bss_start[];
 /* this should be per processor, but for now */
 #define CACHE_LINE_SIZE 128
 
+/* 256M - 64M of Xen space seems like a nice number */
+#define CONFIG_MIN_DOM0_PAGES (192 << (20 - PAGE_SHIFT))
 #define CONFIG_SHADOW 1
 #define CONFIG_GDB 1
 #define CONFIG_SMP 1
 #define CONFIG_PCI 1
-#define NR_CPUS 1
+#define NR_CPUS 16
 
 #ifndef ELFSIZE
 #define ELFSIZE 64
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/current.h
--- a/xen/include/asm-powerpc/current.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/current.h Fri Sep 01 13:04:02 2006 -0600
@@ -27,7 +27,7 @@
 
 struct vcpu;
 
-register struct processor_area *parea asm("r13");
+register volatile struct processor_area *parea asm("r13");
 
 static inline struct vcpu *get_current(void)
 {
@@ -66,7 +66,7 @@ static inline struct cpu_user_regs *gues
 
 static inline void reset_stack_and_jump(void (*f)(void))
 {
-    void _reset_stack_and_jump(void (*f)(void), struct cpu_user_regs *regs);
+    void _reset_stack_and_jump(void (*)(void), struct cpu_user_regs *);
     struct cpu_user_regs *regs = guest_cpu_user_regs();
 
 #ifdef TRACK_RESUME
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/domain.h  Fri Sep 01 13:04:02 2006 -0600
@@ -38,15 +38,14 @@ struct arch_domain {
     struct page_info *rma_page;
     uint rma_order;
 
-    /* This is regular memory, only available thru translataion */
-    ulong logical_base_pfn;
-    ulong logical_end_pfn;
+    /* list of extents beyond RMA */
+    struct list_head extent_list;
 
     /* I/O-port access bitmap mask. */
     u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
 
     uint large_page_sizes;
-    char large_page_shift[4];
+    uint large_page_order[4];
 } __cacheline_aligned;
 
 struct slb_entry {
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/htab.h
--- a/xen/include/asm-powerpc/htab.h    Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/htab.h    Fri Sep 01 13:04:02 2006 -0600
@@ -133,8 +133,4 @@ struct domain_htab {
     union pte *map;     /* access the htab like an array */
     ulong *shadow;      /* idx -> logical translation array */
 };
-
-struct domain;
-extern void htab_alloc(struct domain *d, uint order);
-extern void htab_free(struct domain *d);
 #endif
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/mm.h      Fri Sep 01 13:04:02 2006 -0600
@@ -24,6 +24,7 @@
 #include <public/xen.h>
 #include <xen/list.h>
 #include <xen/types.h>
+#include <xen/mm.h>
 #include <asm/misc.h>
 #include <asm/system.h>
 #include <asm/flushtlb.h>
@@ -33,7 +34,6 @@
 #define memguard_unguard_range(_p,_l)    ((void)0)
 
 extern unsigned long xenheap_phys_end;
-#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
 
 /*
  * Per-page-frame information.
@@ -43,7 +43,6 @@ extern unsigned long xenheap_phys_end;
  *  2. Provide a PFN_ORDER() macro for accessing the order of a free page.
  */
 #define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
-#define PRtype_info "016lx"
 
 /* XXX copy-and-paste job; re-examine me */
 struct page_info
@@ -63,7 +62,7 @@ struct page_info
         /* Page is in use: ((count_info & PGC_count_mask) != 0). */
         struct {
             /* Owner of this page (NULL if page is anonymous). */
-            struct domain *_domain;
+            u32 _domain;
             /* Type reference count and various PGT_xxx flags and fields. */
             unsigned long type_info;
         } inuse;
@@ -80,80 +79,132 @@ struct page_info
 
 };
 
+struct page_extents {
+    /* Each frame can be threaded onto a doubly-linked list. */
+    struct list_head pe_list;
+
+    /* page extent */
+    struct page_info *pg;
+    uint order;
+    ulong pfn;
+};
+
  /* The following page types are MUTUALLY EXCLUSIVE. */
 #define PGT_none            (0<<29) /* no special uses of this page */
-#define PGT_l1_page_table   (1<<29) /* using this page as an L1 page table? */
-#define PGT_l2_page_table   (2<<29) /* using this page as an L2 page table? */
-#define PGT_l3_page_table   (3<<29) /* using this page as an L3 page table? */
-#define PGT_l4_page_table   (4<<29) /* using this page as an L4 page table? */
-#define PGT_gdt_page        (5<<29) /* using this page in a GDT? */
-#define PGT_ldt_page        (6<<29) /* using this page in an LDT? */
+#define PGT_RMA             (1<<29) /* This page is an RMA page? */
 #define PGT_writable_page   (7<<29) /* has writable mappings of this page? */
 #define PGT_type_mask       (7<<29) /* Bits 29-31. */
+
+ /* Owning guest has pinned this page to its current type? */
+#define _PGT_pinned         28
+#define PGT_pinned          (1U<<_PGT_pinned)
  /* Has this page been validated for use as its current type? */
-#define _PGT_validated      28
+#define _PGT_validated      27
 #define PGT_validated       (1U<<_PGT_validated)
- /* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned         27
-#define PGT_pinned          (1U<<_PGT_pinned)
- /* The 10 most significant bits of virt address if this is a page table. */
-#define PGT_va_shift        17
-#define PGT_va_mask         (((1U<<10)-1)<<PGT_va_shift)
+
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        32
+#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
  /* Is the back pointer still mutable (i.e. not fixed yet)? */
-#define PGT_va_mutable      (((1U<<10)-1)<<PGT_va_shift)
+#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
  /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
-#define PGT_va_unknown      (((1U<<10)-2)<<PGT_va_shift)
- /* 17-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<17)-1)
+#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
+
+ /* 16-bit count of uses of this frame as its current type. */
+#define PGT_count_mask      ((1U<<16)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
 #define PGC_allocated       (1U<<_PGC_allocated)
- /* 31-bit count of references to this frame. */
-#define PGC_count_mask      ((1U<<31)-1)
+ /* Set on a *guest* page to mark it out-of-sync with its shadow */
+#define _PGC_out_of_sync     30
+#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
+ /* Set when a page is in use as a page table */
+#define _PGC_page_table      29
+#define PGC_page_table      (1U<<_PGC_page_table)
+ /* 29-bit count of references to this frame. */
+#define PGC_count_mask      ((1U<<29)-1)
+
+#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
+
+static inline struct domain *unpickle_domptr(u32 _domain)
+{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
+
+static inline u32 pickle_domptr(struct domain *domain)
+{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
+
+#define PRtype_info "016lx"/* should only be used for printk's */
+
+#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
+#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+
+extern struct page_info *frame_table;
+extern unsigned long max_page;
+extern unsigned long total_pages;
+void init_frametable(void);
 
 static inline void put_page(struct page_info *page)
 {
-#if 0
-    int count;
-
-    count = atomic_dec_return(&page->count_info);
-
-    if ( unlikely((count & PGC_count_mask) == 0) )
+    u32 nx, x, y = page->count_info;
+
+    do {
+        x  = y;
+        nx = x - 1;
+    }
+    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );
+
+    if ( unlikely((nx & PGC_count_mask) == 0) ) {
+        panic("about to free page\n");
         free_domheap_page(page);
-#else
-    trap();
-#endif
+    }
 }
 
 static inline int get_page(struct page_info *page,
                            struct domain *domain)
 {
-#if 0
-    int count;
-
-    count = atomic_inc_return(&page->count_info);
-
-    if (((count & PGC_count_mask) == 0) ||      /* Count overflow? */
-            ((count & PGC_count_mask) == 1) ||  /* Wasn't allocated? */
-            ((page->domain != domain)))         /* Wrong owner? */
-    {
-        atomic_dec(&page->count_info);
-        return 0;
-    }
-
-#else
-    trap();
-#endif
+    u32 x, nx, y = page->count_info;
+    u32 d, nd = page->u.inuse._domain;
+    u32 _domain = pickle_domptr(domain);
+
+    do {
+        x  = y;
+        nx = x + 1;
+        d  = nd;
+        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
+             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
+             unlikely(d != _domain) )                /* Wrong owner? */
+        {
+            return 0;
+        }
+        y = cmpxchg(&page->count_info, x, nx);
+    }
+    while ( unlikely(y != x) );
+
     return 1;
+}
+
+extern void put_page_type(struct page_info *page);
+extern int  get_page_type(struct page_info *page, unsigned long type);
+
+static inline void put_page_and_type(struct page_info *page)
+{
+    put_page_type(page);
+    put_page(page);
 }
 
 static inline int get_page_and_type(struct page_info *page,
                                     struct domain *domain,
-                                    u32 type)
-{
-    trap();
-    return 1;
+                                    unsigned long type)
+{
+    int rc = get_page(page, domain);
+
+    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
+    {
+        put_page(page);
+        rc = 0;
+    }
+
+    return rc;
 }
 
 static inline int page_is_removable(struct page_info *page)
@@ -161,16 +212,9 @@ static inline int page_is_removable(stru
     return ((page->count_info & PGC_count_mask) == 1);
 }
 
-int get_page_type(struct page_info *page, u32 type);
-
 #define set_machinetophys(_mfn, _pfn) (trap(), 0)
 
 extern void synchronise_pagetables(unsigned long cpu_mask);
-
-static inline void put_page_and_type(struct page_info *page)
-{
-    trap();
-}
 
 /* XXX don't know what this is for */
 typedef struct {
@@ -179,17 +223,10 @@ typedef struct {
 } vm_assist_info_t;
 extern vm_assist_info_t vm_assist_info[];
 
-#define page_get_owner(_p)    ((_p)->u.inuse._domain)
-#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = _d)
-
 #define share_xen_page_with_guest(p, d, r) do { } while (0)
 #define share_xen_page_with_privileged_guests(p, r) do { } while (0)
 
-extern struct page_info *frame_table;
 extern unsigned long frame_table_size;
-extern unsigned long max_page;
-extern unsigned long total_pages;
-void init_frametable(void);
 
 /* hope that accesses to this will fail spectacularly */
 #define machine_to_phys_mapping ((u32 *)-1UL)
@@ -199,12 +236,12 @@ extern int update_grant_va_mapping(unsig
                                    struct domain *,
                                    struct vcpu *);
 
-extern void put_page_type(struct page_info *page);
-
-#define PFN_TYPE_RMA 0
-#define PFN_TYPE_LOGICAL 1
-#define PFN_TYPE_IO 2
-extern ulong pfn2mfn(struct domain *d, long mfn, int *type);
+#define PFN_TYPE_RMA 1
+#define PFN_TYPE_LOGICAL 2
+#define PFN_TYPE_IO 3
+#define PFN_TYPE_REMOTE 4
+
+extern ulong pfn2mfn(struct domain *d, long pfn, int *type);
 
 /* Arch-specific portion of memory_op hypercall. */
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
@@ -221,6 +258,10 @@ static inline unsigned long gmfn_to_mfn(
 
 #define mfn_to_gmfn(_d, mfn) (mfn)
 
+extern int allocate_rma(struct domain *d, unsigned int order_pages);
+extern uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages);
+extern void free_extents(struct domain *d);
+
 extern int steal_page(struct domain *d, struct page_info *page,
                         unsigned int memflags);
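
get_page() and put_page() above replace the old trap() stubs with the cmpxchg-based reference counting scheme also used on x86: read count_info, compute the new value, and retry if another CPU changed it in between. A self-contained sketch of that retry loop, modelling Xen's cmpxchg() with a GCC atomic builtin:

#include <stdint.h>
#include <stdio.h>

#define PGC_count_mask ((1U << 29) - 1)   /* as defined above */

/* Standalone rendering of the get_page() retry loop: bump the reference
 * count only if the page is already allocated and the count will not
 * overflow. */
static int try_get_ref(volatile uint32_t *count_info)
{
    uint32_t x, nx, y = *count_info;

    do {
        x  = y;
        nx = x + 1;
        if ((x & PGC_count_mask) == 0 ||     /* not allocated */
            (nx & PGC_count_mask) == 0)      /* count would overflow */
            return 0;
        y = __sync_val_compare_and_swap(count_info, x, nx);
    } while (y != x);

    return 1;
}

int main(void)
{
    volatile uint32_t count = 1;             /* allocated, one reference */
    printf("got ref: %d, count now %u\n", try_get_ref(&count), count);
    return 0;
}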
 
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/powerpc64/procarea.h
--- a/xen/include/asm-powerpc/powerpc64/procarea.h      Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/powerpc64/procarea.h      Fri Sep 01 13:04:02 2006 -0600
@@ -28,6 +28,7 @@ struct gdb_state;
 
 struct processor_area
 {
+    unsigned int whoami;
     struct vcpu *cur_vcpu;
     void *hyp_stack_base;
     ulong saved_regs[2];
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/processor.h       Fri Sep 01 13:04:02 2006 -0600
@@ -39,8 +39,11 @@ struct cpu_user_regs;
 struct cpu_user_regs;
 extern void show_registers(struct cpu_user_regs *);
 extern void show_execution_state(struct cpu_user_regs *);
-extern unsigned int cpu_rma_order(void);
-extern void cpu_initialize(void);
+extern void show_backtrace(ulong sp, ulong lr, ulong pc);
+extern unsigned int cpu_extent_order(void);
+extern unsigned int cpu_default_rma_order_pages(void);
+extern uint cpu_large_page_orders(uint *sizes, uint max);
+extern void cpu_initialize(int cpuid);
 extern void cpu_init_vcpu(struct vcpu *);
 extern void save_cpu_sprs(struct vcpu *);
 extern void load_cpu_sprs(struct vcpu *);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/shadow.h
--- a/xen/include/asm-powerpc/shadow.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/shadow.h  Fri Sep 01 13:04:02 2006 -0600
@@ -13,7 +13,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright (C) IBM Corp. 2005, 2006
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
  */
@@ -55,4 +55,18 @@ static inline void mark_dirty(struct dom
 {
     return;
 }
+#define gnttab_mark_dirty(d, f) mark_dirty((d), (f))
+
+extern int shadow_domctl(struct domain *d, 
+                   xen_domctl_shadow_op_t *sc,
+                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
+extern unsigned int shadow_teardown(struct domain *d);
+extern unsigned int shadow_set_allocation(
+    struct domain *d, unsigned int megabytes, int *preempted);
+
+/* Return the size of the shadow pool, rounded up to the nearest MB */
+static inline unsigned int shadow_get_allocation(struct domain *d)
+{
+    return (1ULL << (d->arch.htab.order + PAGE_SHIFT)) >> 20;
+}
 #endif
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h     Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/smp.h     Fri Sep 01 13:04:02 2006 -0600
@@ -28,8 +28,8 @@ extern int smp_num_siblings;
 
 /* revisit when we support SMP */
 #define get_hard_smp_processor_id(i) i
-#define hard_smp_processor_id() 0
-#define raw_smp_processor_id() 0
+#define raw_smp_processor_id() (parea->whoami)
+#define hard_smp_processor_id() raw_smp_processor_id()
 extern cpumask_t cpu_sibling_map[];
 extern cpumask_t cpu_core_map[];
 
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/types.h
--- a/xen/include/asm-powerpc/types.h   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-powerpc/types.h   Fri Sep 01 13:04:02 2006 -0600
@@ -3,8 +3,18 @@
 #ifndef _PPC_TYPES_H
 #define _PPC_TYPES_H
 
+#include <xen/config.h>
+
+#if defined(__ppc__)
+#define BYTES_PER_LONG 4
+#define BITS_PER_LONG 32
+#elif defined(__PPC64__)
+#define BYTES_PER_LONG 8
+#define BITS_PER_LONG 64
+#endif
+
+#ifndef __ASSEMBLY__
 typedef unsigned short umode_t;
-
 
 /*
  * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
@@ -31,8 +41,6 @@ typedef unsigned long __u64;
 #endif
 #endif
 
-#include <xen/config.h>
-
 typedef signed char s8;
 typedef unsigned char u8;
 
@@ -45,14 +53,10 @@ typedef unsigned int u32;
 #if defined(__ppc__)
 typedef signed long long s64;
 typedef unsigned long long u64;
-#define BYTES_PER_LONG 4
-#define BITS_PER_LONG 32
 typedef unsigned int size_t;
 #elif defined(__PPC64__)
 typedef signed long s64;
 typedef unsigned long u64;
-#define BYTES_PER_LONG 8
-#define BITS_PER_LONG 64
 typedef unsigned long size_t;
 #endif
 
@@ -66,4 +70,5 @@ typedef u64 dma64_addr_t;
 
 typedef unsigned short xmem_bufctl_t;
 
+#endif  /* __ASSEMBLY__ */
 #endif
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/mm.h  Fri Sep 01 13:04:02 2006 -0600
@@ -338,7 +338,6 @@ int check_descriptor(struct desc_struct 
 #define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
 #define INVALID_M2P_ENTRY        (~0UL)
 #define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))
-#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
 
 #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
 #define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/page.h        Fri Sep 01 13:04:02 2006 -0600
@@ -300,13 +300,6 @@ void setup_idle_pagetable(void);
 #define _PAGE_GNTTAB   0
 #endif
 
-/*
- * Disallow unused flag bits plus PAT, PSE and GLOBAL.
- * Also disallow GNTTAB if we are using it for grant-table debugging.
- * Permit the NX bit if the hardware supports it.
- */
-#define BASE_DISALLOW_MASK ((0xFFFFF180U | _PAGE_GNTTAB) & ~_PAGE_NX)
-
 #define __PAGE_HYPERVISOR \
     (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define __PAGE_HYPERVISOR_NOCACHE \
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/processor.h   Fri Sep 01 13:04:02 2006 -0600
@@ -288,6 +288,13 @@ static inline void write_cr0(unsigned lo
 static inline void write_cr0(unsigned long val)
 {
        __asm__("mov %0,%%cr0": :"r" ((unsigned long)val));
+}
+
+static inline unsigned long read_cr2(void)
+{
+    unsigned long __cr2;
+    __asm__("mov %%cr2,%0\n\t" :"=r" (__cr2));
+    return __cr2;
 }
 
 static inline unsigned long read_cr4(void)
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page-2level.h
--- a/xen/include/asm-x86/x86_32/page-2level.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page-2level.h  Fri Sep 01 13:04:02 2006 -0600
@@ -53,7 +53,4 @@ typedef l2_pgentry_t root_pgentry_t;
 #define get_pte_flags(x) ((int)(x) & 0xFFF)
 #define put_pte_flags(x) ((intpte_t)((x) & 0xFFF))
 
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
-
 #endif /* __X86_32_PAGE_2LEVEL_H__ */
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page-3level.h  Fri Sep 01 13:04:02 2006 -0600
@@ -66,8 +66,6 @@ typedef l3_pgentry_t root_pgentry_t;
 #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
 #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
 
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
 #define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */
 
 #endif /* __X86_32_PAGE_3LEVEL_H__ */
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_32/page.h Fri Sep 01 13:04:02 2006 -0600
@@ -26,6 +26,15 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
 #define GRANT_PTE_FLAGS \
     (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
 
+/*
+ * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Permit the NX bit if the hardware supports it.
+ */
+#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX)
+
+#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+
 #endif /* __X86_32_PAGE_H__ */
 
 /*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/asm-x86/x86_64/page.h Fri Sep 01 13:04:02 2006 -0600
@@ -75,8 +75,15 @@ typedef l4_pgentry_t root_pgentry_t;
 #define _PAGE_NX_BIT (1U<<23)
 #define _PAGE_NX     (cpu_has_nx ? _PAGE_NX_BIT : 0U)
 
-#define L1_DISALLOW_MASK BASE_DISALLOW_MASK
-#define L2_DISALLOW_MASK BASE_DISALLOW_MASK
+/*
+ * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Permit the NX bit if the hardware supports it.
+ * Note that range [62:52] is available for software use on x86/64.
+ */
+#define BASE_DISALLOW_MASK (0xFF000180U & ~_PAGE_NX)
+
+#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
 #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
 #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
 
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-ia64.h    Fri Sep 01 13:04:02 2006 -0600
@@ -18,15 +18,12 @@
 
 #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)       __guest_handle_ ## name
 #define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
 #ifdef __XEN_TOOLS__
 #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
 #endif
 
 #ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
 /* Guest handles for primitive C types. */
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-powerpc.h
--- a/xen/include/public/arch-powerpc.h Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-powerpc.h Fri Sep 01 13:04:02 2006 -0600
@@ -29,7 +29,6 @@
 
 #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)     __guest_handle_ ## name
 #define set_xen_guest_handle(hnd, val) \
     do { \
         if (sizeof ((hnd).__pad)) \
@@ -42,8 +41,6 @@
 #endif
 
 #ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
 /* Guest handles for primitive C types. */
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-x86_32.h  Fri Sep 01 13:04:02 2006 -0600
@@ -28,14 +28,7 @@
 #endif
 
 /* Structural guest handles introduced in 0x00030201. */
-#if (defined(__XEN__) || defined(__XEN_TOOLS__)) && !defined(__ASSEMBLY__)
-typedef uint64_t __attribute__((aligned(8))) uint64_aligned_t;
-#define __DEFINE_XEN_GUEST_HANDLE(name, type)                   \
-    typedef struct { type *p; }                                 \
-        __guest_handle_ ## name;                                \
-    typedef struct { union { type *p; uint64_aligned_t q; }; }  \
-        __guest_handle_64_ ## name
-#elif __XEN_INTERFACE_VERSION__ >= 0x00030201
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
 #define __DEFINE_XEN_GUEST_HANDLE(name, type) \
     typedef struct { type *p; } __guest_handle_ ## name
 #else
@@ -45,15 +38,9 @@ typedef uint64_t __attribute__((aligned(
 
 #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)       __guest_handle_64_ ## name
+#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
 #ifdef __XEN_TOOLS__
 #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
-#define set_xen_guest_handle(hnd, val)                      \
-    do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
-         (hnd).p = val;                                     \
-    } while ( 0 )
-#else
-#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
 #endif
 
 #ifndef __ASSEMBLY__
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/arch-x86_64.h  Fri Sep 01 13:04:02 2006 -0600
@@ -39,15 +39,12 @@
 
 #define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)       __guest_handle_ ## name
 #define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
 #ifdef __XEN_TOOLS__
 #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
 #endif
 
 #ifndef __ASSEMBLY__
-typedef uint64_t uint64_aligned_t;
-
 /* Guest handles for primitive C types. */
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/domctl.h       Fri Sep 01 13:04:02 2006 -0600
@@ -16,12 +16,10 @@
 
 #include "xen.h"
 
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000001
-
-#define uint64_t uint64_aligned_t
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000003
 
 struct xenctl_cpumap {
-    XEN_GUEST_HANDLE_64(uint8_t) bitmap;
+    XEN_GUEST_HANDLE(uint8_t) bitmap;
     uint32_t nr_cpus;
 };
 
@@ -72,8 +70,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdo
 #define XEN_DOMCTL_getmemlist         6
 struct xen_domctl_getmemlist {
     /* IN variables. */
+    /* Max entries to write to output buffer. */
     uint64_t max_pfns;
-    XEN_GUEST_HANDLE_64(ulong) buffer;
+    /* Start index in guest's page list. */
+    uint64_t start_pfn;
+    XEN_GUEST_HANDLE(xen_pfn_t) buffer;
     /* OUT variables. */
     uint64_t num_pfns;
 };
@@ -110,7 +111,7 @@ struct xen_domctl_getpageframeinfo2 {
     /* IN variables. */
     uint64_t num;
     /* IN/OUT variables. */
-    XEN_GUEST_HANDLE_64(ulong) array;
+    XEN_GUEST_HANDLE(ulong) array;
 };
 typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
@@ -184,7 +185,7 @@ struct xen_domctl_shadow_op {
     uint32_t       mb;       /* Shadow memory allocation in MB */
 
     /* OP_PEEK / OP_CLEAN */
-    XEN_GUEST_HANDLE_64(ulong) dirty_bitmap;
+    XEN_GUEST_HANDLE(ulong) dirty_bitmap;
     uint64_t       pages;    /* Size of buffer. Updated with actual size. */
     struct xen_domctl_shadow_op_stats stats;
 };
@@ -204,8 +205,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_m
 #define XEN_DOMCTL_setvcpucontext    12
 #define XEN_DOMCTL_getvcpucontext    13
 struct xen_domctl_vcpucontext {
-    uint32_t              vcpu;                     /* IN */
-    XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
+    uint32_t              vcpu;                  /* IN */
+    XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; /* IN/OUT */
 };
 typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
@@ -378,8 +379,6 @@ typedef struct xen_domctl xen_domctl_t;
 typedef struct xen_domctl xen_domctl_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
 
-#undef uint64_t
-
 #endif /* __XEN_PUBLIC_DOMCTL_H__ */
 
 /*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/sysctl.h       Fri Sep 01 13:04:02 2006 -0600
@@ -16,9 +16,7 @@
 #include "xen.h"
 #include "domctl.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000001
-
-#define uint64_t uint64_aligned_t
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000002
 
 /*
  * Read console content from Xen buffer ring.
@@ -26,8 +24,8 @@
 #define XEN_SYSCTL_readconsole       1
 struct xen_sysctl_readconsole {
     /* IN variables. */
-    uint32_t clear;                   /* Non-zero -> clear after reading. */
-    XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */
+    uint32_t clear;                /* Non-zero -> clear after reading. */
+    XEN_GUEST_HANDLE(char) buffer; /* Buffer start */
     /* IN/OUT variables. */
     uint32_t count;            /* In: Buffer size;  Out: Used buffer size  */
 };
@@ -105,9 +103,9 @@ struct xen_sysctl_perfc_op {
     uint32_t       nr_counters;       /*  number of counters description  */
     uint32_t       nr_vals;                      /*  number of values  */
     /* counter information (or NULL) */
-    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
+    XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc;
     /* counter values (or NULL) */
-    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
+    XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val;
 };
 typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
@@ -117,7 +115,7 @@ struct xen_sysctl_getdomaininfolist {
     /* IN variables. */
     domid_t               first_domain;
     uint32_t              max_domains;
-    XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
+    XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t) buffer;
     /* OUT variables. */
     uint32_t              num_domains;
 };
@@ -140,8 +138,6 @@ typedef struct xen_sysctl xen_sysctl_t;
 typedef struct xen_sysctl xen_sysctl_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t);
 
-#undef uint64_t
-
 #endif /* __XEN_PUBLIC_SYSCTL_H__ */
 
 /*
diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/xen.h
--- a/xen/include/public/xen.h  Fri Sep 01 12:52:12 2006 -0600
+++ b/xen/include/public/xen.h  Fri Sep 01 13:04:02 2006 -0600
@@ -63,6 +63,7 @@
 #define __HYPERVISOR_hvm_op               34
 #define __HYPERVISOR_sysctl               35
 #define __HYPERVISOR_domctl               36
+#define __HYPERVISOR_kexec_op             37
 
 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0               48
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/FlatDeviceTree.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/FlatDeviceTree.py   Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,323 @@
+#!/usr/bin/env python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Copyright (C) IBM Corp. 2006
+#
+# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+
+import os
+import sys
+import struct
+import stat
+import re
+
+_OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning
+_OF_DT_BEGIN_NODE = 0x1
+_OF_DT_END_NODE = 0x2
+_OF_DT_PROP = 0x3
+_OF_DT_END = 0x9
+
+def _bincat(seq, separator=''):
+    '''Concatenate the contents of seq into a bytestream.'''
+    strs = []
+    for item in seq:
+        if type(item) == type(0):
+            strs.append(struct.pack(">I", item))
+        else:
+            try:
+                strs.append(item.to_bin())
+            except AttributeError, e:
+                strs.append(item)
+    return separator.join(strs)
+
+def _alignup(val, alignment):
+    return (val + alignment - 1) & ~(alignment - 1)
+
+def _pad(buf, alignment):
+    '''Pad bytestream with NULLs to specified alignment.'''
+    padlen = _alignup(len(buf), alignment)
+    return buf + '\0' * (padlen - len(buf))
+    # not present in Python 2.3:
+    #return buf.ljust(padlen, '\0')
+
+def _indent(item):
+    indented = []
+    for line in str(item).splitlines(True):
+        indented.append('    ' + line)
+    return ''.join(indented)
+
+class _Property:
+    _nonprint = re.compile('[\000-\037\200-\377]')
+    def __init__(self, node, name, value):
+        self.node = node
+        self.value = value
+        self.name = name
+        self.node.tree.stradd(name)
+
+    def __str__(self):
+        result = self.name
+        if self.value:
+            searchtext = self.value
+            # it's ok for a string to end in NULL
+            if searchtext.find('\000') == len(searchtext)-1:
+                searchtext = searchtext[:-1]
+            m = self._nonprint.search(searchtext)
+            if m:
+                bytes = struct.unpack("B" * len(self.value), self.value)
+                hexbytes = [ '%02x' % b for b in bytes ]
+                words = []
+                for i in range(0, len(self.value), 4):
+                    words.append(''.join(hexbytes[i:i+4]))
+                v = '<' + ' '.join(words) + '>'
+            else:
+                v = '"%s"' % self.value
+            result += ': ' + v
+        return result
+
+    def to_bin(self):
+        offset = self.node.tree.stroffset(self.name)
+        return struct.pack('>III', _OF_DT_PROP, len(self.value), offset) \
+            + _pad(self.value, 4)
+
+class _Node:
+    def __init__(self, tree, name):
+        self.tree = tree
+        self.name = name
+        self.props = {}
+        self.children = {}
+        self.phandle = 0
+
+    def __str__(self):
+        propstrs = [ _indent(prop) for prop in self.props.values() ]
+        childstrs = [ _indent(child) for child in self.children.values() ]
+        return '%s:\n%s\n%s' % (self.name, '\n'.join(propstrs),
+            '\n'.join(childstrs))
+
+    def to_bin(self):
+        name = _pad(self.name + '\0', 4)
+        return struct.pack('>I', _OF_DT_BEGIN_NODE) + \
+                name + \
+                _bincat(self.props.values()) + \
+                _bincat(self.children.values()) + \
+                struct.pack('>I', _OF_DT_END_NODE)
+
+    def addprop(self, propname, *cells):
+        '''setprop with duplicate error-checking.'''
+        if propname in self.props:
+            raise AttributeError('%s/%s already exists' % (self.name, propname))
+        self.setprop(propname, *cells)
+
+    def setprop(self, propname, *cells):
+        self.props[propname] = _Property(self, propname, _bincat(cells))
+
+    def addnode(self, nodename):
+        '''newnode with duplicate error-checking.'''
+        if nodename in self.children:
+            raise AttributeError('%s/%s already exists' % (self.name, nodename))
+        return self.newnode(nodename)
+
+    def newnode(self, nodename):
+        node = _Node(self.tree, nodename)
+        self.children[nodename] = node
+        return node
+
+    def getprop(self, propname):
+        return self.props[propname]
+
+    def getchild(self, nodename):
+        return self.children[nodename]
+
+    def get_phandle(self):
+        if self.phandle:
+            return self.phandle
+        self.phandle = self.tree.alloc_phandle()
+        self.addprop('linux,phandle', self.phandle)
+        return self.phandle
+
+class _Header:
+    def __init__(self):
+        self.magic = 0
+        self.totalsize = 0
+        self.off_dt_struct = 0
+        self.off_dt_strings = 0
+        self.off_mem_rsvmap = 0
+        self.version = 0
+        self.last_comp_version = 0
+        self.boot_cpuid_phys = 0
+        self.size_dt_strings = 0
+    def to_bin(self):
+        return struct.pack('>9I',
+            self.magic,
+            self.totalsize,
+            self.off_dt_struct,
+            self.off_dt_strings,
+            self.off_mem_rsvmap,
+            self.version,
+            self.last_comp_version,
+            self.boot_cpuid_phys,
+            self.size_dt_strings)
+
+class _StringBlock:
+    def __init__(self):
+        self.table = []
+    def to_bin(self):
+        return _bincat(self.table, '\0') + '\0'
+    def add(self, str):
+        self.table.append(str)
+    def getoffset(self, str):
+        return self.to_bin().index(str + '\0')
+
+class Tree(_Node):
+    def __init__(self):
+        self.last_phandle = 0
+        self.strings = _StringBlock()
+        self.reserved = [(0, 0)]
+        _Node.__init__(self, self, '\0')
+
+    def alloc_phandle(self):
+        self.last_phandle += 1
+        return self.last_phandle
+
+    def stradd(self, str):
+        return self.strings.add(str)
+
+    def stroffset(self, str):
+        return self.strings.getoffset(str)
+
+    def reserve(self, start, len):
+        self.reserved.insert(0, (start, len))
+
+    def to_bin(self):
+        # layout:
+        #   header
+        #   reservation map
+        #   string block
+        #   data block
+
+        datablock = _Node.to_bin(self)
+
+        r = [ struct.pack('>QQ', rsrv[0], rsrv[1]) for rsrv in self.reserved ]
+        reserved = _bincat(r)
+
+        strblock = _pad(self.strings.to_bin(), 4)
+        strblocklen = len(strblock)
+
+        header = _Header()
+        header.magic = _OF_DT_HEADER
+        header.off_mem_rsvmap = _alignup(len(header.to_bin()), 8)
+        header.off_dt_strings = header.off_mem_rsvmap + len(reserved)
+        header.off_dt_struct = header.off_dt_strings + strblocklen
+        header.version = 0x10
+        header.last_comp_version = 0x10
+        header.boot_cpuid_phys = 0
+        header.size_dt_strings = strblocklen
+
+        payload = reserved + \
+                strblock + \
+                datablock + \
+                struct.pack('>I', _OF_DT_END)
+        header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8)
+        return _pad(header.to_bin(), 8) + payload
+
+_host_devtree_root = '/proc/device-tree'
+def _getprop(propname):
+    '''Extract a property from the system's device tree.'''
+    f = file(os.path.join(_host_devtree_root, propname), 'r')
+    data = f.read()
+    f.close()
+    return data
+
+def _copynode(node, dirpath, propfilter):
+    '''Extract all properties from a node in the system's device tree.'''
+    dirents = os.listdir(dirpath)
+    for dirent in dirents:
+        fullpath = os.path.join(dirpath, dirent)
+        st = os.lstat(fullpath)
+        if stat.S_ISDIR(st.st_mode):
+            child = node.addnode(dirent)
+            _copytree(child, fullpath, propfilter)
+        elif stat.S_ISREG(st.st_mode) and propfilter(fullpath):
+            node.addprop(dirent, _getprop(fullpath))
+
+def _copytree(node, dirpath, propfilter):
+    path = os.path.join(_host_devtree_root, dirpath)
+    _copynode(node, path, propfilter)
+
+def build(imghandler):
+    '''Construct a device tree by combining the domain's configuration and
+    the host's device tree.'''
+    root = Tree()
+
+    # 4 pages: start_info, console, store, shared_info
+    root.reserve(0x3ffc000, 0x4000)
+
+    root.addprop('device_type', 'chrp-but-not-really\0')
+    root.addprop('#size-cells', 2)
+    root.addprop('#address-cells', 2)
+    root.addprop('model', 'Momentum,Maple-D\0')
+    root.addprop('compatible', 'Momentum,Maple\0')
+
+    xen = root.addnode('xen')
+    xen.addprop('start-info', 0, 0x3ffc000, 0, 0x1000)
+    xen.addprop('version', 'Xen-3.0-unstable\0')
+    xen.addprop('reg', 0, imghandler.vm.domid, 0, 0)
+    xen.addprop('domain-name', imghandler.vm.getName() + '\0')
+    xencons = xen.addnode('console')
+    xencons.addprop('interrupts', 1, 0)
+
+    # XXX split out RMA node
+    mem = root.addnode('memory@0')
+    totalmem = imghandler.vm.getMemoryTarget() * 1024
+    mem.addprop('reg', 0, 0, 0, totalmem)
+    mem.addprop('device_type', 'memory\0')
+
+    cpus = root.addnode('cpus')
+    cpus.addprop('smp-enabled')
+    cpus.addprop('#size-cells', 0)
+    cpus.addprop('#address-cells', 1)
+
+    # Copy all properties the system firmware gave us, except for 'linux,'
+    # properties, from 'cpus/@0', once for every vcpu. Hopefully all cpus are
+    # identical...
+    cpu0 = None
+    def _nolinuxprops(fullpath):
+        return not os.path.basename(fullpath).startswith('linux,')
+    for i in range(imghandler.vm.getVCpuCount()):
+        cpu = cpus.addnode('PowerPC,970@0')
+        _copytree(cpu, 'cpus/PowerPC,970@0', _nolinuxprops)
+        # and then overwrite what we need to
+        pft_size = imghandler.vm.info.get('pft-size', 0x14)
+        cpu.setprop('ibm,pft-size', 0, pft_size)
+
+        # set default CPU
+        if cpu0 == None:
+            cpu0 = cpu
+
+    chosen = root.addnode('chosen')
+    chosen.addprop('cpu', cpu0.get_phandle())
+    chosen.addprop('memory', mem.get_phandle())
+    chosen.addprop('linux,stdout-path', '/xen/console\0')
+    chosen.addprop('interrupt-controller', xen.get_phandle())
+    chosen.addprop('bootargs', imghandler.cmdline + '\0')
+    # xc_linux_load.c will overwrite these 64-bit properties later
+    chosen.addprop('linux,initrd-start', 0, 0)
+    chosen.addprop('linux,initrd-end', 0, 0)
+
+    if 1:
+        f = file('/tmp/domU.dtb', 'w')
+        f.write(root.to_bin())
+        f.close()
+
+    return root
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/arch.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/arch.py     Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Copyright (C) IBM Corp. 2006
+#
+# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+
+import os
+
+_types = {
+    "i386": "x86",
+    "i486": "x86",
+    "i586": "x86",
+    "i686": "x86",
+    "x86_64": "x86",
+    "ia64": "ia64",
+    "ppc": "powerpc",
+    "ppc64": "powerpc",
+}
+type = _types.get(os.uname()[4], "unknown")
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/backtrace.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/backtrace.c      Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,193 @@
+/*
+ * Routines providing a simple monitor for use on the PowerMac.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/console.h>
+#include <xen/sched.h>
+#include <xen/symbols.h>
+
+static char namebuf[KSYM_NAME_LEN+1];
+
+/* Shamelessly lifted from Linux Xmon; try to keep pristine */
+#ifdef __powerpc64__
+#define LRSAVE_OFFSET          0x10
+#define REG_FRAME_MARKER       0x7265677368657265ul    /* "regshere" */
+#define MARKER_OFFSET          0x60
+#define REGS_OFFSET            0x70
+#define REG "%016lX"
+#else
+#define LRSAVE_OFFSET          4
+#define REG_FRAME_MARKER       0x72656773
+#define MARKER_OFFSET          8
+#define REGS_OFFSET            16
+#define REG "%08lX"
+#endif
+
+#define TRAP(regs) ((regs)->entry_vector & ~0xF)
+static int xmon_depth_to_print = 64;
+
+/* Very cheap human name for vector lookup. */
+static
+const char *getvecname(unsigned long vec)
+{
+       char *ret;
+
+       switch (vec) {
+       case 0x100:     ret = "(System Reset)"; break;
+       case 0x200:     ret = "(Machine Check)"; break;
+       case 0x300:     ret = "(Data Access)"; break;
+       case 0x380:     ret = "(Data SLB Access)"; break;
+       case 0x400:     ret = "(Instruction Access)"; break;
+       case 0x480:     ret = "(Instruction SLB Access)"; break;
+       case 0x500:     ret = "(Hardware Interrupt)"; break;
+       case 0x600:     ret = "(Alignment)"; break;
+       case 0x700:     ret = "(Program Check)"; break;
+       case 0x800:     ret = "(FPU Unavailable)"; break;
+       case 0x900:     ret = "(Decrementer)"; break;
+       case 0xc00:     ret = "(System Call)"; break;
+       case 0xd00:     ret = "(Single Step)"; break;
+       case 0xf00:     ret = "(Performance Monitor)"; break;
+       case 0xf20:     ret = "(Altivec Unavailable)"; break;
+       case 0x1300:    ret = "(Instruction Breakpoint)"; break;
+       default: ret = "";
+       }
+       return ret;
+}
+
+static int mread(unsigned long adrs, void *buf, int size)
+{
+    memcpy(buf, (void *)adrs, size);
+    return size;
+}
+
+static void get_function_bounds(unsigned long pc, unsigned long *startp,
+                               unsigned long *endp)
+{
+    unsigned long size, offset;
+       const char *name;
+
+    *startp = *endp = 0;
+       if (pc == 0)
+               return;
+
+    name = symbols_lookup(pc, &size, &offset, namebuf);
+    if (name != NULL) {
+                       *startp = pc - offset;
+                       *endp = pc - offset + size;
+    }
+}
+    
+/* Print an address in numeric and symbolic form (if possible) */
+static void xmon_print_symbol(unsigned long address, const char *mid,
+                              const char *after)
+{
+       const char *name = NULL;
+       unsigned long offset, size;
+
+       printf(REG, address);
+
+    name = symbols_lookup(address, &size, &offset, namebuf);
+       if (name) {
+               printf("%s%s+%#lx/%#lx", mid, name, offset, size);
+       }
+       printf("%s", after);
+}
+
+static void backtrace(
+    unsigned long sp, unsigned long lr, unsigned long pc)
+{
+       unsigned long ip;
+       unsigned long newsp;
+       unsigned long marker;
+       int count = 0;
+       struct cpu_user_regs regs;
+
+       do {
+               if (sp > xenheap_phys_end) {
+                       if (sp != 0)
+                               printf("SP (%lx) is not in xen space\n", sp);
+                       break;
+               }
+
+               if (!mread(sp + LRSAVE_OFFSET, &ip, sizeof(unsigned long))
+                   || !mread(sp, &newsp, sizeof(unsigned long))) {
+                       printf("Couldn't read stack frame at %lx\n", sp);
+                       break;
+               }
+
+               /*
+                * For the first stack frame, try to work out if
+                * LR and/or the saved LR value in the bottommost
+                * stack frame are valid.
+                */
+               if ((pc | lr) != 0) {
+                       unsigned long fnstart, fnend;
+                       unsigned long nextip;
+                       int printip = 1;
+
+                       get_function_bounds(pc, &fnstart, &fnend);
+                       nextip = 0;
+                       if (newsp > sp)
+                               mread(newsp + LRSAVE_OFFSET, &nextip,
+                                     sizeof(unsigned long));
+                       if (lr == ip) {
+                               if (lr >= xenheap_phys_end
+                                   || (fnstart <= lr && lr < fnend))
+                                       printip = 0;
+                       } else if (lr == nextip) {
+                               printip = 0;
+                       } else if (lr < xenheap_phys_end
+                       && !(fnstart <= lr && lr < fnend)) {
+                               printf("[link register   ] ");
+                               xmon_print_symbol(lr, " ", "\n");
+                       }
+                       if (printip) {
+                               printf("["REG"] ", sp);
+                               xmon_print_symbol(ip, " ", " (unreliable)\n");
+                       }
+                       pc = lr = 0;
+
+               } else {
+                       printf("["REG"] ", sp);
+                       xmon_print_symbol(ip, " ", "\n");
+               }
+
+               /* Look for "regshere" marker to see if this is
+                  an exception frame. */
+               if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
+                   && marker == REG_FRAME_MARKER) {
+                       if (mread(sp + REGS_OFFSET, &regs, sizeof(regs))
+                           != sizeof(regs)) {
+                               printf("Couldn't read registers at %lx\n",
+                                      sp + REGS_OFFSET);
+                               break;
+                       }
+            printf("--- Exception: %x %s at ", regs.entry_vector,
+                              getvecname(TRAP(&regs)));
+                       pc = regs.pc;
+                       lr = regs.lr;
+                       xmon_print_symbol(pc, " ", "\n");
+               }
+
+               if (newsp == 0)
+                       break;
+        
+               sp = newsp;
+       } while (count++ < xmon_depth_to_print);
+}
+
+void show_backtrace(ulong sp, ulong lr, ulong pc)
+{
+    console_start_sync();
+    backtrace(sp, lr, pc);
+    console_end_sync();
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/memory.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/memory.c Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,206 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Dan Poff <poff@xxxxxxxxxx>
+ *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+#include <xen/sched.h>
+#include <xen/mm.h>
+#include "of-devtree.h"
+#include "oftree.h"
+
+unsigned long xenheap_phys_end;
+struct membuf {
+    ulong start;
+    ulong size;
+};
+
+typedef void (*walk_mem_fn)(struct membuf *, uint);
+
+static ulong free_xenheap(ulong start, ulong end)
+{
+    start = ALIGN_UP(start, PAGE_SIZE);
+    end = ALIGN_DOWN(end, PAGE_SIZE);
+
+    printk("%s: 0x%lx - 0x%lx\n", __func__, start, end);
+
+    if (oftree <= end && oftree >= start) {
+        printk("%s:     Go around the devtree: 0x%lx - 0x%lx\n",
+               __func__, oftree, oftree_end);
+        init_xenheap_pages(start, ALIGN_DOWN(oftree, PAGE_SIZE));
+        init_xenheap_pages(ALIGN_UP(oftree_end, PAGE_SIZE), end);
+    } else {
+        init_xenheap_pages(start, end);
+    }
+
+    return ALIGN_UP(end, PAGE_SIZE);
+}
+
+static void set_max_page(struct membuf *mb, uint entries)
+{
+    int i;
+
+    for (i = 0; i < entries; i++) {
+        ulong end_page;
+
+        end_page = (mb[i].start + mb[i].size) >> PAGE_SHIFT;
+
+        if (end_page > max_page)
+            max_page = end_page;
+    }
+}
+
+/* mark all memory from modules onward as unused */
+static void heap_init(struct membuf *mb, uint entries)
+{
+    int i;
+    ulong start_blk;
+    ulong end_blk = 0;
+
+       for (i = 0; i < entries; i++) {
+           start_blk = mb[i].start;
+           end_blk = start_blk + mb[i].size;
+
+           if (start_blk < xenheap_phys_end) {
+            if (xenheap_phys_end > end_blk) {
+                panic("xenheap spans LMB\n");
+            }
+            if (xenheap_phys_end == end_blk)
+                continue;
+
+            start_blk = xenheap_phys_end;
+        }
+
+        init_boot_pages(start_blk, end_blk);
+        total_pages += (end_blk - start_blk) >> PAGE_SHIFT;
+       }
+}
+
+static void ofd_walk_mem(void *m, walk_mem_fn fn)
+{
+    ofdn_t n;
+    uint p_len;
+    struct membuf mb[8];
+    static char name[] = "memory";
+
+    n = ofd_node_find_by_prop(m, OFD_ROOT, "device_type", name, sizeof(name));
+    while (n > 0) {
+
+        p_len = ofd_getprop(m, n, "reg", mb, sizeof (mb));
+        if (p_len <= 0) {
+            panic("ofd_getprop(): failed\n");
+        }
+        if (p_len > sizeof(mb))
+            panic("%s: buffer is not big enuff for this firmware: "
+                  "0x%lx < 0x%x\n", __func__, sizeof(mb), p_len);
+
+        fn(mb, p_len / sizeof(mb[0]));
+        n = ofd_node_find_next(m, n);
+    }
+}
+
+static void setup_xenheap(module_t *mod, int mcount)
+{
+    int i;
+    ulong freemem;
+
+    freemem = ALIGN_UP((ulong)_end, PAGE_SIZE);
+
+    for (i = 0; i < mcount; i++) {
+        u32 s;
+
+        if(mod[i].mod_end == mod[i].mod_start)
+            continue;
+
+        s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE);
+
+        if (mod[i].mod_start > (ulong)_start &&
+            mod[i].mod_start < (ulong)_end) {
+            /* mod was linked in */
+            continue;
+        }
+
+        if (s < freemem) 
+            panic("module addresses must assend\n");
+
+        free_xenheap(freemem, s);
+        freemem = ALIGN_UP(mod[i].mod_end, PAGE_SIZE);
+        
+    }
+
+    /* the rest of the xenheap, starting at the end of modules */
+    free_xenheap(freemem, xenheap_phys_end);
+}
+
+void memory_init(module_t *mod, int mcount)
+{
+    ulong eomem;
+    ulong heap_start, heap_size;
+
+    printk("Physical RAM map:\n");
+
+    /* lets find out how much memory there is and set max_page */
+    max_page = 0;
+    ofd_walk_mem((void *)oftree, set_max_page);
+    eomem = max_page << PAGE_SHIFT;
+
+    if (eomem == 0){
+        panic("ofd_walk_mem() failed\n");
+    }
+    printk("End of RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10);
+
+    /* Architecturally the first 4 pages are exception handlers, we
+     * will also be copying down some code there */
+    heap_start = 4 << PAGE_SHIFT;
+    if (oftree < (ulong)_start)
+        heap_start = ALIGN_UP(oftree_end, PAGE_SIZE);
+
+    heap_start = init_boot_allocator(heap_start);
+    if (heap_start > (ulong)_start) {
+        panic("space below _start (%p) is not enough memory "
+              "for heap (0x%lx)\n", _start, heap_start);
+    }
+
+    /* allow everything else to be allocated */
+    total_pages = 0;
+    ofd_walk_mem((void *)oftree, heap_init);
+    if (total_pages == 0)
+        panic("heap_init: failed");
+
+    if (total_pages > max_page)
+        panic("total_pages > max_page: 0x%lx > 0x%lx\n",
+              total_pages, max_page);
+
+    printk("total_pages: 0x%016lx\n", total_pages);
+
+    init_frametable();
+    end_boot_allocator();
+
+    /* Add memory between the beginning of the heap and the beginning
+     * of our text */
+    free_xenheap(heap_start, (ulong)_start);
+
+    heap_size = xenheap_phys_end - heap_start;
+    printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10);
+
+    setup_xenheap(mod, mcount);
+
+    eomem = avail_domheap_pages();
+    printk("Domheap pages: 0x%lx %luMB (%lukB)\n", eomem,
+           (eomem << PAGE_SHIFT) >> 20,
+           (eomem << PAGE_SHIFT) >> 10);
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/ofd_fixup_memory.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/ofd_fixup_memory.c       Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,107 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <public/xen.h>
+#include "of-devtree.h"
+#include "oftree.h"
+
+static char memory[] = "memory";
+
+struct mem_reg {
+    u64 addr;
+    u64 sz;
+};
+
+static void ofd_memory_clean(void *m)
+{
+    ofdn_t old;
+
+    /* Remove all old memory props */
+    do {
+        old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type",
+                                    memory, sizeof(memory));
+        if (old <= 0)
+            break;
+
+        ofd_node_prune(m, old);
+    } while (1);
+}
+
+static ofdn_t ofd_memory_node_create(
+    void *m, ofdn_t p, const char *ppath, const char *name,
+    const char *dt, ulong start, ulong size)
+{
+    struct mem_reg reg;
+    char path[128];
+    ulong l;
+    ofdn_t n;
+    ulong nl = strlen(name) + 1;
+    ulong dtl = strlen(dt) + 1;
+
+    l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start);
+    n = ofd_node_add(m, p, path, l + 1);
+    ofd_prop_add(m, n, "name", name, nl);
+    ofd_prop_add(m, n, "device_type", dt, dtl);
+
+    /* physical addresses usable without regard to OF */
+    reg.addr = start;
+    reg.sz = size;
+    ofd_prop_add(m, n, "reg", &reg, sizeof (reg));
+
+    return n;
+}
+
+static void ofd_memory_rma_node(void *m, struct domain *d)
+{
+    ulong size = rma_size(d->arch.rma_order);
+    ofdn_t n;
+
+    n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory, 0, size);
+    BUG_ON(n <= 0);
+}
+
+static void ofd_memory_extent_nodes(void *m, struct domain *d)
+{
+    ulong start;
+    ulong size;
+    ofdn_t n;
+    struct page_extents *pe;
+
+    list_for_each_entry (pe, &d->arch.extent_list, pe_list) {
+
+        start = pe->pfn << PAGE_SHIFT;
+        size = 1UL << (pe->order + PAGE_SHIFT);
+
+        n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory,
+                                    start, size);
+
+        BUG_ON(n <= 0);
+    }
+}
+
+void ofd_memory_props(void *m, struct domain *d)
+{
+    ofd_memory_clean(m);
+    ofd_memory_rma_node(m, d);
+    ofd_memory_extent_nodes(m,d);
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/shadow.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/powerpc/shadow.c Fri Sep 01 13:04:02 2006 -0600
@@ -0,0 +1,159 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/shadow.h>
+
+static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size)
+{
+    ulong sdr1_htabsize;
+
+    ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0);
+    ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX);
+    ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE);
+
+    sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS;
+
+    return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK));
+}
+
+static ulong htab_alloc(struct domain *d, uint order)
+{
+    ulong htab_raddr;
+    uint log_htab_bytes = order + PAGE_SHIFT;
+    uint htab_bytes = 1UL << log_htab_bytes;
+
+    /* we use xenheap pages to keep domheap pages useful for domains */
+
+    if (order < 6)
+        order = 6;              /* architectural minimum is 2^18 */
+    if (order > 34)
+        order = 34;             /* architectural maximum is 2^46 */
+
+    htab_raddr = (ulong)alloc_xenheap_pages(order);
+    if (htab_raddr > 0) {
+        ASSERT((htab_raddr & (htab_bytes - 1)) == 0);
+
+        d->arch.htab.order = order;
+        d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE;
+        d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes);
+        d->arch.htab.map = (union pte *)htab_raddr;
+    }
+    return htab_raddr;
+}
+
+static void htab_free(struct domain *d)
+{
+    ulong htab_raddr = GET_HTAB(d);
+
+    free_xenheap_pages((void *)htab_raddr, d->arch.htab.order);
+}
+
+
+unsigned int shadow_teardown(struct domain *d)
+{
+    htab_free(d);
+    return 0;
+}
+
+unsigned int shadow_set_allocation(struct domain *d, 
+                                    unsigned int megabytes,
+                                    int *preempted)
+{
+    unsigned int rc = 0;
+    uint pages;
+    uint p;
+    uint order;
+    ulong addr;
+    
+
+    if (d->arch.htab.order)
+        return -EBUSY;
+
+    if (megabytes == 0) {
+        /* old management tools */
+        megabytes = 1;          /* 1/64th of 64M */
+        printk("%s: Fix management tools to set and get shadow/htab values\n"
+               "    using %d MiB htab\n",
+               __func__, megabytes);
+    }
+    pages = megabytes << (20 - PAGE_SHIFT);
+    order = fls(pages) - 1;     /* log2 truncated */
+    if (pages & ((1 << order) - 1))
+        ++order;                /* round up */
+
+    addr = htab_alloc(d, order);
+
+    printk("%s: ibm,fpt-size should be: 0x%x\n", __func__,
+           d->arch.htab.log_num_ptes + LOG_PTE_SIZE);
+
+    if (addr == 0)
+        return -ENOMEM;
+
+    /* XXX make this a continuation */
+    for (p = 0; p < (1 << order); p++)
+        clear_page((void *)(addr + (p << PAGE_SHIFT)));
+
+    return rc;
+}
+
+int shadow_domctl(struct domain *d, 
+                                 xen_domctl_shadow_op_t *sc,
+                                 XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+{
+    if ( unlikely(d == current->domain) )
+    {
+        DPRINTK("Don't try to do a shadow op on yourself!\n");
+        return -EINVAL;
+    }
+
+    switch ( sc->op )
+    {
+    case XEN_DOMCTL_SHADOW_OP_OFF:
+         DPRINTK("Shadow is mandatory!\n");
+         return -EINVAL;
+
+    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+        sc->mb = shadow_get_allocation(d);
+        return 0;
+
+    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: {
+        int rc;
+        int preempted = 0;
+
+        rc = shadow_set_allocation(d, sc->mb, &preempted);
+
+        if (preempted)
+            /* Not finished.  Set up to re-run the call. */
+            rc = hypercall_create_continuation(
+                __HYPERVISOR_domctl, "h", u_domctl);
+        else 
+            /* Finished.  Return the new allocation */
+            sc->mb = shadow_get_allocation(d);
+        return rc;
+    }
+
+    default:
+        printk("Bad shadow op %u\n", sc->op);
+        BUG();
+        return -EINVAL;
+    }
+}
diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/htab.c
--- a/xen/arch/powerpc/htab.c   Fri Sep 01 12:52:12 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright (C) IBM Corp. 2005
- *
- * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-
-static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size)
-{
-    ulong sdr1_htabsize;
-
-    ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0);
-    ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX);
-    ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE);
-
-    sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS;
-
-    return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK));
-}
-
-void htab_alloc(struct domain *d, uint order)
-{
-    ulong htab_raddr;
-    ulong log_htab_bytes = order + PAGE_SHIFT;
-    ulong htab_bytes = 1UL << log_htab_bytes;
-
-    /* XXX use alloc_domheap_pages instead? */
-    htab_raddr = (ulong)alloc_xenheap_pages(order);
-    ASSERT(htab_raddr != 0);
-    /* XXX check alignment guarantees */
-    ASSERT((htab_raddr & (htab_bytes - 1)) == 0);
-
-    /* XXX slow. move memset out to service partition? */
-    memset((void *)htab_raddr, 0, htab_bytes);
-
-    d->arch.htab.order = order;
-    d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE;
-    d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes);
-    d->arch.htab.map = (union pte *)htab_raddr;
-    d->arch.htab.shadow = xmalloc_array(ulong,
-                                        1UL << d->arch.htab.log_num_ptes);
-    ASSERT(d->arch.htab.shadow != NULL);
-}
-
-void htab_free(struct domain *d)
-{
-    ulong htab_raddr = GET_HTAB(d);
-
-    free_xenheap_pages((void *)htab_raddr, d->arch.htab.order);
-    xfree(d->arch.htab.shadow);
-}
-

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
