WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
xen-changelog

[Xen-changelog] Merged.

# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 8af1199488d3636135f3adf3f7302d4a04e9004e
# Parent  25e3c8668f1f4769db8466b4af965a99503311ae
# Parent  299d6ff8fdb2604dde767af2a2bee985602e9a46
Merged.

diff -r 25e3c8668f1f -r 8af1199488d3 .hgignore
--- a/.hgignore Mon Jan  9 11:19:55 2006
+++ b/.hgignore Mon Jan  9 11:22:17 2006
@@ -181,6 +181,7 @@
 ^xen/TAGS$
 ^xen/arch/x86/asm-offsets\.s$
 ^xen/arch/x86/boot/mkelf32$
+^xen/arch/x86/xen\.lds$
 ^xen/ddb/.*$
 ^xen/include/asm$
 ^xen/include/asm-.*/asm-offsets\.h$
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Mon Jan  9 11:22:17 2006
@@ -25,8 +25,9 @@
 
        xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
        xen_start_info->flags = s->arch.flags;
-       printk("Running on Xen! start_info_pfn=0x%lx lags=0x%x\n",
-               s->arch.start_info_pfn, xen_start_info->flags);
+       printk("Running on Xen! start_info_pfn=0x%lx nr_pages=%d flags=0x%x\n",
+               s->arch.start_info_pfn, xen_start_info->nr_pages,
+               xen_start_info->flags);
 
        evtchn_init();
        initialized = 1;
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/Makefile    Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/Makefile    Mon Jan  9 11:22:17 2006
@@ -77,8 +77,6 @@
        install -m0664 .config $(INSTALL_PATH)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
        install -m0664 System.map $(INSTALL_PATH)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
        ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX)
-       mkdir -p $(INSTALL_PATH)/usr/include/xen/linux
-       install -m0644 $(srctree)/include/asm-xen/linux-public/*.h $(INSTALL_PATH)/usr/include/xen/linux
 
 archclean:
        @if [ -e arch/xen/arch ]; then $(MAKE) $(clean)=arch/xen/arch; fi;
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c     Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c     Mon Jan  9 11:22:17 2006
@@ -39,8 +39,6 @@
 #ifdef CONFIG_XEN
 #include <asm/fixmap.h>
 #endif
-
-void (*pm_power_off)(void) = NULL;
 
 #ifdef CONFIG_X86_64
 
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Mon Jan  9 11:22:17 2006
@@ -67,8 +67,11 @@
        op.u.add_memtype.pfn     = base;
        op.u.add_memtype.nr_pfns = size;
        op.u.add_memtype.type    = type;
-       if ((error = HYPERVISOR_dom0_op(&op)))
+       error = HYPERVISOR_dom0_op(&op);
+       if (error) {
+               BUG_ON(error > 0);
                return error;
+       }
 
        if (increment)
                ++usage_table[op.u.add_memtype.reg];
@@ -121,8 +124,12 @@
        if (--usage_table[reg] < 1) {
                op.cmd = DOM0_DEL_MEMTYPE;
                op.u.del_memtype.handle = 0;
-               op.u.add_memtype.reg    = reg;
-               (void)HYPERVISOR_dom0_op(&op);
+               op.u.del_memtype.reg    = reg;
+               error = HYPERVISOR_dom0_op(&op);
+               if (error) {
+                       BUG_ON(error > 0);
+                       goto out;
+               }
        }
        error = reg;
  out:
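
The mtrr/main.c hunks split the assignment out of the if-condition and add BUG_ON(error > 0), asserting the convention that HYPERVISOR_dom0_op() returns zero or a negative errno; the second hunk also fixes a copy-paste bug (the DOM0_DEL_MEMTYPE path was filling in the add_memtype union member). A minimal user-space sketch of the checking pattern, with do_op() as a hypothetical stand-in for the hypercall:

    #include <assert.h>

    /* do_op() stands in for HYPERVISOR_dom0_op(): it returns 0 on
     * success or a negative errno on failure, never a positive value. */
    static int do_op(void) { return 0; }

    static int add_memtype(void)
    {
            int error;

            error = do_op();
            if (error) {
                    assert(error < 0);  /* the kernel uses BUG_ON(error > 0) */
                    return error;
            }
            return 0;
    }

    int main(void) { return add_memtype(); }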
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Mon Jan  9 11:22:17 2006
@@ -76,9 +76,7 @@
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(pm_idle);
-#ifdef CONFIG_ACPI_BOOT
 EXPORT_SYMBOL(pm_power_off);
-#endif
 EXPORT_SYMBOL(get_cmos_time);
 EXPORT_SYMBOL(cpu_khz);
 EXPORT_SYMBOL(apm_info);
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Mon Jan  9 11:22:17 2006
@@ -389,6 +389,30 @@
        return -ENOSYS;
 }
 
+static int __init
+gnttab_proc_init(void)
+{
+       /*
+        *  /proc/xen/grant : used by libxc to access grant tables
+        */
+       if ((grant_pde = create_xen_proc_entry("grant", 0600)) == NULL) {
+               WPRINTK("Unable to create grant xen proc entry\n");
+               return -1;
+       }
+
+       grant_file_ops.read   = grant_pde->proc_fops->read;
+       grant_file_ops.write  = grant_pde->proc_fops->write;
+
+       grant_pde->proc_fops  = &grant_file_ops;
+
+       grant_pde->read_proc  = &grant_read;
+       grant_pde->write_proc = &grant_write;
+
+       return 0;
+}
+
+device_initcall(gnttab_proc_init);
+
 #endif /* CONFIG_PROC_FS */
 
 int
@@ -446,29 +470,11 @@
        gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES;
        gnttab_free_head  = NR_RESERVED_ENTRIES;
 
-#ifdef CONFIG_PROC_FS
-       /*
-        *  /proc/xen/grant : used by libxc to access grant tables
-        */
-       if ((grant_pde = create_xen_proc_entry("grant", 0600)) == NULL) {
-               WPRINTK("Unable to create grant xen proc entry\n");
-               return -1;
-       }
-
-       grant_file_ops.read   = grant_pde->proc_fops->read;
-       grant_file_ops.write  = grant_pde->proc_fops->write;
-
-       grant_pde->proc_fops  = &grant_file_ops;
-
-       grant_pde->read_proc  = &grant_read;
-       grant_pde->write_proc = &grant_write;
-#endif
-
        printk("Grant table initialized\n");
        return 0;
 }
 
-__initcall(gnttab_init);
+core_initcall(gnttab_init);
 
 /*
  * Local variables:
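
Two ordering changes in gnttab.c: the /proc/xen/grant setup moves into its own device_initcall(), and gnttab_init() is promoted from __initcall() (an alias for device_initcall() in Linux 2.6) to core_initcall(), so grant tables are initialised before the driver-level initcalls that depend on them. A sketch of the mechanism, assuming kernel context (function names are illustrative, not from the patch):

    /* Initcalls run in ascending level order at boot:
     *   core_initcall()   -> level 1 (early: the grant table itself)
     *   device_initcall() -> level 6 (what __initcall() expands to;
     *                        late is fine for the /proc entry)
     */
    static int __init grant_core_setup(void)
    {
            return 0;       /* set up grant table state */
    }
    core_initcall(grant_core_setup);

    static int __init grant_proc_setup(void)
    {
            return 0;       /* create /proc/xen/grant */
    }
    device_initcall(grant_proc_setup);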
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Mon Jan  9 11:22:17 2006
@@ -16,6 +16,13 @@
 #include <linux/cpu.h>
 #include <linux/kthread.h>
 #include <asm-xen/xencons.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+#endif
 
 #define SHUTDOWN_INVALID  -1
 #define SHUTDOWN_POWEROFF  0
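
This hunk pairs with the acpi/boot.c and *_ksyms.c changes above: the pm_power_off definition moves out of the ACPI code into reboot.c, so it exists on x86 regardless of CONFIG_ACPI_BOOT and the EXPORT_SYMBOL can become unconditional. The idiom is a single nullable hook that a platform may install; a small sketch with illustrative names:

    void (*pm_power_off)(void);             /* NULL: no method installed */

    static void xen_poweroff(void)          /* hypothetical provider */
    {
            /* ...ask the hypervisor to power the domain off... */
    }

    static void platform_setup(void)
    {
            pm_power_off = xen_poweroff;    /* a provider installs its hook */
    }

    static void power_off(void)
    {
            if (pm_power_off)               /* callers must tolerate NULL */
                    pm_power_off();
    }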
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Mon Jan  9 11:22:17 2006
@@ -59,9 +59,7 @@
 EXPORT_SYMBOL(probe_irq_mask);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(pm_idle);
-#ifdef CONFIG_ACPI_BOOT
 EXPORT_SYMBOL(pm_power_off);
-#endif
 EXPORT_SYMBOL(get_cmos_time);
 
 EXPORT_SYMBOL(__down_failed);
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Mon Jan  9 11:22:17 2006
@@ -540,6 +540,9 @@
        pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
                                        mmap_pages, GFP_KERNEL);
        if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+               kfree(pending_reqs);
+               kfree(pending_grant_handles);
+               kfree(pending_vaddrs);
                printk("%s: out of memory\n", __FUNCTION__);
                return -1;
        }
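
The blkback fix adds the cleanup that was missing when only some of the three allocations succeed; freeing all three unconditionally is safe because kfree(NULL) is defined to be a no-op. The same idiom holds for free() in user space; a runnable sketch:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            char *a = malloc(16), *b = malloc(32), *c = malloc(64);

            if (!a || !b || !c) {
                    /* free(NULL) is a no-op, so there is no need to
                     * track which allocations succeeded before bailing. */
                    free(a);
                    free(b);
                    free(c);
                    fprintf(stderr, "out of memory\n");
                    return 1;
            }
            free(a); free(b); free(c);
            return 0;
    }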
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Jan  9 11:22:17 2006
@@ -331,7 +331,12 @@
                return;
        }
 
-        xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+       err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+       if (err) {
+               xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
+                                info->xbdev->otherend);
+               return;
+       }
 
        (void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected); 
 
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Mon Jan  9 11:22:17 2006
@@ -208,7 +208,7 @@
 }
 
 struct vm_operations_struct blktap_vm_ops = {
-       nopage:   blktap_nopage,
+       .nopage = blktap_nopage,
 };
 
 /******************************************************************
@@ -225,7 +225,7 @@
        /* Allocate the fe ring. */
        sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
        if (sring == NULL)
-               goto fail_nomem;
+               return -ENOMEM;
 
        SetPageReserved(virt_to_page(sring));
     
@@ -233,9 +233,6 @@
        FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
 
        return 0;
-
- fail_nomem:
-       return -ENOMEM;
 }
 
 static int blktap_release(struct inode *inode, struct file *filp)
@@ -391,12 +388,12 @@
 }
 
 static struct file_operations blktap_fops = {
-       owner:    THIS_MODULE,
-       poll:     blktap_poll,
-       ioctl:    blktap_ioctl,
-       open:     blktap_open,
-       release:  blktap_release,
-       mmap:     blktap_mmap,
+       .owner   = THIS_MODULE,
+       .poll    = blktap_poll,
+       .ioctl   = blktap_ioctl,
+       .open    = blktap_open,
+       .release = blktap_release,
+       .mmap    = blktap_mmap,
 };
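
The two blktap hunks above replace GCC's obsolete `field: value` initializer extension with standard C99 designated initializers, which are order-independent and leave unnamed members zeroed. A runnable illustration of the standard form:

    #include <stdio.h>

    struct ops {
            int (*open)(void);
            int (*release)(void);
            int (*mmap)(void);      /* left unset, so initialised to NULL */
    };

    static int my_open(void)    { return 0; }
    static int my_release(void) { return 0; }

    static struct ops tap_ops = {
            .open    = my_open,     /* was "open: my_open" in old GNU C */
            .release = my_release,
    };

    int main(void)
    {
            printf("mmap set: %d\n", tap_ops.mmap != NULL);
            return tap_ops.open() + tap_ops.release();
    }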
 
 
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Mon Jan  9 11:22:17 2006
@@ -314,39 +314,31 @@
 {
        int sent, sz, work_done = 0;
 
-       if (xen_start_info->flags & SIF_INITDOMAIN) {
-               if (x_char) {
+       if (x_char) {
+               if (xen_start_info->flags & SIF_INITDOMAIN)
                        kcons_write_dom0(NULL, &x_char, 1);
-                       x_char = 0;
-                       work_done = 1;
-               }
-
-               while (wc != wp) {
-                       sz = wp - wc;
-                       if (sz > (wbuf_size - WBUF_MASK(wc)))
-                               sz = wbuf_size - WBUF_MASK(wc);
+               else
+                       while (x_char)
+                               if (xencons_ring_send(&x_char, 1) == 1)
+                                       break;
+               x_char = 0;
+               work_done = 1;
+       }
+
+       while (wc != wp) {
+               sz = wp - wc;
+               if (sz > (wbuf_size - WBUF_MASK(wc)))
+                       sz = wbuf_size - WBUF_MASK(wc);
+               if (xen_start_info->flags & SIF_INITDOMAIN) {
                        kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
                        wc += sz;
-                       work_done = 1;
-               }
-       } else {
-               while (x_char) {
-                       if (xencons_ring_send(&x_char, 1) == 1) {
-                               x_char = 0;
-                               work_done = 1;
-                       }
-               }
-
-               while (wc != wp) {
-                       sz = wp - wc;
-                       if (sz > (wbuf_size - WBUF_MASK(wc)))
-                               sz = wbuf_size - WBUF_MASK(wc);
+               } else {
                        sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
                        if (sent == 0)
                                break;
                        wc += sent;
-                       work_done = 1;
                }
+               work_done = 1;
        }
 
        if (work_done && (xencons_tty != NULL)) {
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jan  9 11:22:17 2006
@@ -82,7 +82,7 @@
 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
 
 void netif_creditlimit(netif_t *netif);
-int  netif_disconnect(netif_t *netif);
+void netif_disconnect(netif_t *netif);
 
 netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
 void free_netif(netif_t *netif);
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Mon Jan  9 11:22:17 2006
@@ -183,7 +183,7 @@
 int netif_map(netif_t *netif, unsigned long tx_ring_ref,
              unsigned long rx_ring_ref, unsigned int evtchn)
 {
-       int err;
+       int err = -ENOMEM;
        netif_tx_sring_t *txs;
        netif_rx_sring_t *rxs;
        evtchn_op_t op = {
@@ -196,24 +196,19 @@
                return 0;
 
        netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+       if (netif->tx_comms_area == NULL)
+               return -ENOMEM;
        netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
-       if (netif->tx_comms_area == NULL || netif->rx_comms_area == NULL)
-               return -ENOMEM;
+       if (netif->rx_comms_area == NULL)
+               goto err_rx;
 
        err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
-       if (err) {
-               free_vm_area(netif->tx_comms_area);
-               free_vm_area(netif->rx_comms_area);
-               return err;
-       }
+       if (err)
+               goto err_map;
 
        err = HYPERVISOR_event_channel_op(&op);
-       if (err) {
-               unmap_frontend_pages(netif);
-               free_vm_area(netif->tx_comms_area);
-               free_vm_area(netif->rx_comms_area);
-               return err;
-       }
+       if (err)
+               goto err_hypervisor;
 
        netif->evtchn = op.u.bind_interdomain.local_port;
 
@@ -241,19 +236,22 @@
        rtnl_unlock();
 
        return 0;
+err_hypervisor:
+       unmap_frontend_pages(netif);
+err_map:
+       free_vm_area(netif->rx_comms_area);
+err_rx:
+       free_vm_area(netif->tx_comms_area);
+       return err;
 }
 
 static void free_netif_callback(void *arg)
 {
        netif_t *netif = (netif_t *)arg;
 
-       /* Already disconnected? */
-       if (!netif->irq)
-               return;
-
-       unbind_from_irqhandler(netif->irq, netif);
-       netif->irq = 0;
-
+       if (netif->irq)
+               unbind_from_irqhandler(netif->irq, netif);
+       
        unregister_netdev(netif->dev);
 
        if (netif->tx.sring) {
@@ -290,10 +288,10 @@
 #endif
 }
 
-int netif_disconnect(netif_t *netif)
-{
-
-       if (netif->status == CONNECTED) {
+void netif_disconnect(netif_t *netif)
+{
+       switch (netif->status) {
+       case CONNECTED:
                rtnl_lock();
                netif->status = DISCONNECTING;
                wmb();
@@ -301,10 +299,14 @@
                        __netif_down(netif);
                rtnl_unlock();
                netif_put(netif);
-               return 0; /* Caller should not send response message. */
-       }
-
-       return 1;
+               break;
+       case DISCONNECTED:
+               BUG_ON(atomic_read(&netif->refcnt) != 0);
+               free_netif(netif);
+               break;
+       default:
+               BUG();
+       }
 }
 
 /*
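
netif_map() previously repeated its cleanup in every failure branch; the rewrite uses the kernel's goto-unwind ladder, where each label releases what was acquired before the failing step, in reverse order. A runnable user-space sketch of the shape (acquire_a/acquire_b are hypothetical stand-ins for the two alloc_vm_area() calls):

    #include <stdlib.h>

    static void *acquire_a(void) { return malloc(32); }
    static void *acquire_b(void) { return malloc(32); }

    static int setup(void)
    {
            int err = -1;           /* -ENOMEM in the kernel version */
            void *a, *b;

            a = acquire_a();
            if (a == NULL)
                    return err;     /* nothing to unwind yet */
            b = acquire_b();
            if (b == NULL)
                    goto err_b;

            /* ...each further step would add one more label... */
            free(b);
            free(a);
            return 0;

    err_b:                          /* unwind in reverse acquisition order */
            free(a);
            return err;
    }

    int main(void) { return setup(); }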
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Jan  9 11:22:17 2006
@@ -14,6 +14,7 @@
 #include <asm-xen/balloon.h>
 #include <asm-xen/xen-public/memory.h>
 
+/*#define NETBE_DEBUG_INTERRUPT*/
 
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
@@ -727,6 +728,7 @@
        return notify;
 }
 
+#ifdef NETBE_DEBUG_INTERRUPT
 static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
 {
        struct list_head *ent;
@@ -758,6 +760,7 @@
 
        return IRQ_HANDLED;
 }
+#endif
 
 static int __init netback_init(void)
 {
@@ -794,6 +797,7 @@
 
        netif_xenbus_init();
 
+#ifdef NETBE_DEBUG_INTERRUPT
        (void)bind_virq_to_irqhandler(
                VIRQ_DEBUG,
                0,
@@ -801,6 +805,7 @@
                SA_SHIRQ, 
                "net-be-dbg",
                &netif_be_dbg);
+#endif
 
        return 0;
 }
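
The netback change compiles the VIRQ_DEBUG dump handler and its IRQ binding out of normal builds; the commented-out #define added at the top of the file is the single switch for both sites. A runnable sketch of the gating:

    #include <stdio.h>

    /*#define NETBE_DEBUG_INTERRUPT*/  /* uncomment to build the debug path */

    #ifdef NETBE_DEBUG_INTERRUPT
    static void debug_dump(void)
    {
            fprintf(stderr, "debug state dump\n");
    }
    #endif

    int main(void)
    {
    #ifdef NETBE_DEBUG_INTERRUPT
            debug_dump();              /* definition and call gated together */
    #endif
            puts("normal path");
            return 0;
    }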
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Jan  9 11:22:17 2006
@@ -116,6 +116,8 @@
 #define RX_MAX_TARGET NET_RX_RING_SIZE
        int rx_min_target, rx_max_target, rx_target;
        struct sk_buff_head rx_batch;
+
+       struct timer_list rx_refill_timer;
 
        /*
         * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
@@ -517,6 +519,13 @@
 }
 
 
+static void rx_refill_timeout(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       netif_rx_schedule(dev);
+}
+
+
 static void network_alloc_rx_buffers(struct net_device *dev)
 {
        unsigned short id;
@@ -534,7 +543,7 @@
         * Allocate skbuffs greedily, even though we batch updates to the
         * receive ring. This creates a less bursty demand on the memory
         * allocator, so should reduce the chance of failed allocation requests
-        *  both for ourself and for other kernel subsystems.
+        * both for ourself and for other kernel subsystems.
         */
        batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
        for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
@@ -545,8 +554,15 @@
                skb = alloc_xen_skb(
                        ((PAGE_SIZE - sizeof(struct skb_shared_info)) &
                         (-SKB_DATA_ALIGN(1))) - 16);
-               if (skb == NULL)
-                       break;
+               if (skb == NULL) {
+                       /* Any skbuffs queued for refill? Force them out. */
+                       if (i != 0)
+                               goto refill;
+                       /* Could not allocate any skbuffs. Try again later. */
+                       mod_timer(&np->rx_refill_timer,
+                                 jiffies + (HZ/10));
+                       return;
+               }
                __skb_queue_tail(&np->rx_batch, skb);
        }
 
@@ -554,6 +570,12 @@
        if (i < (np->rx_target/2))
                return;
 
+       /* Adjust our fill target if we risked running out of buffers. */
+       if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
+           ((np->rx_target *= 2) > np->rx_max_target))
+               np->rx_target = np->rx_max_target;
+
+ refill:
        for (i = 0; ; i++) {
                if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
                        break;
@@ -608,11 +630,6 @@
        /* Above is a suitable barrier to ensure backend will see requests. */
        np->rx.req_prod_pvt = req_prod + i;
        RING_PUSH_REQUESTS(&np->rx);
-
-       /* Adjust our fill target if we risked running out of buffers. */
-       if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
-           ((np->rx_target *= 2) > np->rx_max_target))
-               np->rx_target = np->rx_max_target;
 }
 
 
@@ -1077,6 +1094,10 @@
        np->rx_min_target = RX_MIN_TARGET;
        np->rx_max_target = RX_MAX_TARGET;
 
+       init_timer(&np->rx_refill_timer);
+       np->rx_refill_timer.data = (unsigned long)netdev;
+       np->rx_refill_timer.function = rx_refill_timeout;
+
        /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
        for (i = 0; i <= NET_TX_RING_SIZE; i++) {
                np->tx_skbs[i] = (void *)((unsigned long) i+1);
@@ -1188,29 +1209,26 @@
 
        DPRINTK("%s\n", dev->nodename);
 
-       netif_free(info);
-       kfree(info);
+       netif_disconnect_backend(info);
+       free_netdev(info->netdev);
 
        return 0;
 }
 
 
-static void netif_free(struct netfront_info *info)
-{
-       netif_disconnect_backend(info);
-       close_netdev(info);
-}
-
-
 static void close_netdev(struct netfront_info *info)
 {
-       if (info->netdev) {
+       spin_lock_irq(&info->netdev->xmit_lock);
+       netif_stop_queue(info->netdev);
+       spin_unlock_irq(&info->netdev->xmit_lock);
+
 #ifdef CONFIG_PROC_FS
-               xennet_proc_delif(info->netdev);
+       xennet_proc_delif(info->netdev);
 #endif
-               unregister_netdev(info->netdev);
-               info->netdev = NULL;
-       }
+
+       del_timer_sync(&info->rx_refill_timer);
+
+       unregister_netdev(info->netdev);
 }
 
 
@@ -1219,21 +1237,28 @@
        /* Stop old i/f to prevent errors whilst we rebuild the state. */
        spin_lock_irq(&info->tx_lock);
        spin_lock(&info->rx_lock);
-       netif_stop_queue(info->netdev);
-       /* info->backend_state = BEST_DISCONNECTED; */
+       info->backend_state = BEST_DISCONNECTED;
        spin_unlock(&info->rx_lock);
        spin_unlock_irq(&info->tx_lock);
-    
+
+       if (info->irq)
+               unbind_from_irqhandler(info->irq, info->netdev);
+       info->evtchn = info->irq = 0;
+
        end_access(info->tx_ring_ref, info->tx.sring);
        end_access(info->rx_ring_ref, info->rx.sring);
        info->tx_ring_ref = GRANT_INVALID_REF;
        info->rx_ring_ref = GRANT_INVALID_REF;
        info->tx.sring = NULL;
        info->rx.sring = NULL;
-
-       if (info->irq)
-               unbind_from_irqhandler(info->irq, info->netdev);
-       info->evtchn = info->irq = 0;
+}
+
+
+static void netif_free(struct netfront_info *info)
+{
+       close_netdev(info);
+       netif_disconnect_backend(info);
+       free_netdev(info->netdev);
 }
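
The netfront changes add a refill retry timer: when alloc_xen_skb() fails with nothing batched, the refill is retried after HZ/10 (about 100 ms) instead of being dropped, and close_netdev() now has to del_timer_sync() before the device goes away. A sketch of the 2.6-era timer API used here (init_timer() with .data and .function, predating timer_setup()); kernel context assumed:

    #include <linux/timer.h>

    static struct timer_list refill_timer;

    static void refill_timeout(unsigned long data)
    {
            void *dev = (void *)data;   /* the net_device in netfront */
            /* ...reschedule the rx refill work for this device... */
            (void)dev;
    }

    static void refill_setup(void *dev)
    {
            init_timer(&refill_timer);
            refill_timer.data     = (unsigned long)dev;
            refill_timer.function = refill_timeout;
    }

    static void refill_retry_later(void)
    {
            mod_timer(&refill_timer, jiffies + (HZ / 10));  /* ~100 ms */
    }

    static void refill_teardown(void)
    {
            del_timer_sync(&refill_timer);  /* must not fire after free */
    }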
 
 
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Jan  9 11:22:17 2006
@@ -111,7 +111,6 @@
        struct xenbus_dev_data *u = filp->private_data;
        struct xenbus_dev_transaction *trans;
        void *reply;
-       int err = 0;
 
        if ((len + u->len) > sizeof(u->u.buffer))
                return -EINVAL;
@@ -136,41 +135,36 @@
        case XS_RM:
        case XS_SET_PERMS:
                reply = xenbus_dev_request_and_reply(&u->u.msg);
-               if (IS_ERR(reply)) {
-                       err = PTR_ERR(reply);
-               } else {
-                       if (u->u.msg.type == XS_TRANSACTION_START) {
-                               trans = kmalloc(sizeof(*trans), GFP_KERNEL);
-                               trans->handle = (struct xenbus_transaction *)
-                                       simple_strtoul(reply, NULL, 0);
-                               list_add(&trans->list, &u->transactions);
-                       } else if (u->u.msg.type == XS_TRANSACTION_END) {
-                               list_for_each_entry(trans, &u->transactions,
-                                                   list)
-                                       if ((unsigned long)trans->handle ==
-                                           (unsigned long)u->u.msg.tx_id)
-                                               break;
-                               BUG_ON(&trans->list == &u->transactions);
-                               list_del(&trans->list);
-                               kfree(trans);
-                       }
-                       queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
-                       queue_reply(u, (char *)reply, u->u.msg.len);
-                       kfree(reply);
+               if (IS_ERR(reply))
+                       return PTR_ERR(reply);
+
+               if (u->u.msg.type == XS_TRANSACTION_START) {
+                       trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+                       if (!trans)
+                               return -ENOMEM;
+                       trans->handle = (struct xenbus_transaction *)
+                               simple_strtoul(reply, NULL, 0);
+                       list_add(&trans->list, &u->transactions);
+               } else if (u->u.msg.type == XS_TRANSACTION_END) {
+                       list_for_each_entry(trans, &u->transactions, list)
+                               if ((unsigned long)trans->handle ==
+                                   (unsigned long)u->u.msg.tx_id)
+                                       break;
+                       BUG_ON(&trans->list == &u->transactions);
+                       list_del(&trans->list);
+                       kfree(trans);
                }
+               queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
+               queue_reply(u, (char *)reply, u->u.msg.len);
+               kfree(reply);
                break;
 
        default:
-               err = -EINVAL;
-               break;
+               return -EINVAL;
        }
 
-       if (err == 0) {
-               u->len = 0;
-               err = len;
-       }
-
-       return err;
+       u->len = 0;
+       return len;
 }
 
 static int xenbus_dev_open(struct inode *inode, struct file *filp)
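
Besides flattening the nesting with early returns, the xenbus_dev_write() rewrite adds a previously missing NULL check on the kmalloc() of the transaction record. A runnable user-space distillation of that check (the struct and names are illustrative):

    #include <errno.h>
    #include <stdlib.h>

    struct trans { struct trans *next; unsigned long handle; };

    static int start_transaction(struct trans **list, const char *reply)
    {
            struct trans *t = malloc(sizeof(*t));   /* kmalloc upstream */

            if (!t)                 /* the check the patch adds */
                    return -ENOMEM;
            t->handle = strtoul(reply, NULL, 0);
            t->next   = *list;
            *list     = t;
            return 0;
    }

    int main(void)
    {
            struct trans *list = NULL;
            int rc = start_transaction(&list, "42");

            free(list);
            return rc ? 1 : 0;
    }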
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Jan  9 11:22:17 2006
@@ -542,14 +542,6 @@
                             const char *type,
                             const char *nodename)
 {
-#define CHECK_FAIL                             \
-       do {                                    \
-               if (err)                        \
-                       goto fail;              \
-       }                                       \
-       while (0)                               \
-
-
        int err;
        struct xenbus_device *xendev;
        size_t stringlen;
@@ -584,19 +576,18 @@
        xendev->dev.release = xenbus_dev_release;
 
        err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
-       CHECK_FAIL;
+       if (err)
+               goto fail;
 
        /* Register with generic device framework. */
        err = device_register(&xendev->dev);
-       CHECK_FAIL;
+       if (err)
+               goto fail;
 
        device_create_file(&xendev->dev, &dev_attr_nodename);
        device_create_file(&xendev->dev, &dev_attr_devtype);
 
        return 0;
-
-#undef CHECK_FAIL
-
 fail:
        xenbus_dev_free(xendev);
        return err;
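
Expanding CHECK_FAIL removes a goto hidden behind a macro invocation (and the matching #undef), making the control transfer visible at each call site. A runnable distillation of the resulting shape:

    #include <stdio.h>

    static int do_step(int n) { return n == 2 ? -1 : 0; }

    static int probe(void)
    {
            int err;

            err = do_step(1);
            if (err)                /* was: CHECK_FAIL; */
                    goto fail;
            err = do_step(2);
            if (err)
                    goto fail;
            return 0;
    fail:
            fprintf(stderr, "step failed: %d\n", err);
            return err;
    }

    int main(void) { return probe() ? 1 : 0; }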
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Mon Jan  9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Mon Jan  9 11:22:17 2006
@@ -372,7 +372,7 @@
     int ret;
    __asm__ __volatile__ ( ";; mov r14=%2 ; mov r15=%3 ; mov r2=%1 ; break 0x1000 ;; mov %0=r8 ;;"
         : "=r" (ret)
-        : "i" (__HYPERVISOR_console_io), "r"(cmd), "r"(arg)
+        : "i" (__HYPERVISOR_memory_op), "r"(cmd), "r"(arg)
         : "r14","r15","r2","r8","memory" );
     return ret;
 }
diff -r 25e3c8668f1f -r 8af1199488d3 tools/Makefile
--- a/tools/Makefile    Mon Jan  9 11:19:55 2006
+++ b/tools/Makefile    Mon Jan  9 11:22:17 2006
@@ -12,6 +12,7 @@
 SUBDIRS += security
 SUBDIRS += console
 SUBDIRS += xenmon
+SUBDIRS += guest-headers
 ifeq ($(VTPM_TOOLS),y)
 SUBDIRS += vtpm_manager
 SUBDIRS += vtpm
diff -r 25e3c8668f1f -r 8af1199488d3 tools/Rules.mk
--- a/tools/Rules.mk    Mon Jan  9 11:19:55 2006
+++ b/tools/Rules.mk    Mon Jan  9 11:22:17 2006
@@ -35,6 +35,8 @@
 mk-symlinks:
        mkdir -p xen
        ( cd xen && ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . )
+       mkdir -p xen/hvm
+       ( cd xen/hvm && ln -sf ../../$(XEN_ROOT)/xen/include/public/hvm/*.h . )
        mkdir -p xen/io
        ( cd xen/io && ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . )
        mkdir -p xen/linux
diff -r 25e3c8668f1f -r 8af1199488d3 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c     Mon Jan  9 11:19:55 2006
+++ b/tools/debugger/libxendebug/xendebug.c     Mon Jan  9 11:22:17 2006
@@ -119,8 +119,8 @@
 
     if ( !ctxt->valid[vcpu] )
     {
-        if ( (rc = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, 
-                                              &ctxt->context[vcpu])) )
+        if ( (rc = xc_vcpu_getcontext(xc_handle, domid, vcpu, 
+                                      &ctxt->context[vcpu])) )
             return NULL;
 
         ctxt->valid[vcpu] = true;
@@ -139,10 +139,10 @@
         return -EINVAL;
 
     op.interface_version = DOM0_INTERFACE_VERSION;
-    op.cmd = DOM0_SETDOMAININFO;
-    op.u.setdomaininfo.domain = ctxt->domid;
-    op.u.setdomaininfo.vcpu = vcpu;
-    op.u.setdomaininfo.ctxt = &ctxt->context[vcpu];
+    op.cmd = DOM0_SETVCPUCONTEXT;
+    op.u.setvcpucontext.domain = ctxt->domid;
+    op.u.setvcpucontext.vcpu = vcpu;
+    op.u.setvcpucontext.ctxt = &ctxt->context[vcpu];
 
     if ( (rc = mlock(&ctxt->context[vcpu], sizeof(vcpu_guest_context_t))) )
         return rc;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx      Mon Jan  9 11:19:55 2006
+++ b/tools/examples/xmexample.vmx      Mon Jan  9 11:22:17 2006
@@ -28,7 +28,13 @@
 
 #-----------------------------------------------------------------------------
 # the number of cpus guest platform has, default=1
-vcpus=1
+#vcpus=1
+
+# enable/disable vmx guest ACPI, default=0 (disabled)
+#acpi=0
+
+# enable/disable vmx guest APIC, default=0 (disabled)
+#apic=0
 
 # List of which CPUS this domain is allowed to use, default Xen picks
 #cpus = ""         # leave to Xen to pick
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/Makefile
--- a/tools/firmware/vmxassist/Makefile Mon Jan  9 11:19:55 2006
+++ b/tools/firmware/vmxassist/Makefile Mon Jan  9 11:22:17 2006
@@ -24,7 +24,7 @@
 # The emulator code lives in ROM space
 TEXTADDR=0x000D0000
 
-DEFINES=-DDEBUG -D_ACPI_ -DTEXTADDR=$(TEXTADDR)
+DEFINES=-DDEBUG -DTEXTADDR=$(TEXTADDR)
 XENINC=-I$(XEN_ROOT)/tools/libxc
 
 LD       = ld
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/acpi_madt.c
--- a/tools/firmware/vmxassist/acpi_madt.c      Mon Jan  9 11:19:55 2006
+++ b/tools/firmware/vmxassist/acpi_madt.c      Mon Jan  9 11:22:17 2006
@@ -17,30 +17,73 @@
  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
+
 #include "../acpi/acpi2_0.h"
 #include "../acpi/acpi_madt.h"
+
+#include <xen/hvm/hvm_info_table.h>
 
 #define NULL ((void*)0)
 
 extern int puts(const char *s);
 
-#define VCPU_NR_PAGE        0x0009F000
-#define VCPU_NR_OFFSET      0x00000800
-#define VCPU_MAGIC          0x76637075  /* "vcpu" */
+static struct hvm_info_table *table = NULL;
 
-/* xc_vmx_builder wrote vcpu block at 0x9F800. Return it. */
-static int
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+       char signature[] = "HVM INFO";
+       uint8_t *ptr = (uint8_t *)t;
+       uint8_t sum = 0;
+       int i;
+
+       /* strncmp(t->signature, "HVM INFO", 8) */
+       for (i = 0; i < 8; i++) {
+               if (signature[i] != t->signature[i]) {
+                       puts("Bad hvm info signature\n");
+                       return 0;
+               }
+       }
+
+       for (i = 0; i < t->length; i++)
+               sum += ptr[i];
+
+       return (sum == 0);
+}
+
+/* xc_vmx_builder wrote hvm info at 0x9F800. Return it. */
+static struct hvm_info_table *
+get_hvm_info_table(void)
+{
+       struct hvm_info_table *t;
+       int i;
+
+       if (table != NULL)
+               return table;
+
+       t = (struct hvm_info_table *)HVM_INFO_PADDR;
+
+       if (!validate_hvm_info(t)) {
+               puts("Bad hvm info table\n");
+               return NULL;
+       }
+
+       table = t;
+
+       return table;
+}
+
+int
 get_vcpu_nr(void)
 {
-       unsigned int *vcpus;
+       struct hvm_info_table *t = get_hvm_info_table();
+       return (t ? t->nr_vcpus : 1); /* default 1 vcpu */
+}
 
-       vcpus = (unsigned int *)(VCPU_NR_PAGE + VCPU_NR_OFFSET);
-       if (vcpus[0] != VCPU_MAGIC) {
-               puts("Bad vcpus magic, set vcpu number to 1 by default.\n");
-               return 1;
-       }
-
-       return vcpus[1];
+int
+get_acpi_enabled(void)
+{
+       struct hvm_info_table *t = get_hvm_info_table();
+       return (t ? t->acpi_enabled : 0); /* default no acpi */
 }
 
 static void *
@@ -74,10 +117,10 @@
        return madt;
 }
 
-static void 
+static void
 set_checksum(void *start, int checksum_offset, int len)
 {
-       unsigned char sum = 0;  
+       unsigned char sum = 0;
        unsigned char *ptr;
 
        ptr = start;
@@ -89,9 +132,9 @@
        ptr[checksum_offset] = -sum;
 }
 
-static int 
+static int
 acpi_madt_set_local_apics(
-       int nr_vcpu, 
+       int nr_vcpu,
        ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
 {
        int i;
@@ -104,14 +147,14 @@
                madt->LocalApic[i].Length          = sizeof (ACPI_LOCAL_APIC_STRUCTURE);
                madt->LocalApic[i].AcpiProcessorId = i;
                madt->LocalApic[i].ApicId          = i;
-               madt->LocalApic[i].Flags           = 1; 
+               madt->LocalApic[i].Flags           = 1;
        }
 
        madt->Header.Header.Length =
-               sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) - 
+               sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
                (MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
 
-       return 0;                            
+       return 0;
 }
 
 #define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
@@ -133,7 +176,7 @@
                madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
                madt->Header.Header.Length);
 
-       return 0;              
+       return 0;
 }
 
 /*
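
validate_hvm_info() applies the ACPI-style rule that the bytes of a table, checksum field included, sum to zero modulo 256; set_hvm_info_checksum() in the xc_vmx_build.c hunk further down is the writer half of the same contract. A runnable round trip with a simplified stand-in table (not the real hvm_info_table layout):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct info {                   /* simplified stand-in table */
            char    signature[8];
            uint8_t length;
            uint8_t checksum;
            uint8_t nr_vcpus;
    };

    static uint8_t byte_sum(const void *p, size_t len)
    {
            const uint8_t *b = p;
            uint8_t sum = 0;

            while (len--)
                    sum += *b++;
            return sum;
    }

    int main(void)
    {
            struct info t;

            memset(&t, 0, sizeof(t));
            memcpy(t.signature, "HVM INFO", 8);
            t.length   = sizeof(t);
            t.nr_vcpus = 4;
            t.checksum = -byte_sum(&t, t.length);   /* writer side */

            /* reader side: the whole table must sum to zero */
            printf("valid: %d\n", byte_sum(&t, t.length) == 0);
            return 0;
    }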
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/vm86.h
--- a/tools/firmware/vmxassist/vm86.h   Mon Jan  9 11:19:55 2006
+++ b/tools/firmware/vmxassist/vm86.h   Mon Jan  9 11:22:17 2006
@@ -24,7 +24,7 @@
 #include <stdint.h>
 #endif
 
-#include <xen/vmx_assist.h>
+#include <xen/hvm/vmx_assist.h>
 
 #define        NR_EXCEPTION_HANDLER    32
 #define        NR_INTERRUPT_HANDLERS   16
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/vmxloader.c
--- a/tools/firmware/vmxassist/vmxloader.c      Mon Jan  9 11:19:55 2006
+++ b/tools/firmware/vmxassist/vmxloader.c      Mon Jan  9 11:22:17 2006
@@ -24,12 +24,10 @@
 #include "machine.h"
 #include "roms.h"
 
-#ifdef _ACPI_
 #include "acpi.h"
 #include "../acpi/acpi2_0.h"  // for ACPI_PHYSICAL_ADDRESS
 int acpi_madt_update(unsigned char* acpi_start);
-#endif
-
+int get_acpi_enabled(void);
 
 /*
  * C runtime start off
@@ -120,18 +118,17 @@
                memcpy((void *)0xC0000,
                        vgabios_stdvga, sizeof(vgabios_stdvga));
        }
-#ifdef _ACPI_
-       puts("Loading ACPI ...\n");
 
-       acpi_madt_update(acpi);
-
-       if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
-               /* make sure acpi table does not overlap rombios
-                * currently acpi less than 8K will be OK.
-                */
-                memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
+       if (get_acpi_enabled() != 0) {
+               puts("Loading ACPI ...\n");
+               acpi_madt_update((unsigned char*)acpi);
+               if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
+                       /* make sure acpi table does not overlap rombios
+                        * currently acpi less than 8K will be OK.
+                        */
+                       memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
+               }
        }
-#endif
 
        puts("Loading VMXAssist ...\n");
        memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist));
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8254.c
--- a/tools/ioemu/hw/i8254.c    Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/hw/i8254.c    Mon Jan  9 11:22:17 2006
@@ -23,7 +23,7 @@
  */
 #include "vl.h"
 #include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 
 //#define DEBUG_PIT
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8259.c
--- a/tools/ioemu/hw/i8259.c    Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/hw/i8259.c    Mon Jan  9 11:22:17 2006
@@ -23,7 +23,7 @@
  */
 #include "vl.h"
 #include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 
 /* debug PIC */
 //#define DEBUG_PIC
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8259_stub.c
--- a/tools/ioemu/hw/i8259_stub.c       Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/hw/i8259_stub.c       Mon Jan  9 11:22:17 2006
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include "xenctrl.h"
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 #include <stdio.h>
 #include "cpu.h"
 #include "cpu-all.h"
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/target-i386-dm/helper2.c      Mon Jan  9 11:22:17 2006
@@ -48,7 +48,7 @@
 #include <sys/ioctl.h>
 
 #include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 #include <xen/linux/evtchn.h>
 
 #include "cpu.h"
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Mon Jan  9 11:19:55 2006
+++ b/tools/ioemu/vl.c  Mon Jan  9 11:22:17 2006
@@ -2948,6 +2948,7 @@
             case QEMU_OPTION_vcpus:
                 vcpus = atoi(optarg);
                 fprintf(logfile, "qemu: the number of cpus is %d\n", vcpus);
+                break;
             case QEMU_OPTION_pci:
                 pci_enabled = 1;
                 break;
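
The vl.c hunk fixes a missing break: without it, QEMU_OPTION_vcpus fell through into QEMU_OPTION_pci and silently set pci_enabled whenever -vcpus was given. A runnable distillation of the bug:

    #include <stdio.h>

    int main(void)
    {
            int vcpus = 0, pci_enabled = 0, opt = 1;

            switch (opt) {
            case 1:                 /* QEMU_OPTION_vcpus */
                    vcpus = 2;
                    break;          /* the fix: without this we fall
                                     * through and enable PCI too */
            case 2:                 /* QEMU_OPTION_pci */
                    pci_enabled = 1;
                    break;
            }
            printf("vcpus=%d pci=%d\n", vcpus, pci_enabled);
            return 0;
    }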
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_core.c     Mon Jan  9 11:22:17 2006
@@ -55,7 +55,7 @@
     }
  
     for (i = 0; i < info.max_vcpu_id; i++)
-        if (xc_domain_get_vcpu_context(xc_handle, domid,
+        if (xc_vcpu_getcontext(xc_handle, domid,
                                        i, &ctxt[nr_vcpus]) == 0)
             nr_vcpus++;
  
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_domain.c   Mon Jan  9 11:22:17 2006
@@ -58,16 +58,16 @@
     return do_dom0_op(xc_handle, &op);
 }
 
-int xc_domain_pincpu(int xc_handle,
-                     uint32_t domid, 
-                     int vcpu,
-                     cpumap_t cpumap)
-{
-    DECLARE_DOM0_OP;
-    op.cmd = DOM0_PINCPUDOMAIN;
-    op.u.pincpudomain.domain  = (domid_t)domid;
-    op.u.pincpudomain.vcpu    = vcpu;
-    op.u.pincpudomain.cpumap  = cpumap;
+int xc_vcpu_setaffinity(int xc_handle,
+                        uint32_t domid, 
+                        int vcpu,
+                        cpumap_t cpumap)
+{
+    DECLARE_DOM0_OP;
+    op.cmd = DOM0_SETVCPUAFFINITY;
+    op.u.setvcpuaffinity.domain  = (domid_t)domid;
+    op.u.setvcpuaffinity.vcpu    = vcpu;
+    op.u.setvcpuaffinity.cpumap  = cpumap;
     return do_dom0_op(xc_handle, &op);
 }
 
@@ -155,7 +155,7 @@
     return ret;
 }
 
-int xc_domain_get_vcpu_context(int xc_handle,
+int xc_vcpu_getcontext(int xc_handle,
                                uint32_t domid,
                                uint32_t vcpu,
                                vcpu_guest_context_t *ctxt)
@@ -345,10 +345,10 @@
     return do_dom0_op(xc_handle, &op);
 }
 
-int xc_domain_get_vcpu_info(int xc_handle,
-                            uint32_t domid,
-                            uint32_t vcpu,
-                            xc_vcpuinfo_t *info)
+int xc_vcpu_getinfo(int xc_handle,
+                    uint32_t domid,
+                    uint32_t vcpu,
+                    xc_vcpuinfo_t *info)
 {
     int rc;
     DECLARE_DOM0_OP;
@@ -380,18 +380,18 @@
     return do_dom0_op(xc_handle, &op);
 }
 
-int xc_domain_setinfo(int xc_handle,
-                      uint32_t domid,
-                      uint32_t vcpu,
-                      vcpu_guest_context_t *ctxt)
+int xc_vcpu_setcontext(int xc_handle,
+                       uint32_t domid,
+                       uint32_t vcpu,
+                       vcpu_guest_context_t *ctxt)
 {
     dom0_op_t op;
     int rc;
 
-    op.cmd = DOM0_SETDOMAININFO;
-    op.u.setdomaininfo.domain = domid;
-    op.u.setdomaininfo.vcpu = vcpu;
-    op.u.setdomaininfo.ctxt = ctxt;
+    op.cmd = DOM0_SETVCPUCONTEXT;
+    op.u.setvcpucontext.domain = domid;
+    op.u.setvcpucontext.vcpu = vcpu;
+    op.u.setvcpucontext.ctxt = ctxt;
 
     if ( (rc = mlock(ctxt, sizeof(*ctxt))) != 0 )
         return rc;
@@ -402,6 +402,38 @@
 
     return rc;
 
+}
+
+int xc_domain_irq_permission(int xc_handle,
+                             uint32_t domid,
+                             uint8_t pirq,
+                             uint8_t allow_access)
+{
+    dom0_op_t op;
+
+    op.cmd = DOM0_IRQ_PERMISSION;
+    op.u.irq_permission.domain = domid;
+    op.u.irq_permission.pirq = pirq;
+    op.u.irq_permission.allow_access = allow_access;
+
+    return do_dom0_op(xc_handle, &op);
+}
+
+int xc_domain_iomem_permission(int xc_handle,
+                               uint32_t domid,
+                               unsigned long first_pfn,
+                               unsigned long nr_pfns,
+                               uint8_t allow_access)
+{
+    dom0_op_t op;
+
+    op.cmd = DOM0_IOMEM_PERMISSION;
+    op.u.iomem_permission.domain = domid;
+    op.u.iomem_permission.first_pfn = first_pfn;
+    op.u.iomem_permission.nr_pfns = nr_pfns;
+    op.u.iomem_permission.allow_access = allow_access;
+
+    return do_dom0_op(xc_handle, &op);
 }
 
 /*
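
The two new libxc wrappers expose the DOM0_IRQ_PERMISSION and DOM0_IOMEM_PERMISSION operations, which grant a domain access to a physical IRQ or to a range of machine frames. A hedged usage sketch; the domain ID, IRQ, and frame values are placeholders, not from the patch:

    #include <xenctrl.h>    /* declares the wrappers added above */

    /* Grant domain 7 access to IRQ 9 and to one page of MMIO space. */
    static int grant_hw_access(int xc_handle)
    {
        uint32_t domid = 7;

        if (xc_domain_irq_permission(xc_handle, domid, 9, 1))
            return -1;
        if (xc_domain_iomem_permission(xc_handle, domid, 0xfebf0, 1, 1))
            return -1;
        return 0;
    }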
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c       Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_ia64_stubs.c       Mon Jan  9 11:22:17 2006
@@ -5,7 +5,7 @@
 #include <stdlib.h>
 #include <zlib.h>
 #include "xen/arch-ia64.h"
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
 
 /* this is a very ugly way of getting FPSR_DEFAULT.  struct ia64_fpreg is
  * mysteriously declared in two places: /usr/include/asm/fpu.h and
@@ -627,6 +627,7 @@
                  unsigned int control_evtchn,
                  unsigned int lapic,
                  unsigned int vcpus,
+                 unsigned int acpi,
                  unsigned int store_evtchn,
                  unsigned long *store_mfn)
 {
@@ -663,7 +664,7 @@
         goto error_out;
     }
 
-    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ){
+    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) ){
         PERROR("Could not get vcpu context");
         goto error_out;
     }
@@ -687,11 +688,11 @@
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
-    launch_op.u.setdomaininfo.domain = (domid_t)domid;
-    launch_op.u.setdomaininfo.vcpu   = 0;
-    launch_op.u.setdomaininfo.ctxt   = ctxt;
-
-    launch_op.cmd = DOM0_SETDOMAININFO;
+    launch_op.u.setvcpucontext.domain = (domid_t)domid;
+    launch_op.u.setvcpucontext.vcpu   = 0;
+    launch_op.u.setvcpucontext.ctxt   = ctxt;
+
+    launch_op.cmd = DOM0_SETVCPUCONTEXT;
     rc = do_dom0_op(xc_handle, &launch_op);
     return rc;
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_linux_build.c      Mon Jan  9 11:22:17 2006
@@ -393,10 +393,14 @@
     start_info->store_evtchn = store_evtchn;
     start_info->console_mfn   = nr_pages - 1;
     start_info->console_evtchn = console_evtchn;
+    start_info->nr_pages       = nr_pages;     // FIXME?: nr_pages - 2 ????
     if ( initrd_len != 0 )
     {
         ctxt->initrd.start    = vinitrd_start;
         ctxt->initrd.size     = initrd_len;
+    } else {
+        ctxt->initrd.start    = 0;
+        ctxt->initrd.size     = 0;
     }
     strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
     ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
@@ -790,7 +794,7 @@
         goto error_out;
     }
 
-    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
     {
         PERROR("Could not get vcpu context");
         goto error_out;
@@ -893,11 +897,11 @@
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
-    launch_op.u.setdomaininfo.domain = (domid_t)domid;
-    launch_op.u.setdomaininfo.vcpu   = 0;
-    launch_op.u.setdomaininfo.ctxt   = ctxt;
-
-    launch_op.cmd = DOM0_SETDOMAININFO;
+    launch_op.u.setvcpucontext.domain = (domid_t)domid;
+    launch_op.u.setvcpucontext.vcpu   = 0;
+    launch_op.u.setvcpucontext.ctxt   = ctxt;
+
+    launch_op.cmd = DOM0_SETVCPUCONTEXT;
     rc = xc_dom0_op(xc_handle, &launch_op);
     
     return rc;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_linux_restore.c    Mon Jan  9 11:22:17 2006
@@ -171,7 +171,7 @@
 
 
     /* Only have to worry about vcpu 0 even for SMP */
-    if (xc_domain_get_vcpu_context( xc_handle, dom, 0, &ctxt)) {
+    if (xc_vcpu_getcontext( xc_handle, dom, 0, &ctxt)) {
         ERR("Could not get vcpu context");
         goto out;
     }
@@ -735,10 +735,10 @@
 
     DPRINTF("Domain ready to be built.\n");
 
-    op.cmd = DOM0_SETDOMAININFO;
-    op.u.setdomaininfo.domain = (domid_t)dom;
-    op.u.setdomaininfo.vcpu   = 0;
-    op.u.setdomaininfo.ctxt   = &ctxt;
+    op.cmd = DOM0_SETVCPUCONTEXT;
+    op.u.setvcpucontext.domain = (domid_t)dom;
+    op.u.setvcpucontext.vcpu   = 0;
+    op.u.setvcpucontext.ctxt   = &ctxt;
     rc = xc_dom0_op(xc_handle, &op);
 
     if (rc != 0) {
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_linux_save.c       Mon Jan  9 11:22:17 2006
@@ -382,7 +382,7 @@
         return -1;
     }
 
-    if ( xc_domain_get_vcpu_context(xc_handle, dom, 0 /* XXX */, ctxt)) 
+    if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) 
         ERR("Could not get vcpu context");
 
 
@@ -643,7 +643,7 @@
     }
     
     /* Only have to worry about vcpu 0 even for SMP */
-    if (xc_domain_get_vcpu_context(xc_handle, dom, 0, &ctxt)) {
+    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
         ERR("Could not get vcpu context");
         goto out;
     }
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_pagetab.c
--- a/tools/libxc/xc_pagetab.c  Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_pagetab.c  Mon Jan  9 11:22:17 2006
@@ -74,7 +74,7 @@
 #define pt_levels 4
 #endif
 
-    if (xc_domain_get_vcpu_context(xc_handle, dom, vcpu, &ctx) != 0) {
+    if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) {
         fprintf(stderr, "failed to retrieve vcpu context\n");
         goto out;
     }
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_ptrace.c   Mon Jan  9 11:22:17 2006
@@ -33,7 +33,7 @@
     if (online)
         *online = 0;
     if ( !(regs_valid & (1 << cpu)) ) { 
-        retval = xc_domain_get_vcpu_context(xc_handle, current_domid, 
+        retval = xc_vcpu_getcontext(xc_handle, current_domid, 
                                                cpu, &ctxt[cpu]);
         if ( retval ) 
             goto done;
@@ -43,8 +43,7 @@
        if ( online == NULL )
            goto done;
 
-       retval = xc_domain_get_vcpu_info(xc_handle, current_domid,
-                                        cpu, &info);
+       retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info);
        *online = info.online;
     
  done:
@@ -395,7 +394,7 @@
 
     case PTRACE_SETREGS:
         SET_XC_REGS(((struct gdb_regs *)data), ctxt[cpu].user_regs);
-        retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
+        retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
         if (retval)
             goto error_out;
         break;
@@ -405,7 +404,7 @@
          *  during single-stepping - but that just seems retarded
          */
         ctxt[cpu].user_regs.eflags |= PSL_T; 
-        retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
+        retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
         if ( retval )
         {
             perror("dom0 op failed");
@@ -423,8 +422,8 @@
                 /* Clear trace flag */
                 if ( ctxt[cpu].user_regs.eflags & PSL_T ) {
                     ctxt[cpu].user_regs.eflags &= ~PSL_T;
-                    retval = xc_domain_setinfo(xc_handle, current_domid, 
-                                               cpu, &ctxt[cpu]);
+                    retval = xc_vcpu_setcontext(xc_handle, current_domid, 
+                                                cpu, &ctxt[cpu]);
                     if ( retval ) {
                         perror("dom0 op failed");
                         goto error_out;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c        Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xc_vmx_build.c        Mon Jan  9 11:22:17 2006
@@ -9,7 +9,8 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <zlib.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/ioreq.h>
 
 #define VMX_LOADER_ENTR_ADDR  0x00100000
 
@@ -33,9 +34,6 @@
 #define E820_MAP_NR_OFFSET  0x000001E8
 #define E820_MAP_OFFSET     0x000002D0
 
-#define VCPU_NR_PAGE        0x0009F000
-#define VCPU_NR_OFFSET      0x00000800
-
 struct e820entry {
     uint64_t addr;
     uint64_t size;
@@ -119,26 +117,50 @@
     return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
 }
 
+static void
+set_hvm_info_checksum(struct hvm_info_table *t)
+{
+    uint8_t *ptr = (uint8_t *)t, sum = 0;
+    unsigned int i;
+
+    t->checksum = 0;
+
+    for (i = 0; i < t->length; i++)
+        sum += *ptr++;
+
+    t->checksum = -sum;
+}
+
 /*
- * Use E820 reserved memory 0x9F800 to pass number of vcpus to vmxloader
- * vmxloader will use it to config ACPI MADT table
+ * Use E820 reserved memory 0x9F800 to pass HVM info to vmxloader
+ * vmxloader will use this info to set BIOS accordingly
  */
-#define VCPU_MAGIC      0x76637075  /* "vcpu" */
-static int set_vcpu_nr(int xc_handle, uint32_t dom,
-                        unsigned long *pfn_list, unsigned int vcpus)
-{
-    char         *va_map;
-    unsigned int *va_vcpus;
-
-    va_map = xc_map_foreign_range(xc_handle, dom,
-                                  PAGE_SIZE, PROT_READ|PROT_WRITE,
-                                  pfn_list[VCPU_NR_PAGE >> PAGE_SHIFT]);
+static int set_hvm_info(int xc_handle, uint32_t dom,
+                        unsigned long *pfn_list, unsigned int vcpus,
+                        unsigned int acpi, unsigned int apic)
+{
+    char *va_map;
+    struct hvm_info_table *va_hvm;
+
+    va_map = xc_map_foreign_range(
+        xc_handle,
+        dom,
+        PAGE_SIZE,
+        PROT_READ|PROT_WRITE,
+        pfn_list[HVM_INFO_PFN]);
+    
     if ( va_map == NULL )
         return -1;
 
-    va_vcpus = (unsigned int *)(va_map + VCPU_NR_OFFSET);
-    va_vcpus[0] = VCPU_MAGIC;
-    va_vcpus[1] = vcpus;
+    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
+    memset(va_hvm, 0, sizeof(*va_hvm));
+    strncpy(va_hvm->signature, "HVM INFO", 8);
+    va_hvm->length       = sizeof(struct hvm_info_table);
+    va_hvm->acpi_enabled = acpi;
+    va_hvm->apic_enabled = apic;
+    va_hvm->nr_vcpus     = vcpus;
+
+    set_hvm_info_checksum(va_hvm);
 
     munmap(va_map, PAGE_SIZE);
 
@@ -279,8 +301,9 @@
                        vcpu_guest_context_t *ctxt,
                        unsigned long shared_info_frame,
                        unsigned int control_evtchn,
-                       unsigned int lapic,
                        unsigned int vcpus,
+                       unsigned int acpi,
+                       unsigned int apic,
                        unsigned int store_evtchn,
                        unsigned long *store_mfn)
 {
@@ -490,20 +513,14 @@
             goto error_out;
     }
 
-    if (set_vcpu_nr(xc_handle, dom, page_array, vcpus)) {
-        fprintf(stderr, "Couldn't set vcpu number for VMX guest.\n");
-        goto error_out;
-    }
-
-    *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
-    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
-        goto error_out;
-
-    shared_page_frame = (v_end - PAGE_SIZE) >> PAGE_SHIFT;
-
-    if ((e820_page = xc_map_foreign_range(
-        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-        page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0)
+    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
+        fprintf(stderr, "Couldn't set hvm info for VMX guest.\n");
+        goto error_out;
+    }
+
+    if ( (e820_page = xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
         goto error_out;
     memset(e820_page, 0, PAGE_SIZE);
     e820_map_nr = build_e820map(e820_page, v_end);
@@ -518,25 +535,29 @@
     munmap(e820_page, PAGE_SIZE);
 
     /* shared_info page starts its life empty. */
-    if ((shared_info = xc_map_foreign_range(
-        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-        shared_info_frame)) == 0)
+    if ( (shared_info = xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         shared_info_frame)) == 0 )
         goto error_out;
     memset(shared_info, 0, sizeof(shared_info_t));
     /* Mask all upcalls... */
     for ( i = 0; i < MAX_VIRT_CPUS; i++ )
         shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
-
     munmap(shared_info, PAGE_SIZE);
 
     /* Populate the event channel port in the shared page */
-    if ((sp = (shared_iopage_t *) xc_map_foreign_range(
-        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-        page_array[shared_page_frame])) == 0)
+    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
+    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         shared_page_frame)) == 0 )
         goto error_out;
     memset(sp, 0, PAGE_SIZE);
     sp->sp_global.eport = control_evtchn;
     munmap(sp, PAGE_SIZE);
+
+    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
+    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
+        goto error_out;
 
     /* Send the page update requests down to the hypervisor. */
     if ( xc_finish_mmu_updates(xc_handle, mmu) )
@@ -559,7 +580,7 @@
     ctxt->user_regs.eax = 0;
     ctxt->user_regs.esp = 0;
     ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
-    ctxt->user_regs.ecx = lapic;
+    ctxt->user_regs.ecx = 0;
     ctxt->user_regs.esi = 0;
     ctxt->user_regs.edi = 0;
     ctxt->user_regs.ebp = 0;
@@ -572,29 +593,6 @@
     free(mmu);
     free(page_array);
     return -1;
-}
-
-#define VMX_FEATURE_FLAG 0x20
-
-static int vmx_identify(void)
-{
-    int eax, ecx;
-
-    __asm__ __volatile__ (
-#if defined(__i386__)
-                          "push %%ebx; cpuid; pop %%ebx"
-#elif defined(__x86_64__)
-                          "push %%rbx; cpuid; pop %%rbx"
-#endif
-                          : "=a" (eax), "=c" (ecx)
-                          : "0" (1)
-                          : "dx");
-
-    if (!(ecx & VMX_FEATURE_FLAG)) {
-        return -1;
-    }
-
-    return 0;
 }
 
 int xc_vmx_build(int xc_handle,
@@ -602,8 +600,9 @@
                  int memsize,
                  const char *image_name,
                  unsigned int control_evtchn,
-                 unsigned int lapic,
                  unsigned int vcpus,
+                 unsigned int acpi,
+                 unsigned int apic,
                  unsigned int store_evtchn,
                  unsigned long *store_mfn)
 {
@@ -613,10 +612,18 @@
     unsigned long nr_pages;
     char         *image = NULL;
     unsigned long image_size;
-
-    if ( vmx_identify() < 0 )
-    {
-        PERROR("CPU doesn't support VMX Extensions");
+    xen_capabilities_info_t xen_caps;
+
+    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
+    {
+        PERROR("Failed to get xen version info");
+        goto error_out;
+    }
+
+    if ( !strstr(xen_caps, "hvm") )
+    {
+        PERROR("CPU doesn't support VMX Extensions or "
+               "CPU VMX Extensions are not turned on");
         goto error_out;
     }
 
@@ -644,7 +651,7 @@
         goto error_out;
     }
 
-    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+    if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
     {
         PERROR("Could not get vcpu context");
         goto error_out;
@@ -659,7 +666,7 @@
 
     if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
                     ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn,
-                     lapic, vcpus, store_evtchn, store_mfn) < 0)
+                     vcpus, acpi, apic, store_evtchn, store_mfn) < 0)
     {
         ERROR("Error constructing guest OS");
         goto error_out;
@@ -701,11 +708,11 @@
 
     memset( &launch_op, 0, sizeof(launch_op) );
 
-    launch_op.u.setdomaininfo.domain = (domid_t)domid;
-    launch_op.u.setdomaininfo.vcpu   = 0;
-    launch_op.u.setdomaininfo.ctxt   = ctxt;
-
-    launch_op.cmd = DOM0_SETDOMAININFO;
+    launch_op.u.setvcpucontext.domain = (domid_t)domid;
+    launch_op.u.setvcpucontext.vcpu   = 0;
+    launch_op.u.setvcpucontext.ctxt   = ctxt;
+
+    launch_op.cmd = DOM0_SETVCPUCONTEXT;
     rc = xc_dom0_op(xc_handle, &launch_op);
 
     return rc;
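
The in-tree CPUID probe (vmx_identify) is gone: xc_vmx_build now asks the
hypervisor for its capability string and refuses to continue unless it
advertises "hvm". Callers also pass separate acpi and apic flags in place
of the old lapic argument. A minimal caller sketch against the new
signature (the handle, domain, path and event-channel values here are
illustrative assumptions, not part of the patch):

    unsigned long store_mfn = 0;
    int rc = xc_vmx_build(xc_handle, domid,
                          memsize,              /* guest memory in MB */
                          "/usr/lib/xen/boot/vmxloader", /* illustrative path */
                          control_evtchn,
                          1,                    /* vcpus */
                          1,                    /* acpi: expose ACPI tables */
                          1,                    /* apic: enable the APIC */
                          store_evtchn, &store_mfn);
    if (rc != 0)
        perror("xc_vmx_build");                 /* libxc sets errno */
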
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xenctrl.h     Mon Jan  9 11:22:17 2006
@@ -181,10 +181,11 @@
  */
 int xc_domain_destroy(int xc_handle, 
                       uint32_t domid);
-int xc_domain_pincpu(int xc_handle,
-                     uint32_t domid,
-                     int vcpu,
-                     cpumap_t cpumap);
+
+int xc_vcpu_setaffinity(int xc_handle,
+                        uint32_t domid,
+                        int vcpu,
+                        cpumap_t cpumap);
 
 /**
  * This function will return information about one or more domains. It is
@@ -208,7 +209,7 @@
 
 
 /**
- * This function will set the vcpu context for the specified domain.
+ * This function will set the execution context for the specified vcpu.
  *
  * @parm xc_handle a handle to an open hypervisor interface
  * @parm domid the domain to set the vcpu context for
@@ -216,10 +217,10 @@
  * @parm ctxt pointer to the cpu context with the values to set
  * @return 0 on success, -1 on error
  */
-int xc_domain_setinfo(int xc_handle,
-                      uint32_t domid,
-                      uint32_t vcpu,
-                      vcpu_guest_context_t *ctxt);
+int xc_vcpu_setcontext(int xc_handle,
+                       uint32_t domid,
+                       uint32_t vcpu,
+                       vcpu_guest_context_t *ctxt);
 /**
  * This function will return information about one or more domains, using a
  * single hypercall.  The domain information will be stored into the supplied
@@ -249,17 +250,16 @@
  *            domain
  * @return 0 on success, -1 on failure
  */
-int xc_domain_get_vcpu_context(int xc_handle,
+int xc_vcpu_getcontext(int xc_handle,
                                uint32_t domid,
                                uint32_t vcpu,
                                vcpu_guest_context_t *ctxt);
 
 typedef dom0_getvcpuinfo_t xc_vcpuinfo_t;
-int xc_domain_get_vcpu_info(int xc_handle,
-                            uint32_t domid,
-                            uint32_t vcpu,
-                            xc_vcpuinfo_t *info);
-
+int xc_vcpu_getinfo(int xc_handle,
+                    uint32_t domid,
+                    uint32_t vcpu,
+                    xc_vcpuinfo_t *info);
 
 int xc_domain_setcpuweight(int xc_handle,
                            uint32_t domid,
@@ -379,6 +379,17 @@
                                 uint32_t first_port,
                                 uint32_t nr_ports,
                                 uint32_t allow_access);
+
+int xc_domain_irq_permission(int xc_handle,
+                             uint32_t domid,
+                             uint8_t pirq,
+                             uint8_t allow_access);
+
+int xc_domain_iomem_permission(int xc_handle,
+                               uint32_t domid,
+                               unsigned long first_pfn,
+                               unsigned long nr_pfns,
+                               uint8_t allow_access);
 
 unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, 
                                    unsigned long mfn);
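
These renames give each per-vcpu operation an xc_vcpu_* name, and the two
new permission calls let dom0 delegate physical IRQs and I/O memory to a
driver domain. A short usage sketch (the domain ID, CPU number and pirq
are invented values for illustration):

    cpumap_t map = (cpumap_t)1 << 2;          /* physical CPU 2 */
    if ( xc_vcpu_setaffinity(xc_handle, domid, 0, map) != 0 )
        perror("xc_vcpu_setaffinity");        /* pin vcpu 0 */

    if ( xc_domain_irq_permission(xc_handle, domid, 9, 1) != 0 )
        perror("xc_domain_irq_permission");   /* grant pirq 9 */
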
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Mon Jan  9 11:19:55 2006
+++ b/tools/libxc/xenguest.h    Mon Jan  9 11:22:17 2006
@@ -56,8 +56,9 @@
                  int memsize,
                  const char *image_name,
                  unsigned int control_evtchn,
-                 unsigned int lapic,
                  unsigned int vcpus,
+                 unsigned int acpi,
+                 unsigned int apic,
                  unsigned int store_evtchn,
                  unsigned long *store_mfn);
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/lowlevel/xc/xc.c Mon Jan  9 11:22:17 2006
@@ -135,9 +135,9 @@
 }
 
 
-static PyObject *pyxc_domain_pincpu(XcObject *self,
-                                    PyObject *args,
-                                    PyObject *kwds)
+static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
+                                       PyObject *args,
+                                       PyObject *kwds)
 {
     uint32_t dom;
     int vcpu = 0, i;
@@ -157,7 +157,7 @@
             cpumap |= (cpumap_t)1 << PyInt_AsLong(PyList_GetItem(cpulist, i));
     }
   
-    if ( xc_domain_pincpu(self->xc_handle, dom, vcpu, cpumap) != 0 )
+    if ( xc_vcpu_setaffinity(self->xc_handle, dom, vcpu, cpumap) != 0 )
         return PyErr_SetFromErrno(xc_error);
     
     Py_INCREF(zero);
@@ -297,7 +297,7 @@
                                       &dom, &vcpu) )
         return NULL;
 
-    rc = xc_domain_get_vcpu_info(self->xc_handle, dom, vcpu, &info);
+    rc = xc_vcpu_getinfo(self->xc_handle, dom, vcpu, &info);
     if ( rc < 0 )
         return PyErr_SetFromErrno(xc_error);
 
@@ -362,21 +362,23 @@
     uint32_t dom;
     char *image;
     int control_evtchn, store_evtchn;
+    int memsize;
     int vcpus = 1;
-    int lapic = 0;
-    int memsize;
+    int acpi = 0;
+    int apic = 0;
     unsigned long store_mfn = 0;
 
     static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
-                                "memsize", "image", "lapic", "vcpus", NULL };
-
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisii", kwd_list,
+                                "memsize", "image", "vcpus", "acpi", "apic",
+                                NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list,
                                       &dom, &control_evtchn, &store_evtchn,
-                                      &memsize, &image, &lapic, &vcpus) )
+                                      &memsize, &image, &vcpus, &acpi, &apic) )
         return NULL;
 
     if ( xc_vmx_build(self->xc_handle, dom, memsize, image, control_evtchn,
-                      lapic, vcpus, store_evtchn, &store_mfn) != 0 )
+                      vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
     return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
@@ -774,6 +776,52 @@
     return zero;
 }
 
+static PyObject *pyxc_domain_irq_permission(PyObject *self,
+                                            PyObject *args,
+                                            PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+    uint32_t dom;
+    int pirq, allow_access, ret;
+
+    static char *kwd_list[] = { "dom", "pirq", "allow_access", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iii", kwd_list, 
+                                      &dom, &pirq, &allow_access) )
+        return NULL;
+
+    ret = xc_domain_irq_permission(
+        xc->xc_handle, dom, pirq, allow_access);
+    if ( ret != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_domain_iomem_permission(PyObject *self,
+                                               PyObject *args,
+                                               PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+    uint32_t dom;
+    unsigned long first_pfn, nr_pfns, allow_access, ret;
+
+    static char *kwd_list[] = { "dom", "first_pfn", "nr_pfns", "allow_access", 
NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "illi", kwd_list, 
+                                      &dom, &first_pfn, &nr_pfns, &allow_access) )
+        return NULL;
+
+    ret = xc_domain_iomem_permission(
+        xc->xc_handle, dom, first_pfn, nr_pfns, allow_access);
+    if ( ret != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
 
 static PyObject *dom_op(XcObject *self, PyObject *args,
                         int (*fn)(int, uint32_t))
@@ -842,8 +890,8 @@
       " dom [int]:    Identifier of domain to be destroyed.\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
-    { "domain_pincpu", 
-      (PyCFunction)pyxc_domain_pincpu, 
+    { "vcpu_setaffinity", 
+      (PyCFunction)pyxc_vcpu_setaffinity, 
       METH_VARARGS | METH_KEYWORDS, "\n"
       "Pin a VCPU to a specified set CPUs.\n"
       " dom [int]:     Identifier of domain to which VCPU belongs.\n"
@@ -1067,6 +1115,25 @@
       " dom          [int]: Identifier of domain to be allowed access.\n"
       " first_port   [int]: First IO port\n"
       " nr_ports     [int]: Number of IO ports\n"
+      " allow_access [int]: Non-zero means enable access; else disable 
access\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "domain_irq_permission",
+      (PyCFunction)pyxc_domain_irq_permission,
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Allow a domain access to a physical IRQ\n"
+      " dom          [int]: Identifier of domain to be allowed access.\n"
+      " pirq         [int]: The Physical IRQ\n"
+      " allow_access [int]: Non-zero means enable access; else disable 
access\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "domain_iomem_permission",
+      (PyCFunction)pyxc_domain_iomem_permission,
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Allow a domain access to a range of IO memory pages\n"
+      " dom          [int]: Identifier of domain to be allowed access.\n"
+      " first_pfn   [long]: First page of I/O Memory\n"
+      " nr_pfns     [long]: Number of pages of I/O Memory (>0)\n"
       " allow_access [int]: Non-zero means enable access; else disable 
access\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xend/XendDomain.py       Mon Jan  9 11:22:17 2006
@@ -443,7 +443,7 @@
         cpumap = map(lambda x: int(x),
                      cpumap.replace("[", "").replace("]", "").split(","))
         try:
-            return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap)
+            return xc.vcpu_setaffinity(dominfo.getDomid(), vcpu, cpumap)
         except Exception, ex:
             raise XendError(str(ex))
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xend/XendDomainInfo.py   Mon Jan  9 11:22:17 2006
@@ -1179,7 +1179,7 @@
                 for v in range(0, self.info['max_vcpu_id']+1):
                     # vcpu_setaffinity takes a list of ints
                     cpu = [ int( cpus[v % len(cpus)] ) ]
-                    xc.domain_pincpu(self.domid, v, cpu)
+                    xc.vcpu_setaffinity(self.domid, v, cpu)
 
             m = self.image.getDomainMemory(self.info['memory'] * 1024)
             balloon.free(m)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xend/image.py    Mon Jan  9 11:22:17 2006
@@ -189,11 +189,16 @@
     def configure(self, imageConfig, deviceConfig):
         ImageHandler.configure(self, imageConfig, deviceConfig)
 
+        info = xc.xeninfo()
+        if 'hvm' not in info['xen_caps']:
+            raise VmError("vmx: not an Intel VT platform, cannot create VMX guest")
+
         self.dmargs = self.parseDeviceModelArgs(imageConfig, deviceConfig)
         self.device_model = sxp.child_value(imageConfig, 'device_model')
         if not self.device_model:
             raise VmError("vmx: missing device model")
         self.display = sxp.child_value(imageConfig, 'display')
+        self.xauthority = sxp.child_value(imageConfig, 'xauthority')
 
         self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
                         ("image/device-model", self.device_model),
@@ -204,10 +209,8 @@
 
         self.dmargs += self.configVNC(imageConfig)
 
-        self.lapic = 0
-        lapic = sxp.child_value(imageConfig, 'lapic')
-        if not lapic is None:
-            self.lapic = int(lapic)
+        self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0))
+        self.apic = int(sxp.child_value(imageConfig, 'apic', 0))
 
     def buildDomain(self):
         # Create an event channel
@@ -222,17 +225,18 @@
         log.debug("control_evtchn = %d", self.device_channel)
         log.debug("store_evtchn   = %d", store_evtchn)
         log.debug("memsize        = %d", self.vm.getMemoryTarget() / 1024)
-        log.debug("lapic          = %d", self.lapic)
         log.debug("vcpus          = %d", self.vm.getVCpuCount())
+        log.debug("acpi           = %d", self.acpi)
+        log.debug("apic           = %d", self.apic)
 
         return xc.vmx_build(dom            = self.vm.getDomid(),
                             image          = self.kernel,
                             control_evtchn = self.device_channel,
                             store_evtchn   = store_evtchn,
                             memsize        = self.vm.getMemoryTarget() / 1024,
-                            lapic          = self.lapic,
-                            vcpus          = self.vm.getVCpuCount())
-
+                            vcpus          = self.vm.getVCpuCount(),
+                            acpi           = self.acpi,
+                            apic           = self.apic)
 
     # Return a list of cmd line args to the device models based on the
     # xm config file
@@ -264,44 +268,44 @@
         nics = 0
         for (name, info) in deviceConfig:
             if name == 'vbd':
-               uname = sxp.child_value(info, 'uname')
-               typedev = sxp.child_value(info, 'dev')
-               (_, vbdparam) = string.split(uname, ':', 1)
-               if re.match('^ioemu:', typedev):
-                  (emtype, vbddev) = string.split(typedev, ':', 1)
-               else:
-                  emtype = 'vbd'
-                  vbddev = typedev
-               if emtype != 'ioemu':
-                  continue;
-               vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
-               if vbddev not in vbddev_list:
-                  raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
-               ret.append("-%s" % vbddev)
-               ret.append("%s" % vbdparam)
+                uname = sxp.child_value(info, 'uname')
+                typedev = sxp.child_value(info, 'dev')
+                (_, vbdparam) = string.split(uname, ':', 1)
+                if 'ioemu:' in typedev:
+                    (emtype, vbddev) = string.split(typedev, ':', 1)
+                else:
+                    emtype = 'vbd'
+                    vbddev = typedev
+                if emtype == 'vbd':
+                    continue
+                vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
+                if vbddev not in vbddev_list:
+                    raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
+                ret.append("-%s" % vbddev)
+                ret.append("%s" % vbdparam)
             if name == 'vif':
-               type = sxp.child_value(info, 'type')
-               if type != 'ioemu':
-                   continue
-               nics += 1
-               if mac != None:
-                   continue
-               mac = sxp.child_value(info, 'mac')
-               bridge = sxp.child_value(info, 'bridge')
-               if mac == None:
-                   mac = randomMAC()
-               if bridge == None:
-                   bridge = 'xenbr0'
-               ret.append("-macaddr")
-               ret.append("%s" % mac)
-               ret.append("-bridge")
-               ret.append("%s" % bridge)
+                type = sxp.child_value(info, 'type')
+                if type != 'ioemu':
+                    continue
+                nics += 1
+                if mac != None:
+                    continue
+                mac = sxp.child_value(info, 'mac')
+                bridge = sxp.child_value(info, 'bridge')
+                if mac == None:
+                    mac = randomMAC()
+                if bridge == None:
+                    bridge = 'xenbr0'
+                ret.append("-macaddr")
+                ret.append("%s" % mac)
+                ret.append("-bridge")
+                ret.append("%s" % bridge)
             if name == 'vtpm':
-               instance = sxp.child_value(info, 'pref_instance')
-               ret.append("-instance")
-               ret.append("%s" % instance)
+                instance = sxp.child_value(info, 'pref_instance')
+                ret.append("-instance")
+                ret.append("%s" % instance)
         ret.append("-nics")
-        ret.append("%d" % nics) 
+        ret.append("%d" % nics)
         return ret
 
     def configVNC(self, config):
@@ -340,6 +344,8 @@
         env = dict(os.environ)
         if self.display:
             env['DISPLAY'] = self.display
+        if self.xauthority:
+            env['XAUTHORITY'] = self.xauthority
         log.info("spawning device models: %s %s", self.device_model, args)
         self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
         log.info("device model pid: %d", self.pid)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xend/server/blkif.py     Mon Jan  9 11:22:17 2006
@@ -31,7 +31,7 @@
     """Block device interface controller. Handles all block devices
     for a domain.
     """
-    
+
     def __init__(self, vm):
         """Create a block device controller.
         """
@@ -40,9 +40,9 @@
 
     def getDeviceDetails(self, config):
         """@see DevController.getDeviceDetails"""
-        
+
         dev = sxp.child_value(config, 'dev')
-        if re.match('^ioemu:', dev):
+        if 'ioemu:' in dev:
             return (None,{},{})
 
         devid = blkif.blkdev_name_to_number(dev)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xm/create.py     Mon Jan  9 11:22:17 2006
@@ -160,9 +160,13 @@
           fn=set_int, default=None,
           use="CPUS to run the domain on.")
 
-gopts.var('lapic', val='LAPIC',
+gopts.var('acpi', val='ACPI',
           fn=set_int, default=0,
-          use="Disable or enable local APIC of VMX domain.")
+          use="Disable or enable ACPI of VMX domain.")
+
+gopts.var('apic', val='APIC',
+          fn=set_int, default=0,
+          use="Disable or enable APIC of VMX domain.")
 
 gopts.var('vcpus', val='VCPUS',
           fn=set_int, default=1,
@@ -387,6 +391,10 @@
 gopts.var('display', val='DISPLAY',
           fn=set_value, default=None,
           use="X11 display to use")
+
+gopts.var('xauthority', val='XAUTHORITY',
+          fn=set_value, default=None,
+          use="X11 Authority to use")
 
 
 def err(msg):
@@ -526,7 +534,8 @@
     """
     args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
              'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio',
-             'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic']
+             'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'acpi', 'apic',
+             'xauthority' ]
     for a in args:
         if (vals.__dict__[a]):
             config_image.append([a, vals.__dict__[a]])
@@ -801,6 +810,9 @@
     if not gopts.vals.display:
         gopts.vals.display = os.getenv("DISPLAY")
 
+    if not gopts.vals.xauthority:
+        gopts.vals.xauthority = os.getenv("XAUTHORITY")
+
     # Process remaining args as config variables.
     for arg in args:
         if '=' in arg:
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Mon Jan  9 11:19:55 2006
+++ b/tools/python/xen/xm/main.py       Mon Jan  9 11:22:17 2006
@@ -390,7 +390,6 @@
 
 
 def xm_vcpu_list(args):
-    print 'Name                              ID  VCPU  CPU  State  Time(s)  CPU Affinity'
 
     from xen.xend.XendClient import server
     if args:
@@ -400,6 +399,8 @@
         dominfo = map(
             lambda x: server.xend_domain_vcpuinfo(sxp.child_value(x, 'name')),
             doms)
+
+    print 'Name                              ID  VCPU  CPU  State  Time(s)  CPU Affinity'
 
     for dom in dominfo:
         def get_info(n):
@@ -625,6 +626,8 @@
     server.xend_domain_cpu_sedf_set(dom, *v)
 
 def xm_info(args):
+    arg_check(args, "info", 0)
+
     from xen.xend.XendClient import server
     info = server.xend_node()
     
@@ -645,9 +648,12 @@
 
 
 def xm_top(args):
+    arg_check(args, "top", 0)
+
     os.execvp('xentop', ['xentop'])
 
 def xm_dmesg(args):
+    arg_check(args, "dmesg", 0)
     
     gopts = Opts(use="""[-c|--clear]
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Mon Jan  9 11:19:55 2006
+++ b/tools/tests/test_x86_emulator.c   Mon Jan  9 11:22:17 2006
@@ -92,7 +92,7 @@
     regs.ecx    = 0x12345678;
     cr2         = (unsigned long)&res;
     res         = 0x7FFFFFFF;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.eflags != 0xa94) ||
@@ -110,7 +110,7 @@
     regs.ecx    = 0x12345678UL;
 #endif
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.ecx != 0x8000000FUL) ||
@@ -125,7 +125,7 @@
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          (regs.eflags != 0x244) ||
@@ -141,7 +141,7 @@
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          ((regs.eflags&0x240) != 0x200) ||
@@ -157,7 +157,7 @@
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x12345678) || 
          (regs.eflags != 0x200) ||
@@ -174,7 +174,7 @@
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
     cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0xDDEEFF00) || 
          (regs.eflags != 0x244) ||
@@ -193,7 +193,7 @@
     regs.edi    = (unsigned long)&res + 2;
     regs.error_code = 0; /* read fault */
     cr2         = regs.esi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x44554455) ||
          (regs.eflags != 0x200) ||
@@ -211,7 +211,7 @@
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)&res;
     cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
     if ( (rc != 0) || 
          (res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
@@ -229,7 +229,7 @@
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
     cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -243,7 +243,7 @@
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
     cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -260,7 +260,7 @@
     regs.ecx    = 0x12345678;
     cr2         = (unsigned long)&res;
     res         = 0x82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) ||
          (res != 0x82) ||
          (regs.ecx != 0xFFFFFF82) ||
@@ -275,7 +275,7 @@
     regs.ecx    = 0x12345678;
     cr2         = (unsigned long)&res;
     res         = 0x1234aa82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
     if ( (rc != 0) ||
          (res != 0x1234aa82) ||
          (regs.ecx != 0xaa82) ||
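
Every call site now passes a named execution mode instead of the old
operand-size literal 4. The enumerators live in x86_emulate.h; from
memory the set covers the cases below, with X86EMUL_MODE_PROT32 defined
so that old callers passing 4 kept working (treat the header as
authoritative for the exact spellings and values):

    /* Execution modes accepted by x86_emulate_memop() (sketch). */
    X86EMUL_MODE_REAL     /* real mode               */
    X86EMUL_MODE_PROT16   /* 16-bit protected mode   */
    X86EMUL_MODE_PROT32   /* 32-bit protected mode   */
    X86EMUL_MODE_PROT64   /* 64-bit long mode        */
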
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/README
--- a/tools/vtpm_manager/README Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/README Mon Jan  9 11:22:17 2006
@@ -53,11 +53,6 @@
 
 MANUAL_DM_LAUNCH             -> Must manually launch & kill VTPMs
 
-WELL_KNOWN_SRK_AUTH          -> Rather than randomly generating the password for the SRK,
-                                use a well known value. This is necessary for sharing use
-                                of the SRK across applications. Such as VTPM and Dom0
-                                measurement software.
-
 WELL_KNOWN_OWNER_AUTH        -> Rather than randomly generating the password for the owner,
                                 use a well known value. This is useful for debugging and for
                                 poor bios which do not support clearing TPM if OwnerAuth is
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/Rules.mk
--- a/tools/vtpm_manager/Rules.mk       Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/Rules.mk       Mon Jan  9 11:22:17 2006
@@ -56,8 +56,7 @@
 # Do not have manager launch DMs.
 #CFLAGS += -DMANUAL_DM_LAUNCH
 
-# Fixed SRK
-CFLAGS += -DWELL_KNOWN_SRK_AUTH
+# Fixed OwnerAuth
 #CFLAGS += -DWELL_KNOWN_OWNER_AUTH
 
 # TPM Hardware Device or TPM Simulator
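
With WELL_KNOWN_SRK_AUTH removed, the SRK secret is no longer a build-time
option: the manager now always uses the fixed well-known value (see the
vtpm_manager.c hunks below), so other software that needs the SRK can
supply the same bytes. In effect every build now compiles in:

    /* SRK auth is a fixed well-known value: 20 bytes of 0xff. */
    const TPM_AUTHDATA SRK_AUTH = { 0xff, 0xff, /* ...18 more 0xff bytes */ };
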
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/securestorage.c
--- a/tools/vtpm_manager/manager/securestorage.c        Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/securestorage.c        Mon Jan  9 11:22:17 2006
@@ -65,7 +65,7 @@
   UINT32 i;
   struct pack_constbuf_t symkey_cipher32, data_cipher32;
   
-  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping[%d]: 0x", buffer_len(inbuf));
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping Input[%d]: 0x", 
buffer_len(inbuf));
   for (i=0; i< buffer_len(inbuf); i++)
     vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
   vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
@@ -94,6 +94,12 @@
               BSG_TPM_SIZE32_DATA, &data_cipher32);
 
   vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of 
E(data)\n", buffer_len(&symkey_cipher), buffer_len(&data_cipher));
+
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping Output[%d]: 0x", 
buffer_len(sealed_data));
+  for (i=0; i< buffer_len(sealed_data); i++)
+    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_data->bytes[i]);
+  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
   goto egress;
 
  abort_egress:
@@ -125,7 +131,7 @@
 
   memset(&symkey, 0, sizeof(symkey_t));
 
-  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "envelope decrypting[%ld]: 0x", cipher_size);
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Envelope Decrypt Input[%ld]: 0x", 
cipher_size);
   for (i=0; i< cipher_size; i++)
     vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cipher[i]);
   vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
@@ -155,6 +161,11 @@
   
   // Decrypt State
   TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &data_cipher, unsealed_data) );
+
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Envelope Decrypte Output[%d]: 0x", 
buffer_len(unsealed_data));
+  for (i=0; i< buffer_len(unsealed_data); i++)
+    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", unsealed_data->bytes[i]);
+  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
   
   goto egress;
   
@@ -291,124 +302,175 @@
   return status;
 }
 
+
 TPM_RESULT VTPM_SaveService(void) {
   TPM_RESULT status=TPM_SUCCESS;
   int fh, dmis=-1;
-  
-  BYTE *flat_global;
-  int flat_global_size, bytes_written;
+
+  BYTE *flat_boot_key, *flat_dmis, *flat_enc;
+  buffer_t clear_flat_global, enc_flat_global;
   UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap);
+  UINT32 bootKeySize = buffer_len(&vtpm_globals->bootKeyWrap);
   struct pack_buf_t storage_key_pack = {storageKeySize, vtpm_globals->storageKeyWrap.bytes};
-  
+  struct pack_buf_t boot_key_pack = {bootKeySize, vtpm_globals->bootKeyWrap.bytes};
+
   struct hashtable_itr *dmi_itr;
   VTPM_DMI_RESOURCE *dmi_res;
-  
-  UINT32 flat_global_full_size;
-  
-  // Global Values needing to be saved
-  flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths
-    sizeof(UINT32) +       // storagekeysize
-    storageKeySize +       // storage key
-    hashtable_count(vtpm_globals->dmi_map) * // num DMIS
-    (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
-  
-  
-  flat_global = (BYTE *) malloc( flat_global_full_size);
-  
-  flat_global_size = BSG_PackList(flat_global, 4,
-                                 BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
-                                 BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
-                                 BSG_TPM_SECRET,   &vtpm_globals->storage_key_usage_auth,
-                                 BSG_TPM_SIZE32_DATA, &storage_key_pack);
-  
+
+  UINT32 boot_key_size, flat_dmis_size;
+
+  // Initially fill these with buffer sizes for each data type. Later fill
+  // in actual size, once flattened.
+  boot_key_size =  sizeof(UINT32) +       // bootkeysize
+                   bootKeySize;           // boot key
+
+  TPMTRYRETURN(buffer_init(&clear_flat_global, 3*sizeof(TPM_DIGEST) + // Auths
+                                              sizeof(UINT32) + // storagekeysize
+                                              storageKeySize, NULL) ); // storage key
+
+  flat_dmis_size = (hashtable_count(vtpm_globals->dmi_map) - 1) * // num DMIS (-1 for Dom0)
+                   (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
+
+  flat_boot_key = (BYTE *) malloc( boot_key_size );
+  flat_enc = (BYTE *) malloc( sizeof(UINT32) );
+  flat_dmis = (BYTE *) malloc( flat_dmis_size );
+
+  boot_key_size = BSG_PackList(flat_boot_key, 1,
+                               BSG_TPM_SIZE32_DATA, &boot_key_pack);
+
+  BSG_PackList(clear_flat_global.bytes, 3,
+                BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+                BSG_TPM_SECRET,   &vtpm_globals->storage_key_usage_auth,
+                BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
+  TPMTRYRETURN(envelope_encrypt(&clear_flat_global,
+                                &vtpm_globals->bootKey,
+                                &enc_flat_global) );
+
+  BSG_PackConst(buffer_len(&enc_flat_global), 4, flat_enc);
+
   // Per DMI values to be saved
   if (hashtable_count(vtpm_globals->dmi_map) > 0) {
-    
+
     dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
     do {
       dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
       dmis++;
 
       // No need to save dmi0.
-      if (dmi_res->dmi_id == 0)        
-       continue;
-      
-      
-      flat_global_size += BSG_PackList( flat_global + flat_global_size, 3,
-                                       BSG_TYPE_UINT32, &dmi_res->dmi_id,
-                                       BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
-                                       BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-      
+      if (dmi_res->dmi_id == 0)
+        continue;
+
+
+      flat_dmis_size += BSG_PackList( flat_dmis + flat_dmis_size, 3,
+                                        BSG_TYPE_UINT32, &dmi_res->dmi_id,
+                                        BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+                                        BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+
     } while (hashtable_iterator_advance(dmi_itr));
   }
-  
-  //FIXME: Once we have a way to protect a TPM key, we should use it to 
-  //       encrypt this blob. BUT, unless there is a way to ensure the key is
-  //       not used by other apps, this encryption is useless.
+
   fh = open(STATE_FILE, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
   if (fh == -1) {
     vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n", 
STATE_FILE);
     status = TPM_IOERROR;
     goto abort_egress;
   }
-  
-  if ( (bytes_written = write(fh, flat_global, flat_global_size)) != flat_global_size ) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. %d/%d bytes written.\n", bytes_written, flat_global_size);
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  vtpm_globals->DMI_table_dirty = FALSE; 
-  
+
+  if ( ( write(fh, flat_boot_key, boot_key_size) != boot_key_size ) ||
+       ( write(fh, flat_enc, sizeof(UINT32)) != sizeof(UINT32) ) ||
+       ( write(fh, enc_flat_global.bytes, buffer_len(&enc_flat_global)) != buffer_len(&enc_flat_global) ) ||
+       ( write(fh, flat_dmis, flat_dmis_size) != flat_dmis_size ) ) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Failed to completely write service data.\n");
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+
+  vtpm_globals->DMI_table_dirty = FALSE;
+
   goto egress;
-  
+
  abort_egress:
  egress:
-  
-  free(flat_global);
+
+  free(flat_boot_key);
+  free(flat_enc);
+  buffer_free(&enc_flat_global);
+  free(flat_dmis);
   close(fh);
-  
+
   vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis = 
%d)\n", (int) status, dmis);
   return status;
 }
 
 TPM_RESULT VTPM_LoadService(void) {
-  
+
   TPM_RESULT status=TPM_SUCCESS;
   int fh, stat_ret, dmis=0;
   long fh_size = 0, step_size;
-  BYTE *flat_global=NULL;
-  struct pack_buf_t storage_key_pack;
-  UINT32 *dmi_id_key;
-  
+  BYTE *flat_table=NULL;
+  buffer_t  unsealed_data;
+  struct pack_buf_t storage_key_pack, boot_key_pack;
+  UINT32 *dmi_id_key, enc_size;
+
   VTPM_DMI_RESOURCE *dmi_res;
   struct stat file_stat;
-  
+
+  TPM_HANDLE boot_key_handle;
+  TPM_AUTHDATA boot_usage_auth;
+  memset(&boot_usage_auth, 0, sizeof(TPM_AUTHDATA));
+
   fh = open(STATE_FILE, O_RDONLY );
   stat_ret = fstat(fh, &file_stat);
-  if (stat_ret == 0) 
+  if (stat_ret == 0)
     fh_size = file_stat.st_size;
   else {
     status = TPM_IOERROR;
     goto abort_egress;
   }
-  
-  flat_global = (BYTE *) malloc(fh_size);
-  
-  if ((long) read(fh, flat_global, fh_size) != fh_size ) {
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  
+
+  flat_table = (BYTE *) malloc(fh_size);
+
+  if ((long) read(fh, flat_table, fh_size) != fh_size ) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+
+  // Read Boot Key
+  step_size = BSG_UnpackList( flat_table, 2,
+                              BSG_TPM_SIZE32_DATA, &boot_key_pack,
+                              BSG_TYPE_UINT32, &enc_size);
+
+  TPMTRYRETURN(buffer_init(&vtpm_globals->bootKeyWrap, 0, 0) );
+  TPMTRYRETURN(buffer_append_raw(&vtpm_globals->bootKeyWrap, boot_key_pack.size, boot_key_pack.data) );
+
+  //Load Boot Key
+  TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+                              TPM_SRK_KEYHANDLE,
+                              &vtpm_globals->bootKeyWrap,
+                              &SRK_AUTH,
+                              &boot_key_handle,
+                              &vtpm_globals->keyAuth,
+                              &vtpm_globals->bootKey,
+                              FALSE) );
+
+  TPMTRYRETURN( envelope_decrypt(enc_size,
+                                 flat_table + step_size,
+                                 vtpm_globals->manager_tcs_handle,
+                                 boot_key_handle,
+                                 (const TPM_AUTHDATA*) &boot_usage_auth,
+                                 &unsealed_data) );
+  step_size += enc_size;
+
   // Global Values needing to be saved
-  step_size = BSG_UnpackList( flat_global, 4,
-                             BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
-                             BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
-                             BSG_TPM_SECRET,   &vtpm_globals->storage_key_usage_auth,
-                             BSG_TPM_SIZE32_DATA, &storage_key_pack);
-  
+  BSG_UnpackList( unsealed_data.bytes, 3,
+                  BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+                  BSG_TPM_SECRET,   &vtpm_globals->storage_key_usage_auth,
+                  BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
   TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) );
   TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap, storage_key_pack.size, storage_key_pack.data) );
-  
+
   // Per DMI values to be saved
   while ( step_size < fh_size ){
     if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) {
@@ -417,35 +479,38 @@
     } else {
       dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE));
       dmis++;
-      
+
       dmi_res->connected = FALSE;
-      
-      step_size += BSG_UnpackList(flat_global + step_size, 3,
-                                 BSG_TYPE_UINT32, &dmi_res->dmi_id, 
-                                 BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
-                                 BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-      
+
+      step_size += BSG_UnpackList(flat_table + step_size, 3,
+                                 BSG_TYPE_UINT32, &dmi_res->dmi_id,
+                                 BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+                                 BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+
       // install into map
       dmi_id_key = (UINT32 *) malloc (sizeof(UINT32));
       *dmi_id_key = dmi_res->dmi_id;
       if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) {
-       status = TPM_FAIL;
-       goto abort_egress;
+        status = TPM_FAIL;
+        goto abort_egress;
       }
-      
+
     }
-    
-  }
-  
+
+  }
+
   vtpmloginfo(VTPM_LOG_VTPM, "Loaded saved state (dmis = %d).\n", dmis);
   goto egress;
-  
+
  abort_egress:
   vtpmlogerror(VTPM_LOG_VTPM, "Failed to load service data with error = %s\n", 
tpm_get_error_name(status));
  egress:
-  
-  free(flat_global);
+
+  free(flat_table);
   close(fh);
-  
+
+  // TODO: Could be nice and evict BootKey. (Need to add EvictKey to VTSP.)
+
   return status;
 }
+
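
Taken together, the new save/load pair turns the manager's state file
from one cleartext blob into a partially encrypted record. A sketch of
the resulting on-disk layout (the field names here are descriptive, not
taken from the source):

    /* State file as written by VTPM_SaveService():
     *   UINT32      boot_key_size;                  cleartext
     *   BYTE        boot_key_wrap[boot_key_size];   SRK-wrapped boot key
     *   UINT32      enc_size;                       cleartext
     *   BYTE        enc_globals[enc_size];          envelope_encrypt() of owner
     *                                               auth, storage-key auth and
     *                                               the wrapped storage key
     *   then, for each DMI except dom0:
     *     UINT32      dmi_id;                       cleartext
     *     TPM_DIGEST  NVM_measurement;
     *     TPM_DIGEST  DMI_measurement;
     */
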
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtpm_manager.c
--- a/tools/vtpm_manager/manager/vtpm_manager.c Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtpm_manager.c Mon Jan  9 11:22:17 2006
@@ -74,16 +74,15 @@
 #endif
 
 // --------------------------- Well Known Auths --------------------------
-#ifdef WELL_KNOWN_SRK_AUTH
-static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+const TPM_AUTHDATA SRK_AUTH = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                                   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-#endif
 
 #ifdef WELL_KNOWN_OWNER_AUTH
 static BYTE FIXED_OWNER_AUTH[20] =  {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                                   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 #endif
-                                  
+
+
 // -------------------------- Hash table functions --------------------
 
 static unsigned int hashfunc32(void *ky) {
@@ -100,13 +99,7 @@
   
   TPM_RESULT status = TPM_SUCCESS;
   
-  // Generate Auth's for SRK & Owner
-#ifdef WELL_KNOWN_SRK_AUTH 
-  memcpy(vtpm_globals->srk_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA));
-#else    
-  Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) );  
-#endif
-  
+  // Generate Auth for Owner
 #ifdef WELL_KNOWN_OWNER_AUTH 
   memcpy(vtpm_globals->owner_usage_auth, FIXED_OWNER_AUTH, sizeof(TPM_AUTHDATA));
 #else    
@@ -116,14 +109,14 @@
   // Take Owership of TPM
   CRYPTO_INFO ek_cryptoInfo;
   
-  vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n");
   status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo);
   
   // If we can read PubEK then there is no owner and we should take it.
   if (status == TPM_SUCCESS) { 
+    vtpmloginfo(VTPM_LOG_VTPM, "Failed to readEK meaning TPM has an owner. 
Creating Keys off existing SRK.\n");
     TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle,
                                    (const TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,
-                                   (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                                   &SRK_AUTH,
                                    &ek_cryptoInfo,
                                    &vtpm_globals->keyAuth)); 
   
@@ -142,7 +135,7 @@
   TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
                          TPM_ET_KEYHANDLE,
                          TPM_SRK_KEYHANDLE, 
-                         (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                         &SRK_AUTH,
                          &sharedsecret, 
                          &osap) ); 
 
@@ -157,8 +150,43 @@
                                    &vtpm_globals->storageKeyWrap,
                                    &osap) );
   
-  vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
-  
+  // Generate boot key's auth
+  Crypto_GetRandom(  &vtpm_globals->storage_key_usage_auth, 
+                    sizeof(TPM_AUTHDATA) );
+  
+  TPM_AUTHDATA bootKeyWrapAuth;
+  memset(&bootKeyWrapAuth, 0, sizeof(bootKeyWrapAuth));
+  
+  TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
+                         TPM_ET_KEYHANDLE,
+                         TPM_SRK_KEYHANDLE, 
+                         &SRK_AUTH,
+                         &sharedsecret, 
+                         &osap) ); 
+
+  osap.fContinueAuthSession = FALSE;
+ 
+  // FIXME: This key protects the global secrets on disk. It should use TPM
+  //        PCR bindings to limit its use to legit configurations.
+  //        Current binds are open, implying a Trusted VM contains this code.
+  //        If this VM is not Trusted, use measurement and PCR bindings.
+  TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle,
+                                   TPM_KEY_BIND,
+                                   (const TPM_AUTHDATA*)&bootKeyWrapAuth,
+                                   TPM_SRK_KEYHANDLE, 
+                                   (const TPM_AUTHDATA*)&sharedsecret,
+                                   &vtpm_globals->bootKeyWrap,
+                                   &osap) );
+
+  // Populate CRYPTO_INFO vtpm_globals->bootKey. This does not load it into the TPM.
+  TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+                              TPM_SRK_KEYHANDLE,
+                              &vtpm_globals->bootKeyWrap,
+                              NULL,
+                              NULL,
+                              NULL,
+                              &vtpm_globals->bootKey,
+                              TRUE ) );
   goto egress;
   
  abort_egress:
@@ -278,24 +306,26 @@
 #endif
 
     // Check status of rx_fh. If necessary attempt to re-open it.    
+    char* s = NULL;
     if (*rx_fh < 0) {
 #ifdef VTPM_MULTI_VM
-      *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+      s = VTPM_BE_DEV;
 #else
       if (threadType == BE_LISTENER_THREAD) 
   #ifdef DUMMY_BACKEND
-       *rx_fh = open("/tmp/in.fifo", O_RDWR);
+       s = "/tmp/in.fifo";
   #else
-        *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+      s = VTPM_BE_DEV;
   #endif
       else  // DMI Listener   
-       *rx_fh = open(VTPM_RX_FIFO, O_RDWR);
+       s = VTPM_RX_FIFO;
+      *rx_fh = open(s, O_RDWR);
 #endif    
     }
     
     // Respond to failures to open rx_fh
     if (*rx_fh < 0) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n");
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh for %s.\n", s);
 #ifdef VTPM_MULTI_VM
       return TPM_IOERROR; 
 #else
@@ -713,7 +743,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 TPM_RESULT VTPM_Init_Service() {
-  TPM_RESULT status = TPM_FAIL;   
+  TPM_RESULT status = TPM_FAIL, serviceStatus;   
   BYTE *randomsead;
   UINT32 randomsize;
 
@@ -737,7 +767,7 @@
   
   // Create new TCS Object
   vtpm_globals->manager_tcs_handle = 0;
-  
+ 
   TPMTRYRETURN(TCS_create());
   
   // Create TCS Context for service
@@ -756,17 +786,24 @@
   vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
 
        // If failed, create new Service.
-  if (VTPM_LoadService() != TPM_SUCCESS)
+  serviceStatus = VTPM_LoadService();
+  if (serviceStatus == TPM_IOERROR) {
+    vtpmloginfo(VTPM_LOG_VTPM, "Failed to read service file. Assuming first 
time initialization.\n");
     TPMTRYRETURN( VTPM_Create_Service() );    
+  } else if (serviceStatus != TPM_SUCCESS) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Failed to read existing service file");
+    exit(1);
+  }
 
   //Load Storage Key 
   TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
                              TPM_SRK_KEYHANDLE,
                              &vtpm_globals->storageKeyWrap,
-                             (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                             &SRK_AUTH,
                              &vtpm_globals->storageKeyHandle,
                              &vtpm_globals->keyAuth,
-                             &vtpm_globals->storageKey) );
+                             &vtpm_globals->storageKey,
+                              FALSE ) );
 
   // Create entry for Dom0 for control messages
   TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) );
@@ -797,12 +834,11 @@
                free (dmi_itr);
   }
   
-       
+  if ( (vtpm_globals->DMI_table_dirty) && (VTPM_SaveService() != TPM_SUCCESS) )
+    vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+
   TCS_CloseContext(vtpm_globals->manager_tcs_handle);
-  
-  if ( (vtpm_globals->DMI_table_dirty) &&
-       (VTPM_SaveService() != TPM_SUCCESS) )
-    vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+  TCS_destroy();
   
   hashtable_destroy(vtpm_globals->dmi_map, 1);
   free(vtpm_globals);
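
The net effect of these vtpm_manager.c changes is a small key hierarchy
rooted in the physical TPM, with the well-known SRK auth above and a new
bind key guarding the manager's own secrets. Roughly (a sketch
reconstructed from the calls, not a diagram from the source):

    /* TPM SRK (well-known auth)
     *   +-- bootKey    : TPM_KEY_BIND key created by VTPM_Create_Service();
     *   |                envelope-encrypts the secrets written by
     *   |                VTPM_SaveService() in securestorage.c
     *   +-- storageKey : protects per-DMI NVM blobs, as before
     *
     * On startup, VTPM_LoadService() loads bootKey under the SRK and
     * decrypts the saved globals; a TPM_IOERROR is treated as "no state
     * file yet" and triggers first-time setup via VTPM_Create_Service().
     */
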
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtpmpriv.h
--- a/tools/vtpm_manager/manager/vtpmpriv.h     Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtpmpriv.h     Mon Jan  9 11:22:17 2006
@@ -108,6 +108,7 @@
   TCS_CONTEXT_HANDLE  manager_tcs_handle;     // TCS Handle used by manager
   TPM_HANDLE          storageKeyHandle;       // Key used by persistent store
   CRYPTO_INFO         storageKey;             // For software encryption
+  CRYPTO_INFO         bootKey;                // For saving table
   TCS_AUTH            keyAuth;                // OIAP session for storageKey 
   BOOL                DMI_table_dirty;        // Indicates that a command
                                               // has updated the DMI table
@@ -115,15 +116,17 @@
     
   // Persistent Data
   TPM_AUTHDATA        owner_usage_auth;       // OwnerAuth of real TPM
-  TPM_AUTHDATA        srk_usage_auth;         // SRK Auth of real TPM    
   buffer_t            storageKeyWrap;         // Wrapped copy of storageKey
+  TPM_AUTHDATA        srk_usage_auth;
+  TPM_AUTHDATA        storage_key_usage_auth; 
 
-  TPM_AUTHDATA        storage_key_usage_auth; 
-    
+  buffer_t            bootKeyWrap;            // Wrapped copy of boot key 
+
 }VTPM_GLOBALS;
 
-//Global dmi map
-extern VTPM_GLOBALS *vtpm_globals;
+// --------------------------- Global Values --------------------------
+extern VTPM_GLOBALS *vtpm_globals;   // Key info and DMI states
+extern const TPM_AUTHDATA SRK_AUTH;  // SRK Well Known Auth Value
 
 // ********************** Command Handler Prototypes ***********************
 TPM_RESULT VTPM_Handle_Load_NVM(       VTPM_DMI_RESOURCE *myDMI, 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtsp.c
--- a/tools/vtpm_manager/manager/vtsp.c Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtsp.c Mon Jan  9 11:22:17 2006
@@ -144,7 +144,10 @@
   if (memcmp (&hm, &(auth->HMAC), sizeof(TPM_DIGEST)) == 0)  // 0 indicates equality
     return (TPM_SUCCESS);
   else {
-    VTSP_OIAP( hContext, auth);
+    // If specified, reconnect the OIAP session.
+    // NOTE: This only works for TCS's that never have a 0 context. 
+    if (hContext) 
+      VTSP_OIAP( hContext, auth);
     return (TPM_AUTHFAIL);
   }
 }
@@ -157,6 +160,10 @@
   TPMTRYRETURN( TCSP_OIAP(hContext,
                          &auth->AuthHandle,
                          &auth->NonceEven) );
+
+  memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
+  auth->fContinueAuthSession = FALSE;
+
   goto egress;
   
  abort_egress:
@@ -195,6 +202,9 @@
                 BSG_TPM_NONCE, &nonceOddOSAP);
   
   Crypto_HMAC(sharedSecretText, sizeof(sharedSecretText), (BYTE *) usageAuth, TPM_DIGEST_SIZE, (BYTE *) sharedSecret);
+
+  memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
+  auth->fContinueAuthSession = FALSE;
     
   goto egress;
   
@@ -287,9 +297,6 @@
   srkKeyInfo.parms = (BYTE *) &srkRSAkeyInfo;
   
   struct pack_buf_t srkText;
-  
-  // GenerateAuth new nonceOdd    
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   
   //These values are accurate for an enc(AuthData).
   struct pack_buf_t encOwnerAuth, encSrkAuth;
@@ -383,9 +390,6 @@
   BYTE *paramText;        // Digest to make Auth.
   UINT32 paramTextSize;
     
-  // Generate HMAC   
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-  
   paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
   
   paramTextSize = BSG_PackList(paramText, 1,
@@ -504,9 +508,6 @@
   newKeyText.data = flatKey;
   newKeyText.size = flatKeySize;
   
-  // GenerateAuth new nonceOdd    
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-  
   // Generate HMAC
   paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
   
@@ -563,63 +564,66 @@
                         const TPM_AUTHDATA          *parentAuth,
                         TPM_HANDLE                  *newKeyHandle,
                         TCS_AUTH                    *auth,
-                        CRYPTO_INFO                 *cryptoinfo /*= NULL*/) {
-  
-  
-  vtpmloginfo(VTPM_LOG_VTSP, "Loading Key.\n%s","");
+                        CRYPTO_INFO                 *cryptoinfo,
+                        const BOOL                  skipTPMLoad) { 
+  
+  
+  vtpmloginfo(VTPM_LOG_VTSP, "Loading Key %s.\n", (!skipTPMLoad ? "into TPM" : 
"only into memory"));
   
   TPM_RESULT status = TPM_SUCCESS;
   TPM_COMMAND_CODE command = TPM_ORD_LoadKey;
-  
-  BYTE *paramText;        // Digest to make Auth.
+
+  BYTE *paramText=NULL;        // Digest to make Auth.
   UINT32 paramTextSize;
-  
-  if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) || 
-      (newKeyHandle==NULL) || (auth==NULL)) {
-    status = TPM_BAD_PARAMETER;
-    goto abort_egress;
-  }
-  
-  // Generate Extra TCS Parameters
-  TPM_HANDLE phKeyHMAC;
-  
-  // Generate HMAC
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-  
-  paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
-  
-  paramTextSize = BSG_PackList(paramText, 1,
-                              BSG_TPM_COMMAND_CODE, &command);
-  
-  memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, buffer_len(rgbWrappedKeyBlob));
-  paramTextSize += buffer_len(rgbWrappedKeyBlob);
-  
-  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+
+  // SkipTPMLoad stops key from being loaded into TPM, but still generates CRYPTO_INFO for it
+  if (! skipTPMLoad) { 
+  
+    if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) || 
+        (newKeyHandle==NULL) || (auth==NULL)) {
+      status = TPM_BAD_PARAMETER;
+      goto abort_egress;
+    }
+  
+    // Generate Extra TCS Parameters
+    TPM_HANDLE phKeyHMAC;
+  
+    paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
+  
+    paramTextSize = BSG_PackList(paramText, 1,
+                                BSG_TPM_COMMAND_CODE, &command);
+  
+    memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, buffer_len(rgbWrappedKeyBlob));
+    paramTextSize += buffer_len(rgbWrappedKeyBlob);
+  
+    TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
                              parentAuth, auth) );
   
-  // Call TCS
-  TPMTRYRETURN( TCSP_LoadKeyByBlob(  hContext,
-                                    hUnwrappingKey,
-                                    buffer_len(rgbWrappedKeyBlob),
-                                    rgbWrappedKeyBlob->bytes,
-                                    auth,
-                                    newKeyHandle,
-                                    &phKeyHMAC) );
-  
-  // Verify Auth
-  paramTextSize = BSG_PackList(paramText, 3,
-                              BSG_TPM_RESULT, &status,
-                              BSG_TPM_COMMAND_CODE, &command,
-                              BSG_TPM_HANDLE, newKeyHandle);
-  
-  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
-                           parentAuth, auth, 
-                           hContext) );
-  
-  // Unpack/return key structure
+    // Call TCS
+    TPMTRYRETURN( TCSP_LoadKeyByBlob(  hContext,
+                                      hUnwrappingKey,
+                                      buffer_len(rgbWrappedKeyBlob),
+                                      rgbWrappedKeyBlob->bytes,
+                                      auth,
+                                      newKeyHandle,
+                                      &phKeyHMAC) );
+  
+    // Verify Auth
+    paramTextSize = BSG_PackList(paramText, 3,
+                                BSG_TPM_RESULT, &status,
+                                BSG_TPM_COMMAND_CODE, &command,
+                                BSG_TPM_HANDLE, newKeyHandle);
+  
+    TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+                             parentAuth, auth, 
+                             hContext) );
+  } 
+  
+  // Build cryptoinfo structure for software crypto function. 
   if (cryptoinfo != NULL) {
     TPM_KEY newKey;
     
+    // Unpack/return key structure
     BSG_Unpack(BSG_TPM_KEY, rgbWrappedKeyBlob->bytes , &newKey);
     TPM_RSA_KEY_PARMS rsaKeyParms;
     
@@ -669,9 +673,6 @@
   struct pack_buf_t clear_data32;
   BYTE *clear_data_text;
   UINT32 clear_data_size;
-  
-  // Generate HMAC   
-  Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
   
   struct pack_buf_t bound_data32 = {bound_data->size, bound_data->bytes};
   
@@ -781,6 +782,196 @@
   return TPM_SUCCESS;
 }
 
+TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE    hContext,
+                     const TPM_KEY_HANDLE        keyHandle,
+                     const TPM_AUTHDATA          *sealDataAuth,
+                     const TPM_PCR_COMPOSITE     *pcrComp,
+                     const buffer_t              *inData,
+                     TPM_STORED_DATA             *sealedData,
+                     const TPM_SECRET            *osapSharedSecret,
+                     TCS_AUTH                    *auth) {
+
+  TPM_RESULT status = TPM_SUCCESS;
+  TPM_COMMAND_CODE command = TPM_ORD_Seal;
+
+  BYTE *paramText;        // Digest to make Auth.
+  UINT32 paramTextSize;
+
+  // Generate PCR_Info Struct from Comp
+  TPM_PCR_INFO pcrInfo;
+  UINT32 pcrInfoSize, flatpcrSize;
+  BYTE flatpcr[3 +                          // PCR_Select = 3 1 byte banks
+               sizeof(UINT16) +             //              2 byte UINT16
+               sizeof(UINT32) +             // PCR_Comp   = 4 byte UINT32
+               24 * sizeof(TPM_PCRVALUE) ]; //              up to 24 PCRs
+
+  if (pcrComp != NULL) {
+      //printf("\n\tBinding to PCRs: ");
+      //for(int i = 0 ; i < pcrComp->select.sizeOfSelect ; i++)
+      //printf("%2.2x", pcrComp->select.pcrSelect[i]);
+
+      memcpy(&pcrInfo.pcrSelection, &pcrComp->select, sizeof(TPM_PCR_SELECTION));
+
+      flatpcrSize = BSG_Pack(BSG_TPM_PCR_COMPOSITE, (BYTE *) pcrComp, flatpcr);
+      Crypto_SHA1Full((BYTE *) flatpcr, flatpcrSize, (BYTE *) &(pcrInfo.digestAtRelease));
+      memset(&(pcrInfo.digestAtCreation), 0, sizeof(TPM_DIGEST));
+      pcrInfoSize = BSG_Pack(BSG_TPM_PCR_INFO, (BYTE *) &pcrInfo, flatpcr);
+  } else {
+      //printf("\n\tBinding to no PCRS.");
+      pcrInfoSize = 0;
+  }
+
+  // Calculate encUsageAuth
+  BYTE XORbuffer[sizeof(TPM_SECRET) + sizeof(TPM_NONCE)];
+  UINT32 XORbufferSize = sizeof(XORbuffer);
+  TPM_DIGEST XORKey;
+  TPM_ENCAUTH encAuth;
+
+  BSG_PackList( XORbuffer, 2,
+                BSG_TPM_SECRET, osapSharedSecret,
+                BSG_TPM_NONCE, &auth->NonceEven );
+
+  Crypto_SHA1Full(XORbuffer, XORbufferSize, (BYTE *) &XORKey);
+
+  int i;
+  for (i=0; i < TPM_DIGEST_SIZE; i++)
+    ((BYTE *) &encAuth)[i] = ((BYTE *) &XORKey)[i] ^ ((BYTE *) sealDataAuth)[i];
+
+  // Generate Extra TCS Parameters
+  UINT32 inDataSize = buffer_len(inData);
+  struct pack_buf_t inData_pack = {inDataSize, inData->bytes};
+  struct pack_buf_t pcrInfo_pack = {pcrInfoSize, flatpcr};
+
+  UINT32 sealedDataSize;
+  BYTE *flatSealedData=NULL;
+
+  paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
+
+  paramTextSize = BSG_PackList(paramText, 4,
+                               BSG_TPM_COMMAND_CODE, &command,
+                               BSG_TPM_ENCAUTH, &encAuth,
+                               BSG_TPM_SIZE32_DATA, &pcrInfo_pack,
+                               BSG_TPM_SIZE32_DATA, &inData_pack);
+
+  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+                              osapSharedSecret, auth) );
+
+  // Call TCS
+  TPMTRYRETURN( TCSP_Seal( hContext,
+                           keyHandle,
+                           encAuth,
+                           pcrInfoSize,
+                           flatpcr,
+                           inDataSize,
+                           inData->bytes,
+                           auth,
+                           &sealedDataSize,
+                           &flatSealedData) );
+
+  // Unpack/return key structure
+  BSG_Unpack( BSG_TPM_STORED_DATA, flatSealedData, sealedData );
+
+  paramTextSize = BSG_PackList(paramText, 3,
+                               BSG_TPM_RESULT, &status,
+                               BSG_TPM_COMMAND_CODE, &command,
+                               BSG_TPM_STORED_DATA, sealedData);
+
+  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+                            osapSharedSecret, auth,
+                            0) );
+
+
+  goto egress;
+
+ abort_egress:
+ egress:
+
+  if (flatSealedData)
+    TCS_FreeMemory( hContext, flatSealedData);
+
+  free(paramText);
+  return status;
+}
+
+
+TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE    hContext,
+                       const TPM_KEY_HANDLE        keyHandle,
+                       const TPM_STORED_DATA       *sealedData,
+                       const TPM_AUTHDATA          *key_usage_auth,
+                       const TPM_AUTHDATA          *data_usage_auth,
+                       buffer_t                    *outData,
+                       TCS_AUTH                    *auth,
+                       TCS_AUTH                    *dataAuth) {
+
+  TPM_RESULT status = TPM_SUCCESS;
+  TPM_COMMAND_CODE command = TPM_ORD_Unseal;
+
+  BYTE *paramText;        // Digest to make Auth.
+  UINT32 paramTextSize;
+
+  // Generate Extra TCS Parameters
+  UINT32 sealDataSize, clearDataSize;
+  BYTE *flatSealedData= (BYTE *) malloc(sizeof(TPM_VERSION) +
+                                        2 * sizeof(UINT32) +
+                                        sealedData->sealInfoSize +
+                                        sealedData->encDataSize),
+       *clearData=NULL;
+
+  sealDataSize = BSG_Pack(BSG_TPM_STORED_DATA, sealedData, flatSealedData );
+
+  paramText = (BYTE *) malloc(sizeof(BYTE) *  TCPA_MAX_BUFFER_LENGTH);
+
+  paramTextSize = BSG_PackList(paramText, 2,
+                               BSG_TPM_COMMAND_CODE, &command,
+                               BSG_TPM_STORED_DATA, sealedData);
+
+  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+                              key_usage_auth, auth) );
+
+  TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+                              data_usage_auth, dataAuth) );
+  // Call TCS
+  TPMTRYRETURN( TCSP_Unseal(  hContext,
+                              keyHandle,
+                              sealDataSize,
+                              flatSealedData,
+                              auth,
+                              dataAuth,
+                              &clearDataSize,
+                              &clearData) );
+
+  // Verify Auth
+  struct pack_buf_t clearData_pack = {clearDataSize, clearData};
+
+  paramTextSize = BSG_PackList(paramText, 3,
+                               BSG_TPM_RESULT, &status,
+                               BSG_TPM_COMMAND_CODE, &command,
+                               BSG_TPM_SIZE32_DATA, &clearData_pack);
+
+  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+                            key_usage_auth, auth,
+                            hContext) );
+
+  TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+                            data_usage_auth, dataAuth,
+                            hContext) );
+
+  // Unpack/return key structure
+  TPMTRYRETURN( buffer_init(outData, clearDataSize, clearData) );
+
+  goto egress;
+
+ abort_egress:
+ egress:
+
+  if (flatSealedData)
+    TCS_FreeMemory( hContext, clearData);
+
+  free(paramText);
+  return status;
+}
+
+
 // Function Reaches into unsupported TCS command, beware.
 TPM_RESULT VTSP_RawTransmit(const TCS_CONTEXT_HANDLE    hContext,
                             const buffer_t *inbuf,
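
The VTSP_Seal/VTSP_Unseal pair added above wraps TPM_ORD_Seal and TPM_ORD_Unseal for the vTPM manager. A minimal caller sketch, assuming an established TCS context, a loaded storage key, and an OSAP session whose shared secret and auth blocks already exist (variable names here are illustrative, not part of the patch):

   /* Hedged sketch: seal a blob (bound to no PCRs) and unseal it again.
    * All handles, secrets and auth sessions are assumed to come from an
    * existing TCS/OSAP setup; error handling is reduced to one check. */
   TPM_STORED_DATA sealed;
   buffer_t        recovered;
   TPM_RESULT      status;

   /* A NULL pcrComp selects the "bind to no PCRs" path in VTSP_Seal. */
   status = VTSP_Seal(hContext, hKey, &dataAuth, NULL, &clearBuf,
                      &sealed, &osapSharedSecret, &osapAuth);

   if (status == TPM_SUCCESS)
       /* Unseal takes both the key usage auth and the data usage auth. */
       status = VTSP_Unseal(hContext, hKey, &sealed, &keyUsageAuth,
                            &dataAuth, &recovered, &keyAuthSession,
                            &dataAuthSession);
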
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtsp.h
--- a/tools/vtpm_manager/manager/vtsp.h Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtsp.h Mon Jan  9 11:22:17 2006
@@ -86,7 +86,8 @@
                         const TPM_AUTHDATA          *parentAuth,
                         TPM_HANDLE                  *newKeyHandle,
                         TCS_AUTH                    *pAuth,
-                        CRYPTO_INFO                 *cryptoinfo);
+                        CRYPTO_INFO                 *cryptoinfo,
+                        const BOOL                  skipTPMLoad);
 
 TPM_RESULT VTSP_Unbind( const TCS_CONTEXT_HANDLE    hContext,
                         const TPM_KEY_HANDLE        key_handle,
@@ -99,4 +100,22 @@
             const buffer_t *inData, 
             buffer_t *outData);
                         
+TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE    hContext,
+                     const TPM_KEY_HANDLE        keyHandle,
+                     const TPM_AUTHDATA          *sealDataAuth,
+                     const TPM_PCR_COMPOSITE     *pcrComp,
+                     const buffer_t              *inData,
+                     TPM_STORED_DATA             *sealedData,
+                     const TPM_SECRET            *osapSharedSecret,
+                     TCS_AUTH                    *auth);
+
+TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE    hContext,
+                       const TPM_KEY_HANDLE        keyHandle,
+                       const TPM_STORED_DATA       *sealedData,
+                       const TPM_AUTHDATA          *key_usage_auth,
+                       const TPM_AUTHDATA          *data_usage_auth,
+                       buffer_t                    *outData,
+                       TCS_AUTH                    *auth,
+                       TCS_AUTH                    *dataAuth);
+
 #endif //_VTSP_H_
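
The new skipTPMLoad flag on VTSP_LoadKey lets the manager build a CRYPTO_INFO for software crypto without ever loading the key into the hardware TPM. A hedged sketch of that path (parameter order as declared in vtsp.h; the unwrapping-key handle is irrelevant because the TCS round-trip is skipped):

   /* Sketch: populate cryptoinfo only; no TCSP_LoadKeyByBlob is issued.
    * With skipTPMLoad == TRUE the NULL-parameter checks are bypassed,
    * but wrappedKey must still hold a valid packed TPM_KEY blob. */
   CRYPTO_INFO info;
   TPM_RESULT  status;

   status = VTSP_LoadKey(hContext,
                         0,            /* hUnwrappingKey: unused here  */
                         &wrappedKey,  /* buffer_t with the key blob   */
                         NULL, NULL, NULL,
                         &info,
                         TRUE);        /* skipTPMLoad */
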
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/tcs/tcs.c
--- a/tools/vtpm_manager/tcs/tcs.c      Mon Jan  9 11:19:55 2006
+++ b/tools/vtpm_manager/tcs/tcs.c      Mon Jan  9 11:22:17 2006
@@ -636,7 +636,7 @@
   TDDL_UINT32  OutLength = TCPA_MAX_BUFFER_LENGTH;
   
   // check input params
-  if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || *SealedData == NULL)
+  if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || SealedData == NULL)
     return TPM_BAD_PARAMETER;
   
   // Convert Byte Input parameter in the input byte stream InBuf
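
The tcs.c change fixes an inverted sanity check: SealedData is an output parameter of type BYTE **, so the old guard dereferenced it before validating it and rejected exactly the callers it should accept, namely those passing the address of a NULL pointer for the callee to fill in. Illustrative caller (argument names taken from the VTSP_Seal code above):

   BYTE   *flatSealedData = NULL;   /* callee allocates this buffer */
   UINT32  sealedDataSize;

   /* The old test (*SealedData == NULL) failed for this caller; the
    * corrected test (SealedData == NULL) accepts it. */
   status = TCSP_Seal(hContext, keyHandle, encAuth,
                      pcrInfoSize, flatpcr,
                      inDataSize, inData->bytes,
                      auth, &sealedDataSize, &flatSealedData);
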
diff -r 25e3c8668f1f -r 8af1199488d3 tools/xentrace/Makefile
--- a/tools/xentrace/Makefile   Mon Jan  9 11:19:55 2006
+++ b/tools/xentrace/Makefile   Mon Jan  9 11:22:17 2006
@@ -15,24 +15,32 @@
 OBJS     = $(patsubst %.c,%.o,$(wildcard *.c))
 
 BIN      = xentrace tbctl setsize
-LIBBIN   = xenctx
+LIBBIN   = 
 SCRIPTS  = xentrace_format
 MAN1     = $(wildcard *.1)
 MAN8     = $(wildcard *.8)
+
+ifeq ($(XEN_TARGET_ARCH),x86_32)
+LIBBIN  += xenctx
+endif
+
+ifeq ($(XEN_TARGET_ARCH),x86_64)
+LIBBIN  += xenctx
+endif
 
 all: build
 build: $(BIN) $(LIBBIN)
 
 install: build
        [ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
-       [ -d $(DESTDIR)/usr/$(LIBDIR)/xen/bin ] || \
+       [ -z "$(LIBBIN)"] || [ -d $(DESTDIR)/usr/$(LIBDIR)/xen/bin ] || \
                $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
        [ -d $(DESTDIR)/usr/share/man/man1 ] || \
                $(INSTALL_DIR) $(DESTDIR)/usr/share/man/man1
        [ -d $(DESTDIR)/usr/share/man/man8 ] || \
                $(INSTALL_DIR) $(DESTDIR)/usr/share/man/man8
        $(INSTALL_PROG) $(BIN) $(SCRIPTS) $(DESTDIR)/usr/bin
-       $(INSTALL_PROG) $(LIBBIN) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
+       [ -z "$(LIBBIN)"] || $(INSTALL_PROG) $(LIBBIN) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
        $(INSTALL_DATA) $(MAN1) $(DESTDIR)/usr/share/man/man1
        $(INSTALL_DATA) $(MAN8) $(DESTDIR)/usr/share/man/man8
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c   Mon Jan  9 11:19:55 2006
+++ b/tools/xentrace/xenctx.c   Mon Jan  9 11:22:17 2006
@@ -380,10 +380,10 @@
         exit(-1);
     }
 
-    ret = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, &ctx);
+    ret = xc_vcpu_getcontext(xc_handle, domid, vcpu, &ctx);
     if (ret < 0) {
         xc_domain_unpause(xc_handle, domid);
-        perror("xc_domain_get_vcpu_context");
+        perror("xc_vcpu_getcontext");
         exit(-1);
     }
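
xenctx follows the libxc rename of xc_domain_get_vcpu_context() to xc_vcpu_getcontext(); the surrounding pause/fetch/unpause pattern is unchanged. In sketch form (xc_handle, domid and vcpu set up as elsewhere in xenctx.c):

   vcpu_guest_context_t ctx;

   xc_domain_pause(xc_handle, domid);
   if (xc_vcpu_getcontext(xc_handle, domid, vcpu, &ctx) < 0) {
       xc_domain_unpause(xc_handle, domid);
       perror("xc_vcpu_getcontext");
       exit(-1);
   }
   /* ... dump ctx.user_regs ... */
   xc_domain_unpause(xc_handle, domid);
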
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/xm-test/tests/network-attach/Makefile.am
--- a/tools/xm-test/tests/network-attach/Makefile.am    Mon Jan  9 11:19:55 2006
+++ b/tools/xm-test/tests/network-attach/Makefile.am    Mon Jan  9 11:22:17 2006
@@ -6,7 +6,7 @@
        03_network_attach_detach_multiple_pos.test  \
        04_network_attach_baddomain_neg.test
 
-XFAIL_TESTS = 03_network_attach_detach_multiple_pos.test
+XFAIL_TESTS = 
 
 EXTRA_DIST = $(TESTS) $(XFAIL_TESTS) network_utils.py
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile    Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/Makefile    Mon Jan  9 11:22:17 2006
@@ -23,6 +23,13 @@
        __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o                   \
        __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o
 
+# xen stack unwinder
+# unwind_decoder.c is included in unwind.c
+OBJS += unwind.o
+#unwind.o: CFLAGS += -DUNW_DEBUG=4
+
+OBJS += process-linux-xen.o
+
 # perfmon.o
 # unwind.o needed for kernel unwinding (rare)
 
@@ -31,11 +38,26 @@
 # remove following line if not privifying in memory
 # OBJS += privify.o
 
-default: $(OBJS) head.o xen.lds.s
-       $(LD) -r -o arch.o $(OBJS)
+default: $(TARGET)
+
+$(CURDIR)/arch.o: $(OBJS)
+       $(LD) -r -o $@ $(OBJS)
+
+$(TARGET)-syms: $(ALL_OBJS) head.o xen.lds.s
        $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
-               -Map map.out head.o $(ALL_OBJS) -o $(TARGET)-syms
-       $(OBJCOPY) -R .note -R .comment -S $(TARGET)-syms $(TARGET)
+               -Map map.out head.o $(ALL_OBJS) -o $@
+       $(NM) -n $@ | $(BASEDIR)/tools/symbols > $(BASEDIR)/xen-syms.S
+       $(MAKE) $(BASEDIR)/xen-syms.o
+       $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
+               -Map map.out head.o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
+       $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
+       $(MAKE) $(BASEDIR)/xen-syms.o
+       $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
+               -Map map.out head.o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
+       rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
+
+$(TARGET): $(TARGET)-syms
+       $(OBJCOPY) -R .note -R .comment -S $(TARGET)-syms $@
        $(NM) -n $(TARGET)-syms | grep -v '\( [aUw] \)\|\(__crc_\)\|\( \$[adt]\)'\
                 > $(BASEDIR)/System.map
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/entry.S
--- a/xen/arch/ia64/linux-xen/entry.S   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/entry.S   Mon Jan  9 11:22:17 2006
@@ -1417,7 +1417,6 @@
        br.cond.sptk.many rp                            // goes to ia64_leave_kernel
 END(ia64_prepare_handle_unaligned)
 
-#ifndef XEN
        //
        // unw_init_running(void (*callback)(info, arg), void *arg)
        //
@@ -1463,6 +1462,7 @@
        br.ret.sptk.many rp
 END(unw_init_running)
 
+#ifndef XEN
        .rodata
        .align 8
        .globl sys_call_table
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/mmio.c
--- a/xen/arch/ia64/vmx/mmio.c  Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/mmio.c  Mon Jan  9 11:22:17 2006
@@ -29,7 +29,7 @@
 #include <asm/vmx_vcpu.h>
 #include <asm/privop.h>
 #include <asm/types.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/mm.h>
 #include <asm/vmx.h>
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vlsapic.c
--- a/xen/arch/ia64/vmx/vlsapic.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vlsapic.c       Mon Jan  9 11:22:17 2006
@@ -218,7 +218,7 @@
  */
 void vtm_domain_out(VCPU *vcpu)
 {
-    if(!is_idle_task(vcpu->domain))
+    if(!is_idle_domain(vcpu->domain))
        rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
 }
 
@@ -230,7 +230,7 @@
 {
     vtime_t     *vtm;
 
-    if(!is_idle_task(vcpu->domain)) {
+    if(!is_idle_domain(vcpu->domain)) {
        vtm=&(vcpu->arch.arch_vmx.vtm);
        vtm_interruption_update(vcpu, vtm);
     }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_init.c
--- a/xen/arch/ia64/vmx/vmx_init.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_init.c      Mon Jan  9 11:22:17 2006
@@ -42,7 +42,7 @@
 #include <xen/lib.h>
 #include <asm/vmmu.h>
 #include <public/arch-ia64.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx_phy_mode.h>
 #include <asm/processor.h>
 #include <asm/vmx.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_process.c
--- a/xen/arch/ia64/vmx/vmx_process.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_process.c   Mon Jan  9 11:22:17 2006
@@ -53,6 +53,7 @@
 #define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
 
 
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
 extern void rnat_consumption (VCPU *vcpu);
 #define DOMN_PAL_REQUEST    0x110000
 
@@ -185,8 +186,11 @@
        }else if(iim == DOMN_PAL_REQUEST){
         pal_emul(current);
                vmx_vcpu_increment_iip(current);
-    }  else
+    } else {
+               if (iim == 0) 
+                       die_if_kernel("bug check", regs, iim);
                vmx_reflect_interruption(ifa,isr,iim,11,regs);
+    }
 }
 
 
@@ -227,7 +231,7 @@
        struct domain *d = current->domain;
        struct vcpu *v = current;
        // FIXME: Will this work properly if doing an RFI???
-       if (!is_idle_task(d) ) {        // always comes from guest
+       if (!is_idle_domain(d) ) {      // always comes from guest
                extern void vmx_dorfirfi(void);
                struct pt_regs *user_regs = vcpu_regs(current);
                if (local_softirq_pending())
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_support.c
--- a/xen/arch/ia64/vmx/vmx_support.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_support.c   Mon Jan  9 11:22:17 2006
@@ -21,7 +21,7 @@
  */
 #include <xen/config.h>
 #include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx.h>
 #include <asm/vmx_vcpu.h>
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/dom_fw.c
--- a/xen/arch/ia64/xen/dom_fw.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/dom_fw.c        Mon Jan  9 11:22:17 2006
@@ -861,12 +861,16 @@
        bp->console_info.orig_x = 0;
        bp->console_info.orig_y = 24;
        bp->fpswa = 0;
-        bp->initrd_start = (dom0_start+dom0_size) -
-                (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
-        bp->initrd_size = ia64_boot_param->initrd_size;
-                printf(" initrd start %0xlx", bp->initrd_start);
-                printf(" initrd size %0xlx", bp->initrd_size);
-
-
+       if (d == dom0) {
+               bp->initrd_start = (dom0_start+dom0_size) -
+                 (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
+               bp->initrd_size = ia64_boot_param->initrd_size;
+       }
+       else {
+               bp->initrd_start = d->arch.initrd_start;
+               bp->initrd_size  = d->arch.initrd_len;
+       }
+       printf(" initrd start %0xlx", bp->initrd_start);
+       printf(" initrd size %0xlx", bp->initrd_size);
        return bp;
 }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/domain.c        Mon Jan  9 11:22:17 2006
@@ -19,6 +19,7 @@
 #include <xen/delay.h>
 #include <xen/softirq.h>
 #include <xen/mm.h>
+#include <xen/iocap.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -45,7 +46,7 @@
 #include <asm/vmx_vcpu.h>
 #include <asm/vmx_vpd.h>
 #include <asm/pal.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #define CONFIG_DOMAIN0_CONTIGUOUS
 unsigned long dom0_start = -1L;
@@ -181,7 +182,7 @@
        memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
 }
 
-void arch_do_createdomain(struct vcpu *v)
+int arch_do_createdomain(struct vcpu *v)
 {
        struct domain *d = v->domain;
        struct thread_info *ti = alloc_thread_info(v);
@@ -248,7 +249,9 @@
                }
        } else
                d->arch.mm = NULL;
-       printf ("arch_do_create_domain: domain=%p\n", d);
+       printf ("arch_do_create_domain: domain=%p\n", d);
+
+       return 0;
 }
 
 void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
@@ -291,16 +294,7 @@
        d->arch.cmdline      = c->cmdline;
        new_thread(v, regs->cr_iip, 0, 0);
 
-#ifdef CONFIG_IA64_SPLIT_CACHE
-    /* Sync d/i cache conservatively */
-    if (!running_on_sim) {
-        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
-        if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
-            printk("PAL CACHE FLUSH failed for dom0.\n");
-        else
-            printk("Sync i/d cache for guest SUCC\n");
-    }
-#endif
+       sync_split_caches();
        v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
        if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
                           c->vcpu.privregs, sizeof(mapped_regs_t))) {
@@ -428,7 +422,7 @@
                {
                        p = alloc_domheap_page(d);
                        // zero out pages for security reasons
-                       memset(__va(page_to_phys(p)),0,PAGE_SIZE);
+                       if (p) memset(__va(page_to_phys(p)),0,PAGE_SIZE);
                }
                if (unlikely(!p)) {
 printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
@@ -763,7 +757,10 @@
  */
 void physdev_init_dom0(struct domain *d)
 {
-       set_bit(_DOMF_physdev_access, &d->domain_flags);
+       if (iomem_permit_access(d, 0UL, ~0UL))
+               BUG();
+       if (irqs_permit_access(d, 0, NR_PIRQS-1))
+               BUG();
 }
 
 unsigned int vmx_dom0 = 0;
@@ -912,9 +909,9 @@
        memset(si, 0, PAGE_SIZE);
        d->shared_info->arch.start_info_pfn = __pa(si) >> PAGE_SHIFT;
        sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
+       si->nr_pages     = d->tot_pages;
 
 #if 0
-       si->nr_pages     = d->tot_pages;
        si->shared_info  = virt_to_phys(d->shared_info);
        si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
        //si->pt_base      = vpt_start;
@@ -959,16 +956,7 @@
 
        new_thread(v, pkern_entry, 0, 0);
        physdev_init_dom0(d);
-#ifdef CONFIG_IA64_SPLIT_CACHE
-    /* Sync d/i cache conservatively */
-    if (!running_on_sim) {
-        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
-        if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
-            printk("PAL CACHE FLUSH failed for dom0.\n");
-        else
-            printk("Sync i/d cache for guest SUCC\n");
-    }
-#endif
+       sync_split_caches();
 
        // FIXME: Hack for keyboard input
 #ifdef CLONE_DOMAIN0
@@ -1027,16 +1015,7 @@
 #endif
        new_thread(v, pkern_entry, 0, 0);
        printk("new_thread returns\n");
-#ifdef CONFIG_IA64_SPLIT_CACHE
-    /* Sync d/i cache conservatively */
-    if (!running_on_sim) {
-        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
-        if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
-            printk("PAL CACHE FLUSH failed for dom0.\n");
-        else
-            printk("Sync i/d cache for guest SUCC\n");
-    }
-#endif
+       sync_split_caches();
        __set_bit(0x30, VCPU(v, delivery_mask));
 
        return 0;
@@ -1050,16 +1029,7 @@
                v->domain->domain_id);
        loaddomainelfimage(v->domain,v->domain->arch.image_start);
        new_thread(v, v->domain->arch.entry, 0, 0);
-#ifdef CONFIG_IA64_SPLIT_CACHE
-    /* Sync d/i cache conservatively */
-    if (!running_on_sim) {
-        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
-        if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
-            printk("PAL CACHE FLUSH failed for dom0.\n");
-        else
-            printk("Sync i/d cache for guest SUCC\n");
-    }
-#endif
+       sync_split_caches();
 }
 #endif
 
@@ -1098,15 +1068,6 @@
 void domain_pend_keyboard_interrupt(int irq)
 {
        vcpu_pend_interrupt(dom0->vcpu[0],irq);
-}
-
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
-       if ( v->processor == newcpu )
-               return;
-
-       set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
-       v->processor = newcpu;
 }
 
 void sync_vcpu_execstate(struct vcpu *v)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/hyperprivop.S   Mon Jan  9 11:22:17 2006
@@ -543,6 +543,13 @@
        extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
        cmp.ne p7,p0=r21,r0 ;;
 (p7)   br.spnt.few dispatch_break_fault ;;
+        movl r20=IA64_PSR_CPL ;; 
+        and r22=r20,r30 ;;
+        cmp.ne p7,p0=r22,r0
+(p7)    br.spnt.many 1f ;;
+        cmp.eq p7,p0=r17,r0
+(p7)    br.spnt.few dispatch_break_fault ;;
+1:
 #if 1 /* special handling in case running on simulator */
        movl r20=first_break;;
        ld4 r23=[r20];;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/irq.c   Mon Jan  9 11:22:17 2006
@@ -1377,9 +1377,6 @@
     irq_guest_action_t *action;
     unsigned long       flags;
     int                 rc = 0;
-
-    if ( !IS_CAPABLE_PHYSDEV(d->domain) )
-        return -EPERM;
 
     spin_lock_irqsave(&desc->lock, flags);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/ivt.S   Mon Jan  9 11:22:17 2006
@@ -839,6 +839,8 @@
        mov r17=cr.iim
        mov r31=pr
        ;;
+       cmp.eq p7,p0=r17,r0
+(p7)   br.spnt.few dispatch_break_fault ;;
        movl r18=XSI_PSR_IC
        ;;
        ld8 r19=[r18]
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/process.c
--- a/xen/arch/ia64/xen/process.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/process.c       Mon Jan  9 11:22:17 2006
@@ -33,6 +33,7 @@
 #include <xen/multicall.h>
 
 extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64);
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
 
 extern unsigned long dom0_start, dom0_size;
 
@@ -64,26 +65,16 @@
 
 extern struct schedule_data schedule_data[NR_CPUS];
 
-void schedule_tail(struct vcpu *next)
-{
-       unsigned long rr7;
-       //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
-       //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
-
-    // TG: Real HACK FIXME.
-    // This is currently necessary because when a new domain is started, 
-    // the context_switch function of xen/common/schedule.c(__enter_scheduler)
-    // never returns.  Therefore, the lock must be released.
-    // schedule_tail is only called when a domain is started.
-    spin_unlock_irq(&schedule_data[current->processor].schedule_lock);
-
-       /* rr7 will be postponed to last point when resuming back to guest */
-    if(VMX_DOMAIN(current)){
-       vmx_load_all_rr(current);
-    }else{
-           load_region_regs(current);
-            vcpu_load_kernel_regs(current);
-    }
+void schedule_tail(struct vcpu *prev)
+{
+       context_saved(prev);
+
+       if (VMX_DOMAIN(current)) {
+               vmx_load_all_rr(current);
+       } else {
+               load_region_regs(current);
+               vcpu_load_kernel_regs(current);
+       }
 }
 
 void tdpfoo(void) { }
@@ -251,7 +242,7 @@
        struct domain *d = current->domain;
        struct vcpu *v = current;
        // FIXME: Will this work properly if doing an RFI???
-       if (!is_idle_task(d) && user_mode(regs)) {
+       if (!is_idle_domain(d) && user_mode(regs)) {
                //vcpu_poke_timer(v);
                if (vcpu_deliverable_interrupts(v))
                        reflect_extint(regs);
@@ -686,6 +677,8 @@
                        vcpu_increment_iip(current);
        }
        else {
+               if (iim == 0) 
+                       die_if_kernel("bug check", regs, iim);
                PSCB(v,iim) = iim;
                reflect_interruption(isr,regs,IA64_BREAK_VECTOR);
        }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c  Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/vcpu.c  Mon Jan  9 11:22:17 2006
@@ -1085,7 +1085,7 @@
        /* gloss over the wraparound problem for now... we know it exists
         * but it doesn't matter right now */
 
-       if (is_idle_task(vcpu->domain)) {
+       if (is_idle_domain(vcpu->domain)) {
 //             printf("****** vcpu_set_next_timer called during idle!!\n");
                vcpu_safe_set_itm(s);
                return;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xenmisc.c       Mon Jan  9 11:22:17 2006
@@ -25,7 +25,6 @@
 int phys_proc_id[NR_CPUS];
 unsigned long loops_per_jiffy = (1<<12);       // from linux/init/main.c
 
-void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n"); }
 void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check abort handling)\n"); }
 void ia64_mca_cpu_init(void *x) { }
 void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { }
@@ -180,11 +179,6 @@
 // from arch/ia64/traps.c
 ///////////////////////////////
 
-void show_registers(struct pt_regs *regs)
-{
-       printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n");
-}
-
 int is_kernel_text(unsigned long addr)
 {
        extern char _stext[], _etext[];
@@ -236,7 +230,13 @@
 
 void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */
 {
-       printk("die_if_kernel: called, not implemented\n");
+       if (user_mode(regs))
+               return;
+
+       printk("%s: %s %ld\n", __func__, str, err);
+       debugtrace_dump();
+       show_registers(regs);
+       domain_crash_synchronous();
 }
 
 long
@@ -320,18 +320,15 @@
        ia64_set_iva(&ia64_ivt);
        ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
                VHPT_ENABLED);
-       if (!is_idle_task(current->domain)) {
+       if (!is_idle_domain(current->domain)) {
                load_region_regs(current);
                vcpu_load_kernel_regs(current);
                    if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
        }
            if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
     }
-}
-
-void context_switch_finalise(struct vcpu *next)
-{
-       /* nothing to do */
+
+    context_saved(prev);
 }
 
 void continue_running(struct vcpu *same)
@@ -368,3 +365,23 @@
                goto loop;
        }
 }
+
+/* FIXME: for the forseeable future, all cpu's that enable VTi have split
+ *  caches and all cpu's that have split caches enable VTi.  This may
+ *  eventually be untrue though. */
+#define cpu_has_split_cache    vmx_enabled
+extern unsigned int vmx_enabled;
+
+void sync_split_caches(void)
+{
+       unsigned long ret, progress = 0;
+
+       if (cpu_has_split_cache) {
+               /* Sync d/i cache conservatively */
+               ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
+               if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
+                       printk("PAL CACHE FLUSH failed\n");
+               else printk("Sync i/d cache for guest SUCC\n");
+       }
+       else printk("sync_split_caches ignored for CPU with no split cache\n");
+}
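
sync_split_caches() consolidates the four identical CONFIG_IA64_SPLIT_CACHE blocks that domain.c used to open-code, so each call site shrinks to a single line after loading a guest image or starting a new thread:

   /* Sketch of a refactored call site in xen/arch/ia64/xen/domain.c. */
   new_thread(v, pkern_entry, 0, 0);
   sync_split_caches();   /* conservative d/i cache sync; skipped (with
                             a message) on CPUs without split caches */
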
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xensetup.c      Mon Jan  9 11:22:17 2006
@@ -26,7 +26,7 @@
 
 char saved_command_line[COMMAND_LINE_SIZE];
 
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+struct vcpu *idle_domain[NR_CPUS] = { &idle0_vcpu };
 
 cpumask_t cpu_present_map;
 
@@ -382,8 +382,7 @@
         panic("Could not set up DOM0 guest OS\n");
 
     /* PIN domain0 on CPU 0.  */
-    dom0->vcpu[0]->cpumap=1;
-    set_bit(_VCPUF_cpu_pinned, &dom0->vcpu[0]->vcpu_flags);
+    dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0);
 
 #ifdef CLONE_DOMAIN0
     {
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xentime.c
--- a/xen/arch/ia64/xen/xentime.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xentime.c       Mon Jan  9 11:22:17 2006
@@ -127,7 +127,7 @@
                        vcpu_wake(dom0->vcpu[0]);
                }
        }
-       if (!is_idle_task(current->domain))  {
+       if (!is_idle_domain(current->domain))  {
                if (vcpu_timer_expired(current)) {
                        vcpu_pend_timer(current);
                        // ensure another timer interrupt happens even if domain doesn't
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/Makefile     Mon Jan  9 11:22:17 2006
@@ -29,6 +29,7 @@
 endif
 
 OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
+OBJS := $(subst $(TARGET_SUBARCH)/xen.lds.o,,$(OBJS))
 
 ifneq ($(crash_debug),y)
 OBJS := $(patsubst cdb%.o,,$(OBJS))
@@ -43,21 +44,24 @@
 $(CURDIR)/arch.o: $(OBJS)
        $(LD) $(LDFLAGS) -r -o $@ $(OBJS)
 
-$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds
-       $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds
+       $(LD) $(LDFLAGS) -T xen.lds -N \
            boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@
        $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
        $(MAKE) $(BASEDIR)/xen-syms.o
-       $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+       $(LD) $(LDFLAGS) -T xen.lds -N \
            boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
        $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
        $(MAKE) $(BASEDIR)/xen-syms.o
-       $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+       $(LD) $(LDFLAGS) -T xen.lds -N \
            boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
        rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
 
 asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
        $(CC) $(CFLAGS) -S -o $@ $<
+
+xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS)
+       $(CC) $(CFLAGS) -P -E -Ui386 -D__ASSEMBLY__ -o $@ $<
 
 boot/mkelf32: boot/mkelf32.c
        $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
@@ -73,5 +77,6 @@
        rm -f dm/*.o dm/*~ dm/core
        rm -f genapic/*.o genapic/*~ genapic/core
        rm -f cpu/*.o cpu/*~ cpu/core
+       rm -f xen.lds
 
 .PHONY: default clean
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/boot/x86_32.S        Mon Jan  9 11:22:17 2006
@@ -1,5 +1,6 @@
 #include <xen/config.h>
 #include <public/xen.h>
+#include <asm/asm_defns.h>
 #include <asm/desc.h>
 #include <asm/page.h>
 #include <asm/msr.h>
@@ -53,6 +54,7 @@
         mov     %ecx,%gs
         ljmp    $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
 1:      lss     stack_start-__PAGE_OFFSET,%esp
+        add     $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
 
         /* Reset EFLAGS (subsumes CLI and CLD). */
        pushl   $0
@@ -98,7 +100,7 @@
 1:      stosl   /* low mappings cover as much physmem as possible */
         add     $4,%edi
         add     $(1<<L2_PAGETABLE_SHIFT),%eax
-        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
+        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
         jne     1b
 #else
         /* Initialize low and high mappings of all memory with 4MB pages */
@@ -111,7 +113,7 @@
         jne     1b
 1:      stosl   /* low mappings cover as much physmem as possible */
         add     $(1<<L2_PAGETABLE_SHIFT),%eax
-        cmp     $__HYPERVISOR_VIRT_START+0xe3,%eax
+        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
         jne     1b
 #endif
         
@@ -189,7 +191,7 @@
 /*** STACK LOCATION ***/
         
 ENTRY(stack_start)
-        .long cpu0_stack + STACK_SIZE - 200 - __PAGE_OFFSET
+        .long cpu0_stack
         .long __HYPERVISOR_DS
         
 /*** DESCRIPTOR TABLES ***/
@@ -256,10 +258,6 @@
         .fill 1*PAGE_SIZE,1,0
 #endif
 
-#if (STACK_ORDER == 0)
-.section ".bss.page_aligned","w"
-#else
-.section ".bss.twopage_aligned","w"
-#endif
+.section ".bss.stack_aligned","w"
 ENTRY(cpu0_stack)
         .fill STACK_SIZE,1,0
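
Both boot paths now point the initial stack at cpu0_stack + STACK_SIZE - CPUINFO_sizeof rather than the old "- 200" slop, so struct cpu_info sits at the very top of the STACK_SIZE-aligned stack provided by the new ".bss.stack_aligned" section. A hedged C restatement of the arithmetic (the function name is illustrative, not from the tree):

   /* Sketch: with the stack aligned to STACK_SIZE, the cpu_info block
    * can be recovered from any stack-pointer value by rounding up to
    * the top of the stack and stepping back one structure. */
   struct cpu_info *info_from_sp(unsigned long sp)
   {
       return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
   }
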
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/boot/x86_64.S        Mon Jan  9 11:22:17 2006
@@ -1,5 +1,6 @@
 #include <xen/config.h>
 #include <public/xen.h>
+#include <asm/asm_defns.h>
 #include <asm/desc.h>
 #include <asm/page.h>
 #include <asm/msr.h>
@@ -121,7 +122,8 @@
         mov     %rcx,%cr4
 
         mov     stack_start(%rip),%rsp
-        
+        or      $(STACK_SIZE-CPUINFO_sizeof),%rsp
+
         /* Reset EFLAGS (subsumes CLI and CLD). */
         pushq   $0
         popf
@@ -140,7 +142,7 @@
         mov     %ecx,%ss
 
         lidt    idt_descr(%rip)
-                
+
         cmp     $(SECONDARY_CPU_FLAG),%ebx
         je      start_secondary
 
@@ -219,7 +221,7 @@
         .quad   idt_table
 
 ENTRY(stack_start)
-        .quad   cpu0_stack + STACK_SIZE - 200
+        .quad   cpu0_stack
 
 high_start:
         .quad   __high_start
@@ -265,10 +267,6 @@
         .org 0x4000 + PAGE_SIZE
         .code64
 
-#if (STACK_ORDER == 0)
-.section ".bss.page_aligned","w"
-#else
-.section ".bss.twopage_aligned","w"
-#endif
+.section ".bss.stack_aligned","w"
 ENTRY(cpu0_stack)
         .fill STACK_SIZE,1,0
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dm/i8259.c
--- a/xen/arch/x86/dm/i8259.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/dm/i8259.c   Mon Jan  9 11:22:17 2006
@@ -29,7 +29,7 @@
 #include <xen/lib.h>
 #include <xen/errno.h>
 #include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx.h>
 #include <asm/vmx_vpic.h>
 #include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dm/vmx_vioapic.c
--- a/xen/arch/x86/dm/vmx_vioapic.c     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/dm/vmx_vioapic.c     Mon Jan  9 11:22:17 2006
@@ -37,7 +37,7 @@
 #include <xen/lib.h>
 #include <xen/errno.h>
 #include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx.h>
 #include <asm/vmx_vpic.h>
 #include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/dom0_ops.c   Mon Jan  9 11:22:17 2006
@@ -17,6 +17,7 @@
 #include <asm/msr.h>
 #include <xen/trace.h>
 #include <xen/console.h>
+#include <xen/iocap.h>
 #include <asm/shadow.h>
 #include <asm/irq.h>
 #include <asm/processor.h>
@@ -35,13 +36,13 @@
 
 static void write_msr_for(void *unused)
 {
-    if ( ((1 << current->processor) & msr_cpu_mask) )
+    if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
         (void)wrmsr_user(msr_addr, msr_lo, msr_hi);
 }
 
 static void read_msr_for(void *unused)
 {
-    if ( ((1 << current->processor) & msr_cpu_mask) )
+    if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
         (void)rdmsr_user(msr_addr, msr_lo, msr_hi);
 }
 
@@ -102,12 +103,27 @@
             op->u.add_memtype.nr_pfns,
             op->u.add_memtype.type,
             1);
+        if (ret > 0)
+        {
+            (void)__put_user(0, &u_dom0_op->u.add_memtype.handle);
+            (void)__put_user(ret, &u_dom0_op->u.add_memtype.reg);
+            ret = 0;
+        }
     }
     break;
 
     case DOM0_DEL_MEMTYPE:
     {
-        ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
+        if (op->u.del_memtype.handle == 0
+            /* mtrr/main.c otherwise does a lookup */
+            && (int)op->u.del_memtype.reg >= 0)
+        {
+            ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
+            if (ret > 0)
+                ret = 0;
+        }
+        else
+            ret = -EINVAL;
     }
     break;
 
@@ -141,7 +157,6 @@
         struct domain *d;
         unsigned int fp = op->u.ioport_permission.first_port;
         unsigned int np = op->u.ioport_permission.nr_ports;
-        unsigned int p;
 
         ret = -EINVAL;
         if ( (fp + np) > 65536 )
@@ -152,26 +167,12 @@
             op->u.ioport_permission.domain)) == NULL) )
             break;
 
-        ret = -ENOMEM;
-        if ( d->arch.iobmp_mask != NULL )
-        {
-            if ( (d->arch.iobmp_mask = xmalloc_array(
-                u8, IOBMP_BYTES)) == NULL )
-            {
-                put_domain(d);
-                break;
-            }
-            memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
-        }
-
-        ret = 0;
-        for ( p = fp; p < (fp + np); p++ )
-        {
-            if ( op->u.ioport_permission.allow_access )
-                clear_bit(p, d->arch.iobmp_mask);
-            else
-                set_bit(p, d->arch.iobmp_mask);
-        }
+        if ( np == 0 )
+            ret = 0;
+        else if ( op->u.ioport_permission.allow_access )
+            ret = ioports_permit_access(d, fp, fp + np - 1);
+        else
+            ret = ioports_deny_access(d, fp, fp + np - 1);
 
         put_domain(d);
     }
@@ -193,7 +194,7 @@
         memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
         ret = 0;
         if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
-           ret = -EFAULT;
+            ret = -EFAULT;
     }
     break;
     
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/domain.c     Mon Jan  9 11:22:17 2006
@@ -20,6 +20,7 @@
 #include <xen/delay.h>
 #include <xen/softirq.h>
 #include <xen/grant_table.h>
+#include <xen/iocap.h>
 #include <asm/regs.h>
 #include <asm/mc146818rtc.h>
 #include <asm/system.h>
@@ -35,9 +36,7 @@
 #include <xen/console.h>
 #include <xen/elf.h>
 #include <asm/vmx.h>
-#include <asm/vmx_vmcs.h>
 #include <asm/msr.h>
-#include <asm/physdev.h>
 #include <xen/kernel.h>
 #include <xen/multicall.h>
 
@@ -47,17 +46,16 @@
 
 struct percpu_ctxt {
     struct vcpu *curr_vcpu;
-    unsigned int context_not_finalised;
     unsigned int dirty_segment_mask;
 } __cacheline_aligned;
 static struct percpu_ctxt percpu_ctxt[NR_CPUS];
 
-static void continue_idle_task(struct vcpu *v)
+static void continue_idle_domain(struct vcpu *v)
 {
     reset_stack_and_jump(idle_loop);
 }
 
-static void continue_nonidle_task(struct vcpu *v)
+static void continue_nonidle_domain(struct vcpu *v)
 {
     reset_stack_and_jump(ret_from_intr);
 }
@@ -93,12 +91,13 @@
 {
     struct vcpu *v = current;
 
-    ASSERT(is_idle_task(v->domain));
+    ASSERT(is_idle_domain(v->domain));
     percpu_ctxt[smp_processor_id()].curr_vcpu = v;
-    cpu_set(smp_processor_id(), v->domain->cpumask);
-    v->arch.schedule_tail = continue_idle_task;
-
-    idle_loop();
+    cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
+    cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
+    v->arch.schedule_tail = continue_idle_domain;
+
+    reset_stack_and_jump(idle_loop);
 }
 
 static long no_idt[2];
@@ -185,11 +184,17 @@
 {
     struct pfn_info *page;
 
-    if ( d->tot_pages < 10 )
+    printk("Memory pages belonging to domain %u:\n", d->domain_id);
+
+    if ( d->tot_pages >= 10 )
+    {
+        printk("    DomPage list too long to display\n");
+    }
+    else
     {
         list_for_each_entry ( page, &d->page_list, list )
         {
-            printk("Page %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
+            printk("    DomPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
                    _p(page_to_phys(page)), _p(page_to_pfn(page)),
                    page->count_info, page->u.inuse.type_info);
         }
@@ -197,15 +202,10 @@
 
     list_for_each_entry ( page, &d->xenpage_list, list )
     {
-        printk("XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
+        printk("    XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
                _p(page_to_phys(page)), _p(page_to_pfn(page)),
                page->count_info, page->u.inuse.type_info);
     }
-
-    page = virt_to_page(d->shared_info);
-    printk("Shared_info@%p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
-           _p(page_to_phys(page)), _p(page_to_pfn(page)), page->count_info,
-           page->u.inuse.type_info);
 }
 
 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
@@ -250,24 +250,36 @@
 #endif
 }
 
-void arch_do_createdomain(struct vcpu *v)
+int arch_do_createdomain(struct vcpu *v)
 {
     struct domain *d = v->domain;
     l1_pgentry_t gdt_l1e;
-    int vcpuid, pdpt_order;
+    int vcpuid, pdpt_order, rc;
 #ifdef __x86_64__
     int i;
 #endif
 
-    if ( is_idle_task(d) )
-        return;
-
-    v->arch.schedule_tail = continue_nonidle_task;
-
-    d->shared_info = alloc_xenheap_page();
+    if ( is_idle_domain(d) )
+        return 0;
+
+    d->arch.ioport_caps = 
+        rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+    if ( d->arch.ioport_caps == NULL )
+        return -ENOMEM;
+
+    if ( (d->shared_info = alloc_xenheap_page()) == NULL )
+        return -ENOMEM;
+
+    if ( (rc = ptwr_init(d)) != 0 )
+    {
+        free_xenheap_page(d->shared_info);
+        return rc;
+    }
+
+    v->arch.schedule_tail = continue_nonidle_domain;
+
     memset(d->shared_info, 0, PAGE_SIZE);
     v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
-    v->cpumap = CPUMAP_RUNANYWHERE;
     SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
 
     pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
@@ -308,25 +320,10 @@
                             __PAGE_HYPERVISOR);
 #endif
 
-    (void)ptwr_init(d);
-
     shadow_lock_init(d);
     INIT_LIST_HEAD(&d->arch.free_shadow_frames);
-}
-
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
-    if ( v->processor == newcpu )
-        return;
-
-    set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
-    v->processor = newcpu;
-
-    if ( VMX_DOMAIN(v) )
-    {
-        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
-        v->arch.schedule_tail = arch_vmx_do_relaunch;
-    }
+
+    return 0;
 }
 
 /* This is called by arch_final_setup_guest and do_boot_vcpu */
@@ -348,6 +345,8 @@
              ((c->user_regs.ss & 3) == 0) )
             return -EINVAL;
     }
+    else if ( !hvm_enabled )
+        return -EINVAL;
 
     clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
     if ( c->flags & VGCF_I387_VALID )
@@ -690,7 +689,7 @@
     struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
     struct vcpu          *n = current;
 
-    if ( !is_idle_task(p->domain) )
+    if ( !is_idle_domain(p->domain) )
     {
         memcpy(&p->arch.guest_context.user_regs,
                stack_regs,
@@ -699,7 +698,7 @@
         save_segments(p);
     }
 
-    if ( !is_idle_task(n->domain) )
+    if ( !is_idle_domain(n->domain) )
     {
         memcpy(stack_regs,
                &n->arch.guest_context.user_regs,
@@ -725,7 +724,8 @@
     }
 
     if ( p->domain != n->domain )
-        cpu_set(cpu, n->domain->cpumask);
+        cpu_set(cpu, n->domain->domain_dirty_cpumask);
+    cpu_set(cpu, n->vcpu_dirty_cpumask);
 
     write_ptbase(n);
 
@@ -738,7 +738,8 @@
     }
 
     if ( p->domain != n->domain )
-        cpu_clear(cpu, p->domain->cpumask);
+        cpu_clear(cpu, p->domain->domain_dirty_cpumask);
+    cpu_clear(cpu, p->vcpu_dirty_cpumask);
 
     percpu_ctxt[cpu].curr_vcpu = n;
 }
@@ -748,28 +749,24 @@
 {
     unsigned int cpu = smp_processor_id();
 
-    ASSERT(!local_irq_is_enabled());
+    ASSERT(local_irq_is_enabled());
 
     set_current(next);
 
-    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
-    {
+    if ( (percpu_ctxt[cpu].curr_vcpu != next) &&
+         !is_idle_domain(next->domain) )
+    {
+        /* This may happen if next has been migrated by the scheduler. */
+        if ( unlikely(!cpus_empty(next->vcpu_dirty_cpumask)) )
+        {
+            ASSERT(!cpu_isset(cpu, next->vcpu_dirty_cpumask));
+            sync_vcpu_execstate(next);
+            ASSERT(cpus_empty(next->vcpu_dirty_cpumask));
+        }
+
+        local_irq_disable();
         __context_switch();
-        percpu_ctxt[cpu].context_not_finalised = 1;
-    }
-}
-
-void context_switch_finalise(struct vcpu *next)
-{
-    unsigned int cpu = smp_processor_id();
-
-    ASSERT(local_irq_is_enabled());
-
-    if ( percpu_ctxt[cpu].context_not_finalised )
-    {
-        percpu_ctxt[cpu].context_not_finalised = 0;
-
-        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+        local_irq_enable();
 
         if ( VMX_DOMAIN(next) )
         {
@@ -783,6 +780,8 @@
         }
     }
 
+    context_saved(prev);
+
     schedule_tail(next);
     BUG();
 }
@@ -812,20 +811,11 @@
 
 void sync_vcpu_execstate(struct vcpu *v)
 {
-    unsigned int cpu = v->processor;
-
-    if ( !cpu_isset(cpu, v->domain->cpumask) )
-        return;
-
-    if ( cpu == smp_processor_id() )
-    {
+    if ( cpu_isset(smp_processor_id(), v->vcpu_dirty_cpumask) )
         (void)__sync_lazy_execstate();
-    }
-    else
-    {
-        /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
-        flush_tlb_mask(cpumask_of_cpu(cpu));
-    }
+
+    /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
+    flush_tlb_mask(v->vcpu_dirty_cpumask);
 }
 
 unsigned long __hypercall_create_continuation(
@@ -951,9 +941,7 @@
     struct vcpu *v;
     unsigned long pfn;
 
-    BUG_ON(!cpus_empty(d->cpumask));
-
-    physdev_destroy_state(d);
+    BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
 
     ptwr_destroy(d);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/domain_build.c       Mon Jan  9 11:22:17 2006
@@ -16,13 +16,13 @@
 #include <xen/kernel.h>
 #include <xen/domain.h>
 #include <xen/compile.h>
+#include <xen/iocap.h>
 #include <asm/regs.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-#include <asm/physdev.h>
 #include <asm/shadow.h>
 
 static long dom0_nrpages;
@@ -94,9 +94,9 @@
     return page;
 }
 
-static void process_dom0_ioports_disable()
+static void process_dom0_ioports_disable(void)
 {
-    unsigned long io_from, io_to, io_nr;
+    unsigned long io_from, io_to;
     char *t, *u, *s = opt_dom0_ioports_disable;
 
     if ( *s == '\0' )
@@ -126,8 +126,8 @@
         printk("Disabling dom0 access to ioport range %04lx-%04lx\n",
             io_from, io_to);
 
-        io_nr = io_to - io_from + 1;
-        physdev_modify_ioport_access_range(dom0, 0, io_from, io_nr);
+        if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
+            BUG();
     }
 }
 
@@ -183,7 +183,6 @@
     /* Machine address of next candidate page-table page. */
     unsigned long mpt_alloc;
 
-    extern void physdev_init_dom0(struct domain *);
     extern void translate_l2pgtable(
         struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn);
 
@@ -692,9 +691,6 @@
     zap_low_mappings(l2start);
     zap_low_mappings(idle_pg_table_l2);
 #endif
-    
-    /* DOM0 gets access to everything. */
-    physdev_init_dom0(d);
 
     init_domain_time(d);
 
@@ -746,19 +742,28 @@
         printk("dom0: shadow setup done\n");
     }
 
+    i = 0;
+
+    /* DOM0 is permitted full I/O capabilities. */
+    i |= ioports_permit_access(dom0, 0, 0xFFFF);
+    i |= iomem_permit_access(dom0, 0UL, ~0UL);
+    i |= irqs_permit_access(dom0, 0, NR_PIRQS-1);
+
     /*
      * Modify I/O port access permissions.
      */
     /* Master Interrupt Controller (PIC). */
-    physdev_modify_ioport_access_range(dom0, 0, 0x20, 2);
+    i |= ioports_deny_access(dom0, 0x20, 0x21);
     /* Slave Interrupt Controller (PIC). */
-    physdev_modify_ioport_access_range(dom0, 0, 0xA0, 2);
+    i |= ioports_deny_access(dom0, 0xA0, 0xA1);
     /* Interval Timer (PIT). */
-    physdev_modify_ioport_access_range(dom0, 0, 0x40, 4);
+    i |= ioports_deny_access(dom0, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
-    physdev_modify_ioport_access_range(dom0, 0, 0x61, 1);
-    /* Command-line passed i/o ranges */
+    i |= ioports_deny_access(dom0, 0x61, 0x61);
+    /* Command-line I/O ranges. */
     process_dom0_ioports_disable();
+
+    BUG_ON(i != 0);
 
     return 0;
 }
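
Dom0's hardware access is now expressed through the generic rangeset-based capability interface (xen/iocap.h) rather than the removed physdev calls: grant everything up front, then punch holes for ranges Xen itself drives. Condensed from the domain_build.c hunk above:

   /* Accumulate return codes and verify the whole batch at once. */
   int rc = 0;
   rc |= ioports_permit_access(dom0, 0, 0xFFFF);     /* all I/O ports */
   rc |= iomem_permit_access(dom0, 0UL, ~0UL);       /* all MMIO      */
   rc |= irqs_permit_access(dom0, 0, NR_PIRQS - 1);  /* all PIRQs     */
   rc |= ioports_deny_access(dom0, 0x20, 0x21);      /* master PIC    */
   BUG_ON(rc != 0);
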
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/idle0_task.c
--- a/xen/arch/x86/idle0_task.c Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/idle0_task.c Mon Jan  9 11:22:17 2006
@@ -11,6 +11,7 @@
 
 struct vcpu idle0_vcpu = {
     processor:   0,
+    cpu_affinity:CPU_MASK_CPU0,
     domain:      &idle0_domain
 };
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/io_apic.c    Mon Jan  9 11:22:17 2006
@@ -1807,3 +1807,47 @@
 
     return 0;
 }
+
+void dump_ioapic_irq_info(void)
+{
+    struct irq_pin_list *entry;
+    struct IO_APIC_route_entry rte;
+    unsigned int irq, pin, printed = 0;
+    unsigned long flags;
+
+    for ( irq = 0; irq < NR_IRQS; irq++ )
+    {
+        entry = &irq_2_pin[irq];
+        if ( entry->pin == -1 )
+            continue;
+
+        if ( !printed++ )
+            printk("IO-APIC interrupt information:\n");
+
+        printk("    IRQ%3d Vec%3d:\n", irq, irq_to_vector(irq));
+
+        for ( ; ; )
+        {
+            pin = entry->pin;
+
+            printk("      Apic 0x%02x, Pin %2d: ", entry->apic, pin);
+
+            spin_lock_irqsave(&ioapic_lock, flags);
+            *(((int *)&rte) + 0) = io_apic_read(entry->apic, 0x10 + 2 * pin);
+            *(((int *)&rte) + 1) = io_apic_read(entry->apic, 0x11 + 2 * pin);
+            spin_unlock_irqrestore(&ioapic_lock, flags);
+
+            printk("vector=%u, delivery_mode=%u, dest_mode=%s, "
+                   "delivery_status=%d, polarity=%d, irr=%d, "
+                   "trigger=%s, mask=%d\n",
+                   rte.vector, rte.delivery_mode,
+                   rte.dest_mode ? "logical" : "physical",
+                   rte.delivery_status, rte.polarity, rte.irr,
+                   rte.trigger ? "level" : "edge", rte.mask);
+
+            if ( entry->next == 0 )
+                break;
+            entry = &irq_2_pin[entry->next];
+        }
+    }
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/irq.c        Mon Jan  9 11:22:17 2006
@@ -12,6 +12,7 @@
 #include <xen/irq.h>
 #include <xen/perfc.h>
 #include <xen/sched.h>
+#include <xen/keyhandler.h>
 #include <asm/current.h>
 #include <asm/smpboot.h>
 
@@ -198,19 +199,21 @@
 
 int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
 {
-    unsigned int        vector = irq_to_vector(irq);
-    struct domain      *d = v->domain;
-    irq_desc_t         *desc = &irq_desc[vector];
+    unsigned int        vector;
+    irq_desc_t         *desc;
     irq_guest_action_t *action;
     unsigned long       flags;
     int                 rc = 0;
     cpumask_t           cpumask = CPU_MASK_NONE;
 
-    if ( !IS_CAPABLE_PHYSDEV(d) )
-        return -EPERM;
-
+    if ( (irq < 0) || (irq >= NR_IRQS) )
+        return -EINVAL;
+
+    vector = irq_to_vector(irq);
     if ( vector == 0 )
-        return -EBUSY;
+        return -EINVAL;
+
+    desc = &irq_desc[vector];
 
     spin_lock_irqsave(&desc->lock, flags);
 
@@ -309,3 +312,71 @@
     spin_unlock_irqrestore(&desc->lock, flags);    
     return 0;
 }
+
+extern void dump_ioapic_irq_info(void);
+
+static void dump_irqs(unsigned char key)
+{
+    int i, irq, vector;
+    irq_desc_t *desc;
+    irq_guest_action_t *action;
+    struct domain *d;
+    unsigned long flags;
+
+    printk("Guest interrupt information:\n");
+
+    for ( irq = 0; irq < NR_IRQS; irq++ )
+    {
+        vector = irq_to_vector(irq);
+        if ( vector == 0 )
+            continue;
+
+        desc = &irq_desc[vector];
+
+        spin_lock_irqsave(&desc->lock, flags);
+
+        if ( desc->status & IRQ_GUEST )
+        {
+            action = (irq_guest_action_t *)desc->action;
+
+            printk("    IRQ%3d Vec%3d: type=%-15s status=%08x "
+                   "in-flight=%d domain-list=",
+                   irq, vector, desc->handler->typename,
+                   desc->status, action->in_flight);
+
+            for ( i = 0; i < action->nr_guests; i++ )
+            {
+                d = action->guest[i];
+                printk("%u(%c%c%c%c)",
+                       d->domain_id,
+                       (test_bit(d->pirq_to_evtchn[irq],
+                                 &d->shared_info->evtchn_pending[0]) ?
+                        'P' : '-'),
+                       (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_LONG,
+                                 &d->shared_info->vcpu_info[0].
+                                 evtchn_pending_sel) ?
+                        'S' : '-'),
+                       (test_bit(d->pirq_to_evtchn[irq],
+                                 &d->shared_info->evtchn_mask[0]) ?
+                        'M' : '-'),
+                       (test_bit(irq, &d->pirq_mask) ?
+                        'M' : '-'));
+                if ( i != action->nr_guests )
+                    printk(",");
+            }
+
+            printk("\n");
+        }
+
+        spin_unlock_irqrestore(&desc->lock, flags);
+    }
+
+    dump_ioapic_irq_info();
+}
+
+static int __init setup_dump_irqs(void)
+{
+    register_keyhandler('i', dump_irqs, "dump interrupt bindings");
+    return 0;
+}
+__initcall(setup_dump_irqs);
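
The new dump_irqs handler follows the standard Xen debug-key pattern: define a handler taking the key that triggered it, then register it from an initcall so the wiring happens once at boot. A sketch of registering a further handler the same way (the key, names and message below are made up for illustration):

    static void dump_foo(unsigned char key)
    {
        printk("'%c' pressed -> dumping foo state\n", key);
    }

    static int __init setup_dump_foo(void)
    {
        register_keyhandler('f', dump_foo, "dump foo state");
        return 0;
    }
    __initcall(setup_dump_foo);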
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/mm.c Mon Jan  9 11:22:17 2006
@@ -96,6 +96,7 @@
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
 #include <xen/event.h>
+#include <xen/iocap.h>
 #include <asm/shadow.h>
 #include <asm/page.h>
 #include <asm/flushtlb.h>
@@ -437,7 +438,6 @@
     unsigned long mfn = l1e_get_pfn(l1e);
     struct pfn_info *page = pfn_to_page(mfn);
     int okay;
-    extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
 
     if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
         return 1;
@@ -455,8 +455,7 @@
         if ( d == dom_io )
             d = current->domain;
 
-        if ( (!IS_PRIV(d)) &&
-             (!IS_CAPABLE_PHYSDEV(d) || !domain_iomem_in_pfn(d, mfn)) )
+        if ( !iomem_access_permitted(d, mfn, mfn) )
         {
             MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
             return 0;
@@ -1458,7 +1457,8 @@
                      * was GDT/LDT) but those circumstances should be
                      * very rare.
                      */
-                    cpumask_t mask = page_get_owner(page)->cpumask;
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
                     tlbflush_filter(mask, page->tlbflush_timestamp);
 
                     if ( unlikely(!cpus_empty(mask)) )
@@ -1620,7 +1620,7 @@
         if ( shadow_mode_enabled(d) )
             shadow_sync_all(d);
         if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
-            flush_tlb_mask(d->cpumask);
+            flush_tlb_mask(d->domain_dirty_cpumask);
         else
             local_flush_tlb();
     }
@@ -1692,7 +1692,7 @@
     struct domain *d, unsigned long vmask)
 {
     unsigned int vcpu_id;
-    cpumask_t    pmask;
+    cpumask_t    pmask = CPU_MASK_NONE;
     struct vcpu *v;
 
     while ( vmask != 0 )
@@ -1701,7 +1701,7 @@
         vmask &= ~(1UL << vcpu_id);
         if ( (vcpu_id < MAX_VIRT_CPUS) &&
              ((v = d->vcpu[vcpu_id]) != NULL) )
-            cpu_set(v->processor, pmask);
+            cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
     }
 
     return pmask;
@@ -1870,7 +1870,6 @@
                 break;
             }
             pmask = vcpumask_to_pcpumask(d, vmask);
-            cpus_and(pmask, pmask, d->cpumask);
             if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
                 flush_tlb_mask(pmask);
             else
@@ -1879,15 +1878,15 @@
         }
 
         case MMUEXT_TLB_FLUSH_ALL:
-            flush_tlb_mask(d->cpumask);
+            flush_tlb_mask(d->domain_dirty_cpumask);
             break;
     
         case MMUEXT_INVLPG_ALL:
-            flush_tlb_one_mask(d->cpumask, op.arg1.linear_addr);
+            flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
             break;
 
         case MMUEXT_FLUSH_CACHE:
-            if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
+            if ( unlikely(!cache_flush_permitted(d)) )
             {
                 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
                 okay = 0;
@@ -2498,7 +2497,7 @@
     l1_pgentry_t   val = l1e_from_intpte(val64);
     struct vcpu   *v   = current;
     struct domain *d   = v->domain;
-    unsigned int   cpu = v->processor;
+    unsigned int   cpu = smp_processor_id();
     unsigned long  vmask, bmap_ptr;
     cpumask_t      pmask;
     int            rc  = 0;
@@ -2549,13 +2548,12 @@
             local_flush_tlb();
             break;
         case UVMF_ALL:
-            flush_tlb_mask(d->cpumask);
+            flush_tlb_mask(d->domain_dirty_cpumask);
             break;
         default:
             if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
                 rc = -EFAULT;
             pmask = vcpumask_to_pcpumask(d, vmask);
-            cpus_and(pmask, pmask, d->cpumask);
             flush_tlb_mask(pmask);
             break;
         }
@@ -2570,13 +2568,12 @@
             local_flush_tlb_one(va);
             break;
         case UVMF_ALL:
-            flush_tlb_one_mask(d->cpumask, va);
+            flush_tlb_one_mask(d->domain_dirty_cpumask, va);
             break;
         default:
             if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
                 rc = -EFAULT;
             pmask = vcpumask_to_pcpumask(d, vmask);
-            cpus_and(pmask, pmask, d->cpumask);
             flush_tlb_one_mask(pmask, va);
             break;
         }
@@ -3019,7 +3016,7 @@
 
     /* Ensure that there are no stale writable mappings in any TLB. */
     /* NB. INVLPG is a serialising instruction: flushes pending updates. */
-    flush_tlb_one_mask(d->cpumask, l1va);
+    flush_tlb_one_mask(d->domain_dirty_cpumask, l1va);
     PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
                 PTWR_PRINT_WHICH, ptep, pte.l1);
 
@@ -3343,7 +3340,7 @@
     if ( which == PTWR_PT_ACTIVE )
     {
         l2e_remove_flags(*pl2e, _PAGE_PRESENT);
-        flush_tlb_mask(d->cpumask);
+        flush_tlb_mask(d->domain_dirty_cpumask);
     }
     
     /* Temporarily map the L1 page, and make a copy of it. */
@@ -3370,7 +3367,7 @@
 
  emulate:
     if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
-                           &ptwr_mem_emulator, BITS_PER_LONG/8) )
+                           &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
         return 0;
     perfc_incrc(ptwr_emulations);
     return EXCRET_fault_fixed;
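
The recurring change in this file is the switch from d->cpumask to the new dirty-cpumask tracking: vcpumask_to_pcpumask() now ORs in each selected VCPU's vcpu_dirty_cpumask instead of just its current processor, so the result already names every physical CPU that may hold stale TLB state for those VCPUs. That is why the cpus_and() clamp against d->cpumask disappears from the callers. A sketch of the resulting flow, using the names from the hunks above:

    /* Flush TLBs for the VCPUs named in the guest-supplied vmask. */
    static void flush_for_vcpus(struct domain *d, unsigned long vmask)
    {
        cpumask_t pmask = vcpumask_to_pcpumask(d, vmask);
        /* No cpus_and() needed: the dirty masks are already a subset
         * of the CPUs relevant to this domain. */
        flush_tlb_mask(pmask);
    }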
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/physdev.c    Mon Jan  9 11:22:17 2006
@@ -13,27 +13,6 @@
 
 extern int ioapic_guest_read(int apicid, int address, u32 *pval);
 extern int ioapic_guest_write(int apicid, int address, u32 pval);
-
-void physdev_modify_ioport_access_range(
-    struct domain *d, int enable, int port, int num)
-{
-    int i;
-    for ( i = port; i < (port + num); i++ )
-        (enable ? clear_bit : set_bit)(i, d->arch.iobmp_mask);
-}
-
-void physdev_destroy_state(struct domain *d)
-{
-    xfree(d->arch.iobmp_mask);
-    d->arch.iobmp_mask = NULL;
-}
-
-/* Check if a domain controls a device with IO memory within frame @pfn.
- * Returns: 1 if the domain should be allowed to map @pfn, 0 otherwise.  */
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn)
-{
-    return 0;
-}
 
 /*
  * Demuxing hypercall.
@@ -120,18 +99,6 @@
     return ret;
 }
 
-/* Domain 0 has read access to all devices. */
-void physdev_init_dom0(struct domain *d)
-{
-    /* Access to all I/O ports. */
-    d->arch.iobmp_mask = xmalloc_array(u8, IOBMP_BYTES);
-    BUG_ON(d->arch.iobmp_mask == NULL);
-    memset(d->arch.iobmp_mask, 0, IOBMP_BYTES);
-
-    set_bit(_DOMF_physdev_access, &d->domain_flags);
-}
-
-
 /*
  * Local variables:
  * mode: C
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/setup.c      Mon Jan  9 11:22:17 2006
@@ -92,7 +92,7 @@
 #endif
 EXPORT_SYMBOL(mmu_cr4_features);
 
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+struct vcpu *idle_domain[NR_CPUS] = { &idle0_vcpu };
 
 int acpi_disabled;
 
@@ -138,131 +138,19 @@
         (*call)();
 }
 
-static void __init start_of_day(void)
-{
-    int i;
-    unsigned long vgdt, gdt_pfn;
-
-    early_cpu_init();
-
-    paging_init();
-
-    /* Unmap the first page of CPU0's stack. */
-    memguard_guard_stack(cpu0_stack);
-
-    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
-
-    if ( opt_watchdog ) 
-        nmi_watchdog = NMI_LOCAL_APIC;
-
-    sort_exception_tables();
-
-    arch_do_createdomain(current);
-    
-    /*
-     * Map default GDT into its final positions in the idle page table. As
-     * noted in arch_do_createdomain(), we must map for every possible VCPU#.
-     */
-    vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
-    gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
-    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-    {
-        map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
-        vgdt += 1 << PDPT_VCPU_VA_SHIFT;
-    }
-
-    find_smp_config();
-
-    smp_alloc_memory();
-
-    dmi_scan_machine();
-
-    generic_apic_probe();
-
-    acpi_boot_table_init();
-    acpi_boot_init();
-
-    if ( smp_found_config ) 
-        get_smp_config();
-
-    init_apic_mappings();
-
-    init_IRQ();
-
-    trap_init();
-
-    ac_timer_init();
-
-    early_time_init();
-
-    arch_init_memory();
-
-    scheduler_init();
-
-    identify_cpu(&boot_cpu_data);
-    if ( cpu_has_fxsr )
-        set_in_cr4(X86_CR4_OSFXSR);
-    if ( cpu_has_xmm )
-        set_in_cr4(X86_CR4_OSXMMEXCPT);
-
-    if ( opt_nosmp )
-    {
-        max_cpus = 0;
-        smp_num_siblings = 1;
-        boot_cpu_data.x86_num_cores = 1;
-    }
-
-    smp_prepare_cpus(max_cpus);
-
-    /* We aren't hotplug-capable yet. */
-    BUG_ON(!cpus_empty(cpu_present_map));
-    for_each_cpu ( i )
-        cpu_set(i, cpu_present_map);
-
-    /*
-     * Initialise higher-level timer functions. We do this fairly late
-     * (post-SMP) because the time bases and scale factors need to be updated 
-     * regularly, and SMP initialisation can cause a long delay with 
-     * interrupts not yet enabled.
-     */
-    init_xen_time();
-
-    initialize_keytable();
-
-    serial_init_postirq();
-
-    BUG_ON(!local_irq_is_enabled());
-
-    for_each_present_cpu ( i )
-    {
-        if ( num_online_cpus() >= max_cpus )
-            break;
-        if ( !cpu_online(i) )
-            __cpu_up(i);
-    }
-
-    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
-    smp_cpus_done(max_cpus);
-
-    do_initcalls();
-
-    schedulers_start();
-
-    watchdog_enable();
-}
-
 #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
 
 static struct e820entry e820_raw[E820MAX];
 
 void __init __start_xen(multiboot_info_t *mbi)
 {
+    unsigned long vgdt, gdt_pfn;
     char *cmdline;
+    unsigned long _initrd_start = 0, _initrd_len = 0;
+    unsigned int initrdidx = 1;
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     unsigned long nr_pages, modules_length;
     unsigned long initial_images_start, initial_images_end;
-    unsigned long _initrd_start = 0, _initrd_len = 0;
-    unsigned int initrdidx = 1;
     physaddr_t s, e;
     int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
     struct ns16550_defaults ns16550 = {
@@ -455,6 +343,12 @@
     BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
     BUG_ON(sizeof(vcpu_info_t) != 64);
 
+    /* __foo are defined in public headers. Check they match internal defs. */
+    BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
+#ifdef HYPERVISOR_VIRT_END
+    BUG_ON(__HYPERVISOR_VIRT_END   != HYPERVISOR_VIRT_END);
+#endif
+
     init_frametable();
 
     end_boot_allocator();
@@ -486,7 +380,113 @@
 
     early_boot = 0;
 
-    start_of_day();
+    early_cpu_init();
+
+    paging_init();
+
+    /* Unmap the first page of CPU0's stack. */
+    memguard_guard_stack(cpu0_stack);
+
+    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
+
+    if ( opt_watchdog ) 
+        nmi_watchdog = NMI_LOCAL_APIC;
+
+    sort_exception_tables();
+
+    if ( arch_do_createdomain(current) != 0 )
+        BUG();
+
+    /*
+     * Map default GDT into its final positions in the idle page table. As
+     * noted in arch_do_createdomain(), we must map for every possible VCPU#.
+     */
+    vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
+    gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+    {
+        map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
+        vgdt += 1 << PDPT_VCPU_VA_SHIFT;
+    }
+
+    find_smp_config();
+
+    smp_alloc_memory();
+
+    dmi_scan_machine();
+
+    generic_apic_probe();
+
+    acpi_boot_table_init();
+    acpi_boot_init();
+
+    if ( smp_found_config ) 
+        get_smp_config();
+
+    init_apic_mappings();
+
+    init_IRQ();
+
+    trap_init();
+
+    ac_timer_init();
+
+    early_time_init();
+
+    arch_init_memory();
+
+    scheduler_init();
+
+    identify_cpu(&boot_cpu_data);
+    if ( cpu_has_fxsr )
+        set_in_cr4(X86_CR4_OSFXSR);
+    if ( cpu_has_xmm )
+        set_in_cr4(X86_CR4_OSXMMEXCPT);
+
+    if ( opt_nosmp )
+    {
+        max_cpus = 0;
+        smp_num_siblings = 1;
+        boot_cpu_data.x86_num_cores = 1;
+    }
+
+    smp_prepare_cpus(max_cpus);
+
+    /* We aren't hotplug-capable yet. */
+    BUG_ON(!cpus_empty(cpu_present_map));
+    for_each_cpu ( i )
+        cpu_set(i, cpu_present_map);
+
+    /*
+     * Initialise higher-level timer functions. We do this fairly late
+     * (post-SMP) because the time bases and scale factors need to be updated 
+     * regularly, and SMP initialisation can cause a long delay with 
+     * interrupts not yet enabled.
+     */
+    init_xen_time();
+
+    initialize_keytable();
+
+    serial_init_postirq();
+
+    BUG_ON(!local_irq_is_enabled());
+
+    for_each_present_cpu ( i )
+    {
+        if ( num_online_cpus() >= max_cpus )
+            break;
+        if ( !cpu_online(i) )
+            __cpu_up(i);
+    }
+
+    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+    smp_cpus_done(max_cpus);
+
+    do_initcalls();
+
+    schedulers_start();
+
+    watchdog_enable();
 
     shadow_mode_init();
 
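The two BUG_ON() checks added earlier in this file guard against the public headers drifting out of sync with the hypervisor's internal definitions. Since both sides are compile-time constants, the same invariant could be expressed as a build-time check; a sketch using the usual negative-array-size trick (BUILD_BUG_ON as defined here is illustrative, not necessarily what Xen provides):

    #define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

    static inline void check_public_headers(void)
    {
        BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
    }
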
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/shadow.c     Mon Jan  9 11:22:17 2006
@@ -1800,7 +1800,7 @@
     }
 
     /* Other VCPUs mustn't use the revoked writable mappings. */
-    other_vcpus_mask = d->cpumask;
+    other_vcpus_mask = d->domain_dirty_cpumask;
     cpu_clear(smp_processor_id(), other_vcpus_mask);
     flush_tlb_mask(other_vcpus_mask);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/shadow32.c   Mon Jan  9 11:22:17 2006
@@ -2586,7 +2586,7 @@
     }
 
     /* Other VCPUs mustn't use the revoked writable mappings. */
-    other_vcpus_mask = d->cpumask;
+    other_vcpus_mask = d->domain_dirty_cpumask;
     cpu_clear(smp_processor_id(), other_vcpus_mask);
     flush_tlb_mask(other_vcpus_mask);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/smpboot.c    Mon Jan  9 11:22:17 2006
@@ -435,7 +435,7 @@
 
        extern void percpu_traps_init(void);
 
-       set_current(idle_task[cpu]);
+       set_current(idle_domain[cpu]);
        set_processor_id(cpu);
 
        percpu_traps_init();
@@ -763,7 +763,6 @@
 {
        struct domain *idle;
        struct vcpu *v;
-       void *stack;
        unsigned long boot_error;
        int timeout, cpu;
        unsigned long start_eip;
@@ -774,7 +773,7 @@
        if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
                panic("failed 'createdomain' for CPU %d", cpu);
 
-       v = idle_task[cpu] = idle->vcpu[0];
+       v = idle_domain[cpu] = idle->vcpu[0];
 
        set_bit(_DOMF_idle_domain, &idle->domain_flags);
 
@@ -786,16 +785,10 @@
        /* So we see what's up   */
        printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 
-       stack = alloc_xenheap_pages(STACK_ORDER);
-#if defined(__i386__)
-       stack_start.esp = (void *)__pa(stack);
-#elif defined(__x86_64__)
-       stack_start.esp = stack;
-#endif
-       stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
+       stack_start.esp = alloc_xenheap_pages(STACK_ORDER);
 
        /* Debug build: detect stack overflow by setting up a guard page. */
-       memguard_guard_stack(stack);
+       memguard_guard_stack(stack_start.esp);
 
        /*
         * This grunge runs the startup process for
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/traps.c      Mon Jan  9 11:22:17 2006
@@ -41,6 +41,7 @@
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
 #include <xen/symbols.h>
+#include <xen/iocap.h>
 #include <asm/shadow.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -192,7 +193,8 @@
 
     /* Bounds for range of valid frame pointer. */
     low  = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
-    high = (low & ~(STACK_SIZE - 1)) + (STACK_SIZE - sizeof(struct cpu_info));
+    high = (low & ~(STACK_SIZE - 1)) + 
+        (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
 
     /* The initial frame pointer. */
     next = regs->ebp;
@@ -200,14 +202,14 @@
     for ( ; ; )
     {
         /* Valid frame pointer? */
-        if ( (next < low) || (next > high) )
+        if ( (next < low) || (next >= high) )
         {
             /*
              * Exception stack frames have a different layout, denoted by an
              * inverted frame pointer.
              */
             next = ~next;
-            if ( (next < low) || (next > high) )
+            if ( (next < low) || (next >= high) )
                 break;
             frame = (unsigned long *)next;
             next  = frame[0];
@@ -621,17 +623,7 @@
     unsigned int port, unsigned int bytes,
     struct vcpu *v, struct cpu_user_regs *regs)
 {
-    struct domain *d = v->domain;
-    u16 x;
-
-    if ( d->arch.iobmp_mask != NULL )
-    {
-        x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
-        if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
-            return 1;
-    }
-
-    return 0;
+    return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
 /* Check admin limits. Silently fail the access if it is disallowed. */
@@ -871,7 +863,7 @@
 
     case 0x09: /* WBINVD */
         /* Ignore the instruction if unprivileged. */
-        if ( !IS_CAPABLE_PHYSDEV(v->domain) )
+        if ( !cache_flush_permitted(v->domain) )
             DPRINTK("Non-physdev domain attempted WBINVD.\n");
         else
             wbinvd();
@@ -885,7 +877,8 @@
         switch ( modrm_reg )
         {
         case 0: /* Read CR0 */
-            *reg = v->arch.guest_context.ctrlreg[0];
+            *reg = (read_cr0() & ~X86_CR0_TS) |
+                v->arch.guest_context.ctrlreg[0];
             break;
 
         case 2: /* Read CR2 */
@@ -927,6 +920,11 @@
         switch ( modrm_reg )
         {
         case 0: /* Write CR0 */
+            if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
+            {
+                DPRINTK("Attempt to change unmodifiable CR0 flags.\n");
+                goto fail;
+            }
             (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
             break;
 
@@ -939,6 +937,14 @@
             LOCK_BIGLOCK(v->domain);
             (void)new_guest_cr3(*reg);
             UNLOCK_BIGLOCK(v->domain);
+            break;
+
+        case 4:
+            if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
+            {
+                DPRINTK("Attempt to change CR4 flags.\n");
+                goto fail;
+            }
             break;
 
         default:
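
The rewritten admin_io_okay() above delegates to the rangeset-backed ioports_access_permitted() with an inclusive range: a bytes-wide access at port touches ports port through port + bytes - 1. A worked usage sketch (the port number is chosen only for illustration):

    /* A 2-byte inw at the COM1 base is allowed only if both ports
     * 0x3f8 and 0x3f9 are in the domain's I/O-port rangeset. */
    int ok = ioports_access_permitted(d, 0x3f8, 0x3f8 + 2 - 1);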
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx.c        Mon Jan  9 11:22:17 2006
@@ -42,7 +42,7 @@
 #include <asm/shadow_64.h>
 #endif
 #include <public/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <asm/vmx_vpic.h>
 #include <asm/vmx_vlapic.h>
 
@@ -53,7 +53,7 @@
 integer_param("vmx_debug", opt_vmx_debug_level);
 
 static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
+#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
 
 static int vmx_switch_on;
 
@@ -65,11 +65,6 @@
     {
         struct domain *d = v->domain;
         struct vcpu *vc;
-
-        d->arch.vmx_platform.lapic_enable = v->arch.guest_context.user_regs.ecx;
-        v->arch.guest_context.user_regs.ecx = 0;
-        VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n",
-                    d->arch.vmx_platform.lapic_enable);
 
         /* Initialize monitor page table */
         for_each_vcpu(d, vc)
@@ -95,7 +90,7 @@
 void vmx_relinquish_resources(struct vcpu *v)
 {
     struct vmx_virpit *vpit;
-    
+
     if ( !VMX_DOMAIN(v) )
         return;
 
@@ -1955,9 +1950,12 @@
 
 asmlinkage void trace_vmentry (void)
 {
-    TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
-             trace_values[current->processor][1],trace_values[current->processor][2],
-             trace_values[current->processor][3],trace_values[current->processor][4]);
+    TRACE_5D(TRC_VMENTRY,
+             trace_values[smp_processor_id()][0],
+             trace_values[smp_processor_id()][1],
+             trace_values[smp_processor_id()][2],
+             trace_values[smp_processor_id()][3],
+             trace_values[smp_processor_id()][4]);
     TRACE_VMEXIT(0,9);
     TRACE_VMEXIT(1,9);
     TRACE_VMEXIT(2,9);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_intercept.c      Mon Jan  9 11:22:17 2006
@@ -24,7 +24,7 @@
 #include <asm/vmx_vpit.h>
 #include <asm/vmx_intercept.h>
 #include <asm/vmx_vlapic.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_io.c     Mon Jan  9 11:22:17 2006
@@ -37,7 +37,7 @@
 #include <asm/shadow.h>
 #include <asm/vmx_vpic.h>
 #include <asm/vmx_vlapic.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #ifdef CONFIG_VMX
 #if defined (__i386__)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c       Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_platform.c       Mon Jan  9 11:22:17 2006
@@ -27,7 +27,7 @@
 #include <xen/trace.h>
 #include <asm/vmx.h>
 #include <asm/vmx_platform.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #include <xen/lib.h>
 #include <xen/sched.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_vlapic.c
--- a/xen/arch/x86/vmx_vlapic.c Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_vlapic.c Mon Jan  9 11:22:17 2006
@@ -32,7 +32,7 @@
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <asm/current.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #ifdef CONFIG_VMX
 
@@ -62,7 +62,7 @@
 
 int vmx_apic_support(struct domain *d)
 {
-    return d->arch.vmx_platform.lapic_enable;
+    return d->arch.vmx_platform.apic_enabled;
 }
 
 s_time_t get_apictime_scheduled(struct vcpu *v)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/vmx_vmcs.c   Mon Jan  9 11:22:17 2006
@@ -32,7 +32,7 @@
 #include <asm/flushtlb.h>
 #include <xen/event.h>
 #include <xen/kernel.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/hvm_info_table.h>
 #if CONFIG_PAGING_LEVELS >= 4
 #include <asm/shadow_64.h>
 #endif
@@ -206,35 +206,55 @@
               &d->shared_info->evtchn_mask[0]);
 }
 
-#define VCPU_NR_PAGE        0x0009F000
-#define VCPU_NR_OFFSET      0x00000800
-#define VCPU_MAGIC          0x76637075  /* "vcpu" */
-
-static void vmx_set_vcpu_nr(struct domain *d)
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+    char signature[] = "HVM INFO";
+    uint8_t *ptr = (uint8_t *)t;
+    uint8_t sum = 0;
+    int i;
+
+    /* strncmp(t->signature, "HVM INFO", 8) */
+    for ( i = 0; i < 8; i++ ) {
+        if ( signature[i] != t->signature[i] ) {
+            printk("Bad hvm info signature\n");
+            return 0;
+        }
+    }
+
+    for ( i = 0; i < t->length; i++ )
+        sum += ptr[i];
+
+    return (sum == 0);
+}
+
+static void vmx_get_hvm_info(struct domain *d)
 {
     unsigned char *p;
     unsigned long mpfn;
-    unsigned int *vcpus;
-
-    mpfn = get_mfn_from_pfn(VCPU_NR_PAGE >> PAGE_SHIFT);
-    if (mpfn == INVALID_MFN) {
-        printk("Can not get vcpu number page mfn for VMX domain.\n");
+    struct hvm_info_table *t;
+
+    mpfn = get_mfn_from_pfn(HVM_INFO_PFN);
+    if ( mpfn == INVALID_MFN ) {
+        printk("Can not get hvm info page mfn for VMX domain.\n");
         domain_crash_synchronous();
     }
 
     p = map_domain_page(mpfn);
-    if (p == NULL) {
-        printk("Can not map vcpu number page for VMX domain.\n");
-        domain_crash_synchronous();
-    }
-
-    vcpus = (unsigned int *)(p + VCPU_NR_OFFSET);
-    if (vcpus[0] != VCPU_MAGIC) {
-        printk("Bad vcpus magic, set vcpu number to 1 by default.\n");
-        d->arch.vmx_platform.nr_vcpu = 1;
-    }
-
-    d->arch.vmx_platform.nr_vcpu = vcpus[1];
+    if ( p == NULL ) {
+        printk("Can not map hvm info page for VMX domain.\n");
+        domain_crash_synchronous();
+    }
+
+    t = (struct hvm_info_table *)(p + HVM_INFO_OFFSET);
+
+    if ( validate_hvm_info(t) ) {
+        d->arch.vmx_platform.nr_vcpus = t->nr_vcpus;
+        d->arch.vmx_platform.apic_enabled = t->apic_enabled;
+    } else {
+        printk("Bad hvm info table\n");
+        d->arch.vmx_platform.nr_vcpus = 1;
+        d->arch.vmx_platform.apic_enabled = 0;
+    }
 
     unmap_domain_page(p);
 }
@@ -244,10 +264,10 @@
     struct vmx_platform *platform;
 
     vmx_map_io_shared_page(d);
-    vmx_set_vcpu_nr(d);
+    vmx_get_hvm_info(d);
 
     platform = &d->arch.vmx_platform;
-    pic_init(&platform->vmx_pic,  pic_irq_request, 
+    pic_init(&platform->vmx_pic,  pic_irq_request,
              &platform->interrupt_request);
     register_pic_io_hook();
 
@@ -335,6 +355,8 @@
     __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
 
     v->arch.schedule_tail = arch_vmx_do_resume;
+    v->arch.arch_vmx.launch_cpu = smp_processor_id();
+
     /* init guest tsc to start from 0 */
     rdtscll(host_tsc);
     v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
@@ -617,11 +639,21 @@
 
 void arch_vmx_do_resume(struct vcpu *v)
 {
-    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
-    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
-    vmx_do_resume(v);
-    reset_stack_and_jump(vmx_asm_do_resume);
+    if ( v->arch.arch_vmx.launch_cpu == smp_processor_id() )
+    {
+        load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
+        vmx_do_resume(v);
+        reset_stack_and_jump(vmx_asm_do_resume);
+    }
+    else
+    {
+        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
+        load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
+        vmx_do_resume(v);
+        vmx_set_host_env(v);
+        v->arch.arch_vmx.launch_cpu = smp_processor_id();
+        reset_stack_and_jump(vmx_asm_do_relaunch);
+    }
 }
 
 void arch_vmx_do_launch(struct vcpu *v)
@@ -641,18 +673,6 @@
     }
     vmx_do_launch(v);
     reset_stack_and_jump(vmx_asm_do_launch);
-}
-
-void arch_vmx_do_relaunch(struct vcpu *v)
-{
-    u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
-    load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
-    vmx_do_resume(v);
-    vmx_set_host_env(v);
-    v->arch.schedule_tail = arch_vmx_do_resume;
-
-    reset_stack_and_jump(vmx_asm_do_relaunch);
 }
 
 #endif /* CONFIG_VMX */
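
validate_hvm_info() above accepts the table only if the signature matches "HVM INFO" and all length bytes sum to zero modulo 256, the same convention ACPI tables use. The table builder therefore has to store the two's complement of the remaining bytes in a checksum field. A sketch of the builder side, assuming an illustrative layout with a uint8_t checksum covered by length (the real struct hvm_info_table lives in public/hvm/hvm_info_table.h):

    #include <stdint.h>

    struct hvm_info_sketch {          /* illustrative layout only */
        char     signature[8];        /* "HVM INFO" */
        uint32_t length;              /* covers the whole table */
        uint8_t  checksum;
        uint8_t  apic_enabled;
        uint32_t nr_vcpus;
    };

    static void set_checksum(struct hvm_info_sketch *t)
    {
        uint8_t *p = (uint8_t *)t, sum = 0;
        uint32_t i;

        t->checksum = 0;
        for ( i = 0; i < t->length; i++ )
            sum += p[i];
        t->checksum = (uint8_t)-sum;  /* now the byte-sum is zero */
    }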
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/x86_emulate.c        Mon Jan  9 11:22:17 2006
@@ -371,6 +371,21 @@
    (_type)_x; \
 })
 
+/* Access/update address held in a register, based on addressing mode. */
+#define register_address(sel, reg)                                      \
+    ((ad_bytes == sizeof(unsigned long)) ? (reg) :                      \
+     ((mode == X86EMUL_MODE_REAL) ? /* implies ad_bytes == 2 */         \
+      (((unsigned long)(sel) << 4) + ((reg) & 0xffff)) :                \
+      ((reg) & ((1UL << (ad_bytes << 3)) - 1))))
+#define register_address_increment(reg, inc)                            \
+do {                                                                    \
+    if ( ad_bytes == sizeof(unsigned long) )                            \
+        (reg) += (inc);                                                 \
+    else                                                                \
+        (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) |             \
+                (((reg) + (inc)) & ((1UL << (ad_bytes << 3)) - 1));     \
+} while (0)
+
 void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
@@ -420,32 +435,64 @@
 {
     uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
     uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
-    unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
-    unsigned int lock_prefix = 0, rep_prefix = 0, i;
+    uint16_t *seg = NULL; /* override segment */
+    unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
     int rc = 0;
     struct operand src, dst;
 
     /* Shadow copy of register state. Committed on successful emulation. */
     struct cpu_user_regs _regs = *regs;
 
+    switch ( mode )
+    {
+    case X86EMUL_MODE_REAL:
+    case X86EMUL_MODE_PROT16:
+        op_bytes = ad_bytes = 2;
+        break;
+    case X86EMUL_MODE_PROT32:
+        op_bytes = ad_bytes = 4;
+        break;
+#ifdef __x86_64__
+    case X86EMUL_MODE_PROT64:
+        op_bytes = 4;
+        ad_bytes = 8;
+        break;
+#endif
+    default:
+        return -1;
+    }
+
     /* Legacy prefixes. */
     for ( i = 0; i < 8; i++ )
     {
         switch ( b = insn_fetch(uint8_t, 1, _regs.eip) )
         {
         case 0x66: /* operand-size override */
-            op_bytes ^= 6;                    /* switch between 2/4 bytes */
+            op_bytes ^= 6;      /* switch between 2/4 bytes */
             break;
         case 0x67: /* address-size override */
-            ad_bytes ^= (mode == 8) ? 12 : 6; /* switch between 2/4/8 bytes */
+            if ( mode == X86EMUL_MODE_PROT64 )
+                ad_bytes ^= 12; /* switch between 4/8 bytes */
+            else
+                ad_bytes ^= 6;  /* switch between 2/4 bytes */
             break;
         case 0x2e: /* CS override */
+            seg = &_regs.cs;
+            break;
         case 0x3e: /* DS override */
+            seg = &_regs.ds;
+            break;
         case 0x26: /* ES override */
+            seg = &_regs.es;
+            break;
         case 0x64: /* FS override */
+            seg = &_regs.fs;
+            break;
         case 0x65: /* GS override */
+            seg = &_regs.gs;
+            break;
         case 0x36: /* SS override */
-            DPRINTF("Warning: ignoring a segment override.\n");
+            seg = &_regs.ss;
             break;
         case 0xf0: /* LOCK */
             lock_prefix = 1;
@@ -461,8 +508,12 @@
     }
  done_prefixes:
 
+    /* Not quite the same as 80386 real mode, but hopefully good enough. */
+    if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) )
+        goto cannot_emulate;
+
     /* REX prefix. */
-    if ( (mode == 8) && ((b & 0xf0) == 0x40) )
+    if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
     {
         rex_prefix = b;
         if ( b & 8 )
@@ -674,7 +725,7 @@
         emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
         break;
     case 0x63: /* movsxd */
-        if ( mode != 8 ) /* x86/64 long mode only */
+        if ( mode != X86EMUL_MODE_PROT64 )
             goto cannot_emulate;
         dst.val = (int32_t)src.val;
         break;
@@ -721,12 +772,13 @@
         dst.val = src.val;
         break;
     case 0x8f: /* pop (sole member of Grp1a) */
-        /* 64-bit mode: POP defaults to 64-bit operands. */
-        if ( (mode == 8) && (dst.bytes == 4) )
+        /* 64-bit mode: POP always pops a 64-bit operand. */
+        if ( mode == X86EMUL_MODE_PROT64 )
             dst.bytes = 8;
-        if ( (rc = ops->read_std(_regs.esp, &dst.val, dst.bytes)) != 0 )
+        if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
+                                 &dst.val, dst.bytes)) != 0 )
             goto done;
-        _regs.esp += dst.bytes;
+        register_address_increment(_regs.esp, dst.bytes);
         break;
     case 0xc0 ... 0xc1: grp2: /* Grp2 */
         switch ( modrm_reg )
@@ -797,16 +849,17 @@
             emulate_1op("dec", dst, _regs.eflags);
             break;
         case 6: /* push */
-            /* 64-bit mode: PUSH defaults to 64-bit operands. */
-            if ( (mode == 8) && (dst.bytes == 4) )
+            /* 64-bit mode: PUSH always pushes a 64-bit operand. */
+            if ( mode == X86EMUL_MODE_PROT64 )
             {
                 dst.bytes = 8;
                 if ( (rc = ops->read_std((unsigned long)dst.ptr,
                                          &dst.val, 8)) != 0 )
                     goto done;
             }
-            _regs.esp -= dst.bytes;
-            if ( (rc = ops->write_std(_regs.esp, dst.val, dst.bytes)) != 0 )
+            register_address_increment(_regs.esp, -dst.bytes);
+            if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
+                                      dst.val, dst.bytes)) != 0 )
                 goto done;
             dst.val = dst.orig_val; /* skanky: disable writeback */
             break;
@@ -873,19 +926,22 @@
         {
             /* Write fault: destination is special memory. */
             dst.ptr = (unsigned long *)cr2;
-            if ( (rc = ops->read_std(_regs.esi - _regs.edi + cr2, 
+            if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
+                                                      _regs.esi),
                                      &dst.val, dst.bytes)) != 0 )
                 goto done;
         }
         else
         {
             /* Read fault: source is special memory. */
-            dst.ptr = (unsigned long *)(_regs.edi - _regs.esi + cr2);
+            dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
             if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
                 goto done;
         }
-        _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
-        _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+        register_address_increment(
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+        register_address_increment(
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xa6 ... 0xa7: /* cmps */
         DPRINTF("Urk! I don't handle CMPS.\n");
@@ -895,7 +951,8 @@
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.ptr   = (unsigned long *)cr2;
         dst.val   = _regs.eax;
-        _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+        register_address_increment(
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
         dst.type  = OP_REG;
@@ -903,7 +960,8 @@
         dst.ptr   = (unsigned long *)&_regs.eax;
         if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
             goto done;
-        _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+        register_address_increment(
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xae ... 0xaf: /* scas */
         DPRINTF("Urk! I don't handle SCAS.\n");
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/bitmap.c
--- a/xen/common/bitmap.c       Mon Jan  9 11:19:55 2006
+++ b/xen/common/bitmap.c       Mon Jan  9 11:22:17 2006
@@ -282,6 +282,111 @@
 #endif
 EXPORT_SYMBOL(__bitmap_weight);
 
+/*
+ * Bitmap printing & parsing functions: first version by Bill Irwin,
+ * second version by Paul Jackson, third by Joe Korty.
+ */
+
+#define CHUNKSZ                                32
+#define nbits_to_hold_value(val)       fls(val)
+#define roundup_power2(val,modulus)    (((val) + (modulus) - 1) & ~((modulus) - 1))
+#define unhex(c)                       (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
+#define BASEDEC 10             /* fancier cpuset lists input in decimal */
+
+/**
+ * bitmap_scnprintf - convert bitmap to an ASCII hex string.
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Exactly @nmaskbits bits are displayed.  Hex digits are grouped into
+ * comma-separated sets of eight digits per set.
+ */
+int bitmap_scnprintf(char *buf, unsigned int buflen,
+       const unsigned long *maskp, int nmaskbits)
+{
+       int i, word, bit, len = 0;
+       unsigned long val;
+       const char *sep = "";
+       int chunksz;
+       u32 chunkmask;
+
+       chunksz = nmaskbits & (CHUNKSZ - 1);
+       if (chunksz == 0)
+               chunksz = CHUNKSZ;
+
+       i = roundup_power2(nmaskbits, CHUNKSZ) - CHUNKSZ;
+       for (; i >= 0; i -= CHUNKSZ) {
+               chunkmask = ((1ULL << chunksz) - 1);
+               word = i / BITS_PER_LONG;
+               bit = i % BITS_PER_LONG;
+               val = (maskp[word] >> bit) & chunkmask;
+               len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep,
+                       (chunksz+3)/4, val);
+               chunksz = CHUNKSZ;
+               sep = ",";
+       }
+       return len;
+}
+EXPORT_SYMBOL(bitmap_scnprintf);
+
+/*
+ * bscnl_emit(buf, buflen, rbot, rtop, bp)
+ *
+ * Helper routine for bitmap_scnlistprintf().  Write decimal number
+ * or range to buf, suppressing output past buf+buflen, with optional
+ * comma-prefix.  Return len of what would be written to buf, if it
+ * all fit.
+ */
+static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len)
+{
+       if (len > 0)
+               len += scnprintf(buf + len, buflen - len, ",");
+       if (rbot == rtop)
+               len += scnprintf(buf + len, buflen - len, "%d", rbot);
+       else
+               len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop);
+       return len;
+}
+
+/**
+ * bitmap_scnlistprintf - convert bitmap to list format ASCII string
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Output format is a comma-separated list of decimal numbers and
+ * ranges.  Consecutively set bits are shown as two hyphen-separated
+ * decimal numbers, the smallest and largest bit numbers set in
+ * the range.  Output format is compatible with the format
+ * accepted as input by bitmap_parselist().
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing '\0', as
+ * per ISO C99.
+ */
+int bitmap_scnlistprintf(char *buf, unsigned int buflen,
+       const unsigned long *maskp, int nmaskbits)
+{
+       int len = 0;
+       /* current bit is 'cur', most recently seen range is [rbot, rtop] */
+       int cur, rbot, rtop;
+
+       rbot = cur = find_first_bit(maskp, nmaskbits);
+       while (cur < nmaskbits) {
+               rtop = cur;
+               cur = find_next_bit(maskp, nmaskbits, cur+1);
+               if (cur >= nmaskbits || cur > rtop + 1) {
+                       len = bscnl_emit(buf, buflen, rbot, rtop, len);
+                       rbot = cur;
+               }
+       }
+       return len;
+}
+EXPORT_SYMBOL(bitmap_scnlistprintf);
+
 /**
  *     bitmap_find_free_region - find a contiguous aligned mem region
  *     @bitmap: an array of unsigned longs corresponding to the bitmap
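
A note on bitmap_scnprintf()'s output format: the first (most significant) chunk printed is nmaskbits % 32 bits wide (or a full 32 bits when the size is a multiple of 32), with full 32-bit chunks after it, so the string always reflects the declared bitmap size. A usage sketch for a 40-bit map (buffer size and bit choices are illustrative):

    unsigned long mask[BITS_TO_LONGS(40)] = { 0 };
    char buf[32];

    set_bit(0, mask);
    set_bit(39, mask);
    bitmap_scnprintf(buf, sizeof(buf), mask, 40);
    /* buf is now "80,00000001": an 8-bit chunk for bits 32-39,
     * then one full 32-bit chunk, most significant chunk first. */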
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Mon Jan  9 11:19:55 2006
+++ b/xen/common/dom0_ops.c     Mon Jan  9 11:22:17 2006
@@ -16,6 +16,7 @@
 #include <xen/domain_page.h>
 #include <xen/trace.h>
 #include <xen/console.h>
+#include <xen/iocap.h>
 #include <asm/current.h>
 #include <public/dom0_ops.h>
 #include <public/sched_ctl.h>
@@ -109,13 +110,13 @@
     switch ( op->cmd )
     {
 
-    case DOM0_SETDOMAININFO:
-    {
-        struct domain *d = find_domain_by_id(op->u.setdomaininfo.domain);
+    case DOM0_SETVCPUCONTEXT:
+    {
+        struct domain *d = find_domain_by_id(op->u.setvcpucontext.domain);
         ret = -ESRCH;
         if ( d != NULL )
         {
-            ret = set_info_guest(d, &op->u.setdomaininfo);
+            ret = set_info_guest(d, &op->u.setvcpucontext);
             put_domain(d);
         }
     }
@@ -283,11 +284,12 @@
     }
     break;
 
-    case DOM0_PINCPUDOMAIN:
-    {
-        domid_t dom = op->u.pincpudomain.domain;
+    case DOM0_SETVCPUAFFINITY:
+    {
+        domid_t dom = op->u.setvcpuaffinity.domain;
         struct domain *d = find_domain_by_id(dom);
         struct vcpu *v;
+        cpumask_t new_affinity;
 
         if ( d == NULL )
         {
@@ -295,15 +297,15 @@
             break;
         }
         
-        if ( (op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) ||
-             !d->vcpu[op->u.pincpudomain.vcpu] )
+        if ( (op->u.setvcpuaffinity.vcpu >= MAX_VIRT_CPUS) ||
+             !d->vcpu[op->u.setvcpuaffinity.vcpu] )
         {
             ret = -EINVAL;
             put_domain(d);
             break;
         }
 
-        v = d->vcpu[op->u.pincpudomain.vcpu];
+        v = d->vcpu[op->u.setvcpuaffinity.vcpu];
         if ( v == NULL )
         {
             ret = -ESRCH;
@@ -318,22 +320,13 @@
             break;
         }
 
-        v->cpumap = op->u.pincpudomain.cpumap;
-
-        if ( v->cpumap == CPUMAP_RUNANYWHERE )
-        {
-            clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
-        }
-        else
-        {
-            /* pick a new cpu from the usable map */
-            int new_cpu;
-            new_cpu = (int)find_first_set_bit(v->cpumap) % num_online_cpus();
-            vcpu_pause(v);
-            vcpu_migrate_cpu(v, new_cpu);
-            set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
-            vcpu_unpause(v);
-        }
+        new_affinity = v->cpu_affinity;
+        memcpy(cpus_addr(new_affinity),
+               &op->u.setvcpuaffinity.cpumap,
+               min((int)BITS_TO_LONGS(NR_CPUS),
+                   (int)sizeof(op->u.setvcpuaffinity.cpumap)));
+
+        ret = vcpu_set_affinity(v, &new_affinity);
 
         put_domain(d);
     }
@@ -505,7 +498,11 @@
         op->u.getvcpuinfo.running  = test_bit(_VCPUF_running, &v->vcpu_flags);
         op->u.getvcpuinfo.cpu_time = v->cpu_time;
         op->u.getvcpuinfo.cpu      = v->processor;
-        op->u.getvcpuinfo.cpumap   = v->cpumap;
+        op->u.getvcpuinfo.cpumap   = 0;
+        memcpy(&op->u.getvcpuinfo.cpumap,
+               cpus_addr(v->cpu_affinity),
+               min((int)BITS_TO_LONGS(NR_CPUS),
+                   (int)sizeof(op->u.getvcpuinfo.cpumap)));
         ret = 0;
 
         if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )     
@@ -582,6 +579,7 @@
         }
     }
     break;
+
     case DOM0_SETDEBUGGING:
     {
         struct domain *d; 
@@ -596,6 +594,53 @@
             put_domain(d);
             ret = 0;
         }
+    }
+    break;
+
+    case DOM0_IRQ_PERMISSION:
+    {
+        struct domain *d;
+        unsigned int pirq = op->u.irq_permission.pirq;
+
+        ret = -EINVAL;
+        if ( pirq >= NR_PIRQS )
+            break;
+
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.irq_permission.domain);
+        if ( d == NULL )
+            break;
+
+        if ( op->u.irq_permission.allow_access )
+            ret = irq_permit_access(d, pirq);
+        else
+            ret = irq_deny_access(d, pirq);
+
+        put_domain(d);
+    }
+    break;
+
+    case DOM0_IOMEM_PERMISSION:
+    {
+        struct domain *d;
+        unsigned long pfn = op->u.iomem_permission.first_pfn;
+        unsigned long nr_pfns = op->u.iomem_permission.nr_pfns;
+
+        ret = -EINVAL;
+        if ( (pfn + nr_pfns - 1) < pfn ) /* wrap? */
+            break;
+
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.iomem_permission.domain);
+        if ( d == NULL )
+            break;
+
+        if ( op->u.iomem_permission.allow_access )
+            ret = iomem_permit_access(d, pfn, pfn + nr_pfns - 1);
+        else
+            ret = iomem_deny_access(d, pfn, pfn + nr_pfns - 1);
+
+        put_domain(d);
     }
     break;
 
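The wrap test in the new DOM0_IOMEM_PERMISSION handler is worth noting: for an inclusive range [pfn, pfn + nr_pfns - 1], overflow shows up as the end computing to less than the start. A minimal sketch:

    /* Reject ranges whose inclusive end wraps around the address space,
     * e.g. pfn = ULONG_MAX - 1 with nr_pfns = 4. */
    static int range_wraps(unsigned long pfn, unsigned long nr_pfns)
    {
        return (pfn + nr_pfns - 1) < pfn;
    }
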
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/domain.c
--- a/xen/common/domain.c       Mon Jan  9 11:19:55 2006
+++ b/xen/common/domain.c       Mon Jan  9 11:22:17 2006
@@ -16,6 +16,7 @@
 #include <xen/console.h>
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
+#include <xen/rangeset.h>
 #include <asm/debugger.h>
 #include <public/dom0_ops.h>
 #include <public/sched.h>
@@ -50,25 +51,24 @@
     else
         set_bit(_DOMF_ctrl_pause, &d->domain_flags);
 
-    if ( !is_idle_task(d) &&
+    if ( !is_idle_domain(d) &&
          ((evtchn_init(d) != 0) || (grant_table_create(d) != 0)) )
-    {
-        evtchn_destroy(d);
-        free_domain(d);
-        return NULL;
-    }
+        goto fail1;
     
     if ( (v = alloc_vcpu(d, 0, cpu)) == NULL )
-    {
-        grant_table_destroy(d);
-        evtchn_destroy(d);
-        free_domain(d);
-        return NULL;
-    }
-
-    arch_do_createdomain(v);
-    
-    if ( !is_idle_task(d) )
+        goto fail2;
+
+    rangeset_domain_initialise(d);
+
+    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
+    d->irq_caps   = rangeset_new(d, "Interrupts", 0);
+
+    if ( (d->iomem_caps == NULL) ||
+         (d->irq_caps == NULL) ||
+         (arch_do_createdomain(v) != 0) )
+        goto fail3;
+
+    if ( !is_idle_domain(d) )
     {
         write_lock(&domlist_lock);
         pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
@@ -83,6 +83,15 @@
     }
 
     return d;
+
+ fail3:
+    rangeset_domain_destroy(d);
+ fail2:
+    grant_table_destroy(d);
+ fail1:
+    evtchn_destroy(d);
+    free_domain(d);
+    return NULL;
 }
 
 
@@ -164,20 +173,23 @@
 
     BUG_ON(d == NULL);
     BUG_ON(d == current->domain);
-    BUG_ON(!test_bit(_DOMF_shuttingdown, &d->domain_flags));
-    BUG_ON(test_bit(_DOMF_shutdown, &d->domain_flags));
+
+    LOCK_BIGLOCK(d);
 
     /* Make sure that every vcpu is descheduled before we finalise. */
     for_each_vcpu ( d, v )
         vcpu_sleep_sync(v);
-    BUG_ON(!cpus_empty(d->cpumask));
+    BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
 
     sync_pagetable_state(d);
 
-    set_bit(_DOMF_shutdown, &d->domain_flags);
-    clear_bit(_DOMF_shuttingdown, &d->domain_flags);
-
-    send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
+    /* Don't set DOMF_shutdown until execution contexts are sync'ed. */
+    if ( !test_and_set_bit(_DOMF_shutdown, &d->domain_flags) )
+        send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
+
+    UNLOCK_BIGLOCK(d);
+
+    put_domain(d);
 }
 
 static __init int domain_shutdown_finaliser_init(void)
@@ -213,16 +225,17 @@
 
     /* Mark the domain as shutting down. */
     d->shutdown_code = reason;
-    if ( !test_and_set_bit(_DOMF_shuttingdown, &d->domain_flags) )
-    {
-        /* This vcpu won the race to finalise the shutdown. */
-        domain_shuttingdown[smp_processor_id()] = d;
-        raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
-    }
 
     /* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
     for_each_vcpu ( d, v )
+    {
+        atomic_inc(&v->pausecnt);
         vcpu_sleep_nosync(v);
+    }
+
+    get_knownalive_domain(d);
+    domain_shuttingdown[smp_processor_id()] = d;
+    raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
 }
 
 
@@ -271,6 +284,8 @@
     *pd = d->next_in_hashbucket;
     write_unlock(&domlist_lock);
 
+    rangeset_domain_destroy(d);
+
     evtchn_destroy(d);
     grant_table_destroy(d);
 
@@ -346,11 +361,11 @@
  * of domains other than domain 0. ie. the domains that are being built by 
  * the userspace dom0 domain builder.
  */
-int set_info_guest(struct domain *d, dom0_setdomaininfo_t *setdomaininfo)
+int set_info_guest(struct domain *d, dom0_setvcpucontext_t *setvcpucontext)
 {
     int rc = 0;
     struct vcpu_guest_context *c = NULL;
-    unsigned long vcpu = setdomaininfo->vcpu;
+    unsigned long vcpu = setvcpucontext->vcpu;
     struct vcpu *v; 
 
     if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) )
@@ -363,7 +378,7 @@
         return -ENOMEM;
 
     rc = -EFAULT;
-    if ( copy_from_user(c, setdomaininfo->ctxt, sizeof(*c)) == 0 )
+    if ( copy_from_user(c, setvcpucontext->ctxt, sizeof(*c)) == 0 )
         rc = arch_set_info_guest(v, c);
 
     xfree(c);
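
The reworked error handling in do_createdomain() above is the classic goto-ladder: each later failure jumps to a label that falls through every earlier cleanup in reverse order of construction, replacing the duplicated unwind blocks the old code carried at each failure site. Distilled into a sketch (all names here are hypothetical):

    struct thing *create_thing(void)
    {
        struct thing *t = alloc_thing();
        if ( t == NULL )
            return NULL;

        if ( step1(t) != 0 )
            goto fail1;
        if ( step2(t) != 0 )
            goto fail2;
        if ( step3(t) != 0 )
            goto fail3;
        return t;

     fail3:
        undo_step2(t);       /* unwind in reverse order */
     fail2:
        undo_step1(t);
     fail1:
        free_thing(t);
        return NULL;
    }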
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Mon Jan  9 11:19:55 2006
+++ b/xen/common/event_channel.c        Mon Jan  9 11:22:17 2006
@@ -22,6 +22,7 @@
 #include <xen/sched.h>
 #include <xen/event.h>
 #include <xen/irq.h>
+#include <xen/iocap.h>
 #include <asm/current.h>
 
 #include <public/xen.h>
@@ -241,6 +242,9 @@
 
     if ( pirq >= ARRAY_SIZE(d->pirq_to_evtchn) )
         return -EINVAL;
+
+    if ( !irq_access_permitted(d, pirq) )
+        return -EPERM;
 
     spin_lock(&d->evtchn_lock);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Mon Jan  9 11:19:55 2006
+++ b/xen/common/grant_table.c  Mon Jan  9 11:22:17 2006
@@ -469,7 +469,7 @@
     for ( i = 0; i < count; i++ )
         (void)__gnttab_unmap_grant_ref(&uop[i]);
 
-    flush_tlb_mask(current->domain->cpumask);
+    flush_tlb_mask(current->domain->domain_dirty_cpumask);
 
     return 0;
 }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Mon Jan  9 11:19:55 2006
+++ b/xen/common/keyhandler.c   Mon Jan  9 11:22:17 2006
@@ -11,6 +11,7 @@
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <xen/domain.h>
+#include <xen/rangeset.h>
 #include <asm/debugger.h>
 
 #define KEY_MAX 256
@@ -96,44 +97,60 @@
     machine_restart(NULL); 
 }
 
-static void do_task_queues(unsigned char key)
+static void cpuset_print(char *set, int size, cpumask_t mask)
+{
+    *set++ = '{';
+    set += cpulist_scnprintf(set, size-2, mask);
+    *set++ = '}';
+    *set++ = '\0';
+}
+
+static void dump_domains(unsigned char key)
 {
     struct domain *d;
     struct vcpu   *v;
     s_time_t       now = NOW();
-
-    printk("'%c' pressed -> dumping task queues (now=0x%X:%08X)\n", key,
+    char           cpuset[100];
+
+    printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
            (u32)(now>>32), (u32)now); 
 
     read_lock(&domlist_lock);
 
     for_each_domain ( d )
     {
-        printk("Xen: DOM %u, flags=%lx refcnt=%d nr_pages=%d "
-               "xenheap_pages=%d\n", d->domain_id, d->domain_flags,
-               atomic_read(&d->refcnt), d->tot_pages, d->xenheap_pages);
-        /* The handle is printed according to the OSF DCE UUID spec., even
-           though it is not necessarily such a thing, for ease of use when it
-           _is_ one of those. */
-        printk("     handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
+        printk("General information for domain %u:\n", d->domain_id);
+        cpuset_print(cpuset, sizeof(cpuset), d->domain_dirty_cpumask);
+        printk("    flags=%lx refcnt=%d nr_pages=%d xenheap_pages=%d "
+               "dirty_cpus=%s\n",
+               d->domain_flags, atomic_read(&d->refcnt),
+               d->tot_pages, d->xenheap_pages, cpuset);
+        printk("    handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
                "%02x%02x-%02x%02x%02x%02x%02x%02x\n",
                d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
                d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
                d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
                d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
 
+        rangeset_domain_printk(d);
+
         dump_pageframe_info(d);
                
+        printk("VCPU information and callbacks for domain %u:\n",
+               d->domain_id);
         for_each_vcpu ( d, v ) {
-            printk("Guest: %p CPU %d [has=%c] flags=%lx "
-                   "upcall_pend = %02x, upcall_mask = %02x\n", v,
-                   v->processor,
+            printk("    VCPU%d: CPU%d [has=%c] flags=%lx "
+                   "upcall_pend = %02x, upcall_mask = %02x ",
+                   v->vcpu_id, v->processor,
                    test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F',
                    v->vcpu_flags,
                    v->vcpu_info->evtchn_upcall_pending, 
                    v->vcpu_info->evtchn_upcall_mask);
-            printk("Notifying guest... %d/%d\n", d->domain_id, v->vcpu_id); 
-            printk("port %d/%d stat %d %d %d\n",
+            cpuset_print(cpuset, sizeof(cpuset), v->vcpu_dirty_cpumask);
+            printk("dirty_cpus=%s ", cpuset);
+            cpuset_print(cpuset, sizeof(cpuset), v->cpu_affinity);
+            printk("cpu_affinity=%s\n", cpuset);
+            printk("    Notifying guest (virq %d, port %d, stat %d/%d/%d)\n",
                    VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
                    test_bit(v->virq_to_evtchn[VIRQ_DEBUG], 
                             &d->shared_info->evtchn_pending[0]),
@@ -191,7 +208,7 @@
     register_keyhandler(
         'L', reset_sched_histo, "reset sched latency histogram");
     register_keyhandler(
-        'q', do_task_queues, "dump task queues + guest state");
+        'q', dump_domains, "dump domain (and guest debug) info");
     register_keyhandler(
         'r', dump_runq,      "dump run queues");
     register_irq_keyhandler(
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/memory.c
--- a/xen/common/memory.c       Mon Jan  9 11:19:55 2006
+++ b/xen/common/memory.c       Mon Jan  9 11:22:17 2006
@@ -15,6 +15,7 @@
 #include <xen/sched.h>
 #include <xen/event.h>
 #include <xen/shadow.h>
+#include <xen/iocap.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 #include <public/memory.h>
@@ -35,7 +36,8 @@
          !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
         return 0;
 
-    if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) )
+    if ( (extent_order != 0) &&
+         !multipage_allocation_permitted(current->domain) )
     {
         DPRINTK("Only I/O-capable domains may allocate multi-page extents.\n");
         return 0;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Mon Jan  9 11:19:55 2006
+++ b/xen/common/page_alloc.c   Mon Jan  9 11:22:17 2006
@@ -615,7 +615,7 @@
             shadow_drop_references(d, &pg[i]);
             ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
             pg[i].tlbflush_timestamp  = tlbflush_current_time();
-            pg[i].u.free.cpumask      = d->cpumask;
+            pg[i].u.free.cpumask      = d->domain_dirty_cpumask;
             list_del(&pg[i].list);
         }
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c    Mon Jan  9 11:19:55 2006
+++ b/xen/common/sched_bvt.c    Mon Jan  9 11:22:17 2006
@@ -31,7 +31,8 @@
     struct list_head    run_list;         /* runqueue list pointers */
     u32                 avt;              /* actual virtual time */
     u32                 evt;              /* effective virtual time */
-    struct vcpu  *vcpu;
+    int                 migrated;         /* migrated to a new CPU */
+    struct vcpu         *vcpu;
     struct bvt_dom_info *inf;
 };
 
@@ -219,7 +220,7 @@
 
     einf->vcpu = v;
 
-    if ( is_idle_task(v->domain) )
+    if ( is_idle_domain(v->domain) )
     {
         einf->avt = einf->evt = ~0U;
         BUG_ON(__task_on_runqueue(v));
@@ -250,9 +251,11 @@
 
     /* Set the BVT parameters. AVT should always be updated 
        if CPU migration ocurred.*/
-    if ( einf->avt < CPU_SVT(cpu) || 
-         unlikely(test_bit(_VCPUF_cpu_migrated, &v->vcpu_flags)) )
+    if ( (einf->avt < CPU_SVT(cpu)) || einf->migrated )
+    {
         einf->avt = CPU_SVT(cpu);
+        einf->migrated = 0;
+    }
 
     /* Deal with warping here. */
     einf->evt = calc_evt(v, einf->avt);
@@ -265,7 +268,7 @@
         ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
         ctx_allow;
 
-    if ( is_idle_task(curr->domain) || (einf->evt <= curr_evt) )
+    if ( is_idle_domain(curr->domain) || (einf->evt <= curr_evt) )
         cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
     else if ( schedule_data[cpu].s_timer.expires > r_time )
         set_ac_timer(&schedule_data[cpu].s_timer, r_time);
@@ -274,11 +277,27 @@
 
 static void bvt_sleep(struct vcpu *v)
 {
-    if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
+    if ( schedule_data[v->processor].curr == v )
         cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
     else  if ( __task_on_runqueue(v) )
         __del_from_runqueue(v);
 }
+
+
+static int bvt_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+    if ( v == current )
+        return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
+
+    vcpu_pause(v);
+    v->cpu_affinity = *affinity;
+    v->processor = first_cpu(v->cpu_affinity);
+    EBVT_INFO(v)->migrated = 1;
+    vcpu_unpause(v);
+
+    return 0;
+}
+
 
 /**
  * bvt_free_task - free BVT private structures for a task
@@ -380,7 +399,7 @@
     ASSERT(prev_einf != NULL);
     ASSERT(__task_on_runqueue(prev));
 
-    if ( likely(!is_idle_task(prev->domain)) ) 
+    if ( likely(!is_idle_domain(prev->domain)) ) 
     {
         prev_einf->avt = calc_avt(prev, now);
         prev_einf->evt = calc_evt(prev, prev_einf->avt);
@@ -390,7 +409,7 @@
         
         __del_from_runqueue(prev);
         
-        if ( domain_runnable(prev) )
+        if ( vcpu_runnable(prev) )
             __add_to_runqueue_tail(prev);
     }
 
@@ -471,13 +490,13 @@
     }
 
     /* work out time for next run through scheduler */
-    if ( is_idle_task(next->domain) ) 
+    if ( is_idle_domain(next->domain) ) 
     {
         r_time = ctx_allow;
         goto sched_done;
     }
 
-    if ( (next_prime == NULL) || is_idle_task(next_prime->domain) )
+    if ( (next_prime == NULL) || is_idle_domain(next_prime->domain) )
     {
         /* We have only one runnable task besides the idle task. */
         r_time = 10 * ctx_allow;     /* RN: random constant */
@@ -557,6 +576,7 @@
     .dump_cpu_state = bvt_dump_cpu_state,
     .sleep          = bvt_sleep,
     .wake           = bvt_wake,
+    .set_affinity   = bvt_set_affinity
 };
 
 /*
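
A note on the new 'migrated' flag: it replaces the global
_VCPUF_cpu_migrated bit and makes the wake-up path clamp the VCPU's actual
virtual time to the destination runqueue's virtual time. Numerically (all
values assumed for illustration):

    static void bvt_clamp_demo(void)
    {
        unsigned int avt = 1000;    /* stale avt carried from the old CPU */
        unsigned int svt = 50000;   /* CPU_SVT(cpu) on the destination    */
        int migrated = 1;           /* set by bvt_set_affinity() above    */

        if ( (avt < svt) || migrated )
        {
            avt = svt;        /* start fair relative to the new runqueue */
            migrated = 0;     /* one-shot: consumed on the first requeue */
        }
        (void)avt;
    }

Without the clamp, a VCPU arriving with a tiny stale avt would be scheduled
to the exclusion of everything else until it caught up.
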
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Mon Jan  9 11:19:55 2006
+++ b/xen/common/sched_sedf.c   Mon Jan  9 11:22:17 2006
@@ -325,21 +325,29 @@
     list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
 }
 
+
 /* Allocates memory for per domain private scheduling data*/
-static int sedf_alloc_task(struct vcpu *d) {
-    PRINT(2,"sedf_alloc_task was called, domain-id 
%i.%i\n",d->domain->domain_id,
-          d->vcpu_id);
-    if (d->domain->sched_priv == NULL) {
-        if ((d->domain->sched_priv = 
-             xmalloc(struct sedf_dom_info)) == NULL )
+static int sedf_alloc_task(struct vcpu *d)
+{
+    PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
+          d->domain->domain_id, d->vcpu_id);
+
+    if ( d->domain->sched_priv == NULL )
+    {
+        d->domain->sched_priv = xmalloc(struct sedf_dom_info);
+        if ( d->domain->sched_priv == NULL )
             return -1;
         memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
     }
-    if ((d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
+
+    if ( (d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
         return -1;
+
     memset(d->sched_priv, 0, sizeof(struct sedf_vcpu_info));
+
     return 0;
 }
+
 
 /* Setup the sedf_dom_info */
 static void sedf_add_task(struct vcpu *d)
@@ -363,14 +371,17 @@
         INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
     }
        
-    if (d->domain->domain_id==0) {
+    if ( d->domain->domain_id == 0 )
+    {
         /*set dom0 to something useful to boot the machine*/
         inf->period    = MILLISECS(20);
         inf->slice     = MILLISECS(15);
         inf->latency   = 0;
         inf->deadl_abs = 0;
         inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
-    } else {
+    }
+    else
+    {
         /*other domains run in best effort mode*/
         inf->period    = WEIGHT_PERIOD;
         inf->slice     = 0;
@@ -379,14 +390,18 @@
         inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
         inf->extraweight = 1;
     }
+
     inf->period_orig = inf->period; inf->slice_orig = inf->slice;
     INIT_LIST_HEAD(&(inf->list));
     INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
     INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
  
-    if (!is_idle_task(d->domain)) {
+    if ( !is_idle_domain(d->domain) )
+    {
         extraq_check(d);
-    } else {
+    }
+    else
+    {
         EDOM_INFO(d)->deadl_abs = 0;
         EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
     }
@@ -396,19 +411,28 @@
 static void sedf_free_task(struct domain *d)
 {
     int i;
+
     PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
+
     ASSERT(d->sched_priv != NULL);
     xfree(d->sched_priv);
  
-    for (i = 0; i < MAX_VIRT_CPUS; i++)
-        if ( d->vcpu[i] ) {
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+    {
+        if ( d->vcpu[i] )
+        {
             ASSERT(d->vcpu[i]->sched_priv != NULL);
             xfree(d->vcpu[i]->sched_priv);
         }
-}
-
-/* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
-static inline void desched_edf_dom (s_time_t now, struct vcpu* d) {
+    }
+}
+
+/*
+ * Handles the rescheduling & bookkeeping of domains running in their
+ * guaranteed timeslice.
+ */
+static void desched_edf_dom(s_time_t now, struct vcpu* d)
+{
     struct sedf_vcpu_info* inf = EDOM_INFO(d);
     /*current domain is running in real time mode*/
  
@@ -418,27 +442,30 @@
 
     /*scheduling decisions, which don't remove the running domain
       from the runq*/
-    if ((inf->cputime < inf->slice) && sedf_runnable(d))
+    if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
         return;
   
     __del_from_queue(d);
   
     /*manage bookkeeping (i.e. calculate next deadline,
       memorize overun-time of slice) of finished domains*/
-    if (inf->cputime >= inf->slice) {
+    if ( inf->cputime >= inf->slice )
+    {
         inf->cputime -= inf->slice;
   
-        if (inf->period < inf->period_orig) {
+        if ( inf->period < inf->period_orig )
+        {
             /*this domain runs in latency scaling or burst mode*/
 #if (UNBLOCK == UNBLOCK_BURST)
             /*if we are running in burst scaling, wait for two periods
               before scaling periods up again*/ 
-            if (now - inf->unblock_abs >= 2 * inf->period)
+            if ( (now - inf->unblock_abs) >= (2 * inf->period) )
 #endif
             {
                 inf->period *= 2; inf->slice *= 2;
-                if ((inf->period > inf->period_orig) ||
-                    (inf->slice > inf->slice_orig)) {
+                if ( (inf->period > inf->period_orig) ||
+                     (inf->slice > inf->slice_orig) )
+                {
                     /*reset slice & period*/
                     inf->period = inf->period_orig;
                     inf->slice = inf->slice_orig;
@@ -450,36 +477,46 @@
     }
  
     /*add a runnable domain to the waitqueue*/
-    if (sedf_runnable(d))
+    if ( sedf_runnable(d) )
+    {
         __add_to_waitqueue_sort(d);
-    else {
+    }
+    else
+    {
         /*we have a blocked realtime task -> remove it from exqs too*/
 #if (EXTRA > EXTRA_OFF)
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-        if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
-#endif
-        if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
-#endif
-    }
+        if ( extraq_on(d, EXTRA_PEN_Q) )
+            extraq_del(d, EXTRA_PEN_Q);
+#endif
+        if ( extraq_on(d, EXTRA_UTIL_Q) )
+            extraq_del(d, EXTRA_UTIL_Q);
+#endif
+    }
+
     ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
     ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
                  sedf_runnable(d)));
 }
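
Numerically, the slice bookkeeping above works like this (values assumed
for illustration):

    static void sedf_debt_demo(void)
    {
        s_time_t period  = MILLISECS(20);
        s_time_t slice   = MILLISECS(15);
        s_time_t cputime = MILLISECS(17);  /* ran 2ms over the slice */

        /* desched_edf_dom(): the 2ms overrun is carried forward ... */
        cputime -= slice;
        /* ... so a persistently overrunning domain pays its debt in the
         * following periods instead of having it forgotten. */
        (void)cputime; (void)period;
    }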
 
+
 /* Update all elements on the queues */
-static inline void update_queues(s_time_t now, struct list_head* runq, 
-                                 struct list_head* waitq) {
-    struct list_head     *cur,*tmp;
+static void update_queues(
+    s_time_t now, struct list_head *runq, struct list_head *waitq)
+{
+    struct list_head     *cur, *tmp;
     struct sedf_vcpu_info *curinf;
  
     PRINT(3,"Updating waitq..\n");
+
     /*check for the first elements of the waitqueue, whether their
       next period has already started*/
     list_for_each_safe(cur, tmp, waitq) {
         curinf = list_entry(cur, struct sedf_vcpu_info, list);
         PRINT(4,"\tLooking @ dom %i.%i\n",
               curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
-        if (PERIOD_BEGIN(curinf) <= now) {
+        if ( PERIOD_BEGIN(curinf) <= now )
+        {
             __del_from_queue(curinf->vcpu);
             __add_to_runqueue_sort(curinf->vcpu);
         }
@@ -488,13 +525,16 @@
     }
  
     PRINT(3,"Updating runq..\n");
+
     /*process the runq, find domains that are on
       the runqueue which shouldn't be there*/
     list_for_each_safe(cur, tmp, runq) {
         curinf = list_entry(cur,struct sedf_vcpu_info,list);
         PRINT(4,"\tLooking @ dom %i.%i\n",
               curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
-        if (unlikely(curinf->slice == 0)) {
+
+        if ( unlikely(curinf->slice == 0) )
+        {
             /*ignore domains with empty slice*/
             PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
                   curinf->vcpu->domain->domain_id,
@@ -504,7 +544,8 @@
             /*move them to their next period*/
             curinf->deadl_abs += curinf->period;
             /*ensure that the start of the next period is in the future*/
-            if (unlikely(PERIOD_BEGIN(curinf) < now)) {
+            if ( unlikely(PERIOD_BEGIN(curinf) < now) )
+            {
                 curinf->deadl_abs += 
                     (DIV_UP(now - PERIOD_BEGIN(curinf),
                            curinf->period)) * curinf->period;
@@ -513,8 +554,10 @@
             __add_to_waitqueue_sort(curinf->vcpu);
             continue;
         }
-        if (unlikely((curinf->deadl_abs < now) ||
-                     (curinf->cputime > curinf->slice))) {
+
+        if ( unlikely((curinf->deadl_abs < now) ||
+                      (curinf->cputime > curinf->slice)) )
+        {
             /*we missed the deadline or the slice was
              already finished... might happen because
               of dom_adj.*/
@@ -550,6 +593,7 @@
     PRINT(3,"done updating the queues\n");
 }
 
+
 #if (EXTRA > EXTRA_OFF)
 /* removes a domain from the head of the according extraQ and
    requeues it at a specified position:
@@ -557,9 +601,10 @@
      weighted ext.: insert in sorted list by score
    if the domain is blocked / has regained its short-block-loss
    time it is not put on any queue */
-static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
+static void desched_extra_dom(s_time_t now, struct vcpu* d)
+{
     struct sedf_vcpu_info *inf = EDOM_INFO(d);
-    int    i    = extra_get_cur_q(inf);
+    int i = extra_get_cur_q(inf);
  
 #if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
     unsigned long         oldscore;
@@ -575,14 +620,15 @@
     extraq_del(d, i);
 
 #if (EXTRA == EXTRA_ROUNDR)
-    if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
+    if ( sedf_runnable(d) && (inf->status & EXTRA_AWARE) )
         /*add to the tail if it is runnable => round-robin*/
         extraq_add_tail(d, EXTRA_UTIL_Q);
 #elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
     /*update the score*/
-    oldscore      = inf->score[i];
+    oldscore = inf->score[i];
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-    if (i == EXTRA_PEN_Q) {
+    if ( i == EXTRA_PEN_Q )
+    {
         /*domain was running in L0 extraq*/
         /*reduce block lost, probably more sophistication here!*/
         /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
@@ -605,12 +651,13 @@
         inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
             inf->short_block_lost_tot;
         oldscore = 0;
-    } else
+    }
+    else
 #endif
     {
         /*domain was running in L1 extraq => score is inverse of
           utilization and is used somewhat incremental!*/
-        if (!inf->extraweight)
+        if ( !inf->extraweight )
             /*NB: use fixed point arithmetic with 10 bits*/
             inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
                 inf->slice;
@@ -619,24 +666,32 @@
               full (ie 100%) utilization is equivalent to 128 extraweight*/
             inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
     }
+
  check_extra_queues:
     /* Adding a runnable domain to the right queue and removing blocked ones*/
-    if (sedf_runnable(d)) {
+    if ( sedf_runnable(d) )
+    {
         /*add according to score: weighted round robin*/
         if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
             ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
             extraq_add_sort_update(d, i, oldscore);
     }
-    else {
+    else
+    {
         /*remove this blocked domain from the waitq!*/
         __del_from_queue(d);
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
         /*make sure that we remove a blocked domain from the other
           extraq too*/
-        if (i == EXTRA_PEN_Q) {
-            if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
-        } else {
-            if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
+        if ( i == EXTRA_PEN_Q )
+        {
+            if ( extraq_on(d, EXTRA_UTIL_Q) )
+                extraq_del(d, EXTRA_UTIL_Q);
+        }
+        else
+        {
+            if ( extraq_on(d, EXTRA_PEN_Q) )
+                extraq_del(d, EXTRA_PEN_Q);
         }
 #endif
     }
@@ -647,16 +702,21 @@
 }
 #endif
 
-static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
-                                                        s_time_t end_xt, struct list_head *extraq[], int cpu) {
+
+static struct task_slice sedf_do_extra_schedule(
+    s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
+{
     struct task_slice   ret;
     struct sedf_vcpu_info *runinf;
     ASSERT(end_xt > now);
+
     /* Enough time left to use for extratime? */
-    if (end_xt - now < EXTRA_QUANTUM)
+    if ( end_xt - now < EXTRA_QUANTUM )
         goto return_idle;
+
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-    if (!list_empty(extraq[EXTRA_PEN_Q])) {
+    if ( !list_empty(extraq[EXTRA_PEN_Q]) )
+    {
         /*we still have elements on the level 0 extraq 
           => let those run first!*/
         runinf   = list_entry(extraq[EXTRA_PEN_Q]->next, 
@@ -667,9 +727,12 @@
 #ifdef SEDF_STATS
         runinf->pen_extra_slices++;
 #endif
-    } else
-#endif
-        if (!list_empty(extraq[EXTRA_UTIL_Q])) {
+    }
+    else
+#endif
+    {
+        if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
+        {
             /*use elements from the normal extraqueue*/
             runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
                                   struct sedf_vcpu_info,
@@ -680,6 +743,7 @@
         }
         else
             goto return_idle;
+    }
 
     ASSERT(ret.time > 0);
     ASSERT(sedf_runnable(ret.task));
@@ -692,6 +756,8 @@
     ASSERT(sedf_runnable(ret.task));
     return ret;
 }
+
+
 /* Main scheduling function
    Reasons for calling this function are:
    -timeslice for the current period used up
@@ -699,7 +765,7 @@
    -and various others ;) in general: determine which domain to run next*/
 static struct task_slice sedf_do_schedule(s_time_t now)
 {
-    int                   cpu      = current->processor;
+    int                   cpu      = smp_processor_id();
     struct list_head     *runq     = RUNQ(cpu);
     struct list_head     *waitq    = WAITQ(cpu);
 #if (EXTRA > EXTRA_OFF)
@@ -711,20 +777,21 @@
     struct task_slice      ret;
 
     /*idle tasks don't need any of the following stuff*/
-    if (is_idle_task(current->domain))
+    if (is_idle_domain(current->domain))
         goto check_waitq;
  
     /* create local state of the status of the domain, in order to avoid
        inconsistent state during scheduling decisions, because data for
-       domain_runnable is not protected by the scheduling lock!*/
-    if(!domain_runnable(current))
+       vcpu_runnable is not protected by the scheduling lock!*/
+    if ( !vcpu_runnable(current) )
         inf->status |= SEDF_ASLEEP;
  
-    if (inf->status & SEDF_ASLEEP)
+    if ( inf->status & SEDF_ASLEEP )
         inf->block_abs = now;
 
 #if (EXTRA > EXTRA_OFF)
-    if (unlikely(extra_runs(inf))) {
+    if ( unlikely(extra_runs(inf)) )
+    {
         /*special treatment of domains running in extra time*/
         desched_extra_dom(now, current);
     }
@@ -739,10 +806,12 @@
     /*now simply pick the first domain from the runqueue, which has the
       earliest deadline, because the list is sorted*/
  
-    if (!list_empty(runq)) {
+    if ( !list_empty(runq) )
+    {
         runinf   = list_entry(runq->next,struct sedf_vcpu_info,list);
         ret.task = runinf->vcpu;
-        if (!list_empty(waitq)) {
+        if ( !list_empty(waitq) )
+        {
             waitinf  = list_entry(waitq->next,
                                   struct sedf_vcpu_info,list);
             /*rerun scheduler, when scheduled domain reaches it's
@@ -751,14 +820,16 @@
             ret.time = MIN(now + runinf->slice - runinf->cputime,
                            PERIOD_BEGIN(waitinf)) - now;
         }
-        else {
+        else
+        {
             ret.time = runinf->slice - runinf->cputime;
         }
         CHECK(ret.time > 0);
         goto sched_done;
     }
  
-    if (!list_empty(waitq)) {
+    if ( !list_empty(waitq) )
+    {
         waitinf  = list_entry(waitq->next,struct sedf_vcpu_info, list);
         /*we could not find any suitable domain 
           => look for domains that are aware of extratime*/
@@ -771,7 +842,8 @@
 #endif
         CHECK(ret.time > 0);
     }
-    else {
+    else
+    {
         /*this could probably never happen, but one never knows...*/
         /*it can... imagine a second CPU, which is pure scifi ATM,
           but one never knows ;)*/
@@ -782,11 +854,13 @@
  sched_done: 
     /*TODO: Do something USEFUL when this happens and find out, why it
       still can happen!!!*/
-    if (ret.time<0) {
+    if ( ret.time < 0 )
+    {
         printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
                ret.time);
         ret.time = EXTRA_QUANTUM;
     }
+
     EDOM_INFO(ret.task)->sched_start_abs = now;
     CHECK(ret.time > 0);
     ASSERT(sedf_runnable(ret.task));
@@ -794,30 +868,36 @@
     return ret;
 }
 
-static void sedf_sleep(struct vcpu *d) {
-    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, 
d->vcpu_id);
- 
-    if (is_idle_task(d->domain))
+
+static void sedf_sleep(struct vcpu *d)
+{
+    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
+          d->domain->domain_id, d->vcpu_id);
+ 
+    if ( is_idle_domain(d->domain) )
         return;
 
     EDOM_INFO(d)->status |= SEDF_ASLEEP;
  
-    if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
+    if ( schedule_data[d->processor].curr == d )
+    {
         cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
     }
-    else  {
+    else
+    {
         if ( __task_on_queue(d) )
             __del_from_queue(d);
 #if (EXTRA > EXTRA_OFF)
-        if (extraq_on(d, EXTRA_UTIL_Q)) 
+        if ( extraq_on(d, EXTRA_UTIL_Q) ) 
             extraq_del(d, EXTRA_UTIL_Q);
 #endif
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-        if (extraq_on(d, EXTRA_PEN_Q))
+        if ( extraq_on(d, EXTRA_PEN_Q) )
             extraq_del(d, EXTRA_PEN_Q);
 #endif
     }
 }
+
 
 /* This function wakes up a domain, i.e. moves them into the waitqueue
  * things to mention are: admission control is taking place nowhere at
@@ -890,17 +970,21 @@
  *     -either behaviour can lead to missed deadlines in other domains as
  *      opposed to approaches 1,2a,2b
  */
-static inline void unblock_short_vcons
-(struct sedf_vcpu_info* inf, s_time_t now) {
+#if (UNBLOCK <= UNBLOCK_SHORT_RESUME)
+static void unblock_short_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+{
     inf->deadl_abs += inf->period;
     inf->cputime = 0;
 }
-
-static inline void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
+#endif
+
+#if (UNBLOCK == UNBLOCK_SHORT_RESUME)
+static void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
 {
     /*treat blocked time as consumed by the domain*/
     inf->cputime += now - inf->block_abs; 
-    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+    if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
+    {
         /*we don't have a reasonable amount of time in 
           our slice left :( => start in next period!*/
         unblock_short_vcons(inf, now);
@@ -910,8 +994,11 @@
         inf->short_cont++;
 #endif
 }
-static inline void unblock_short_extra_support (struct sedf_vcpu_info* inf,
-                                                s_time_t now) {
+#endif
+
+static void unblock_short_extra_support(
+    struct sedf_vcpu_info* inf, s_time_t now)
+{
     /*this unblocking scheme tries to support the domain, by assigning it
     a priority in extratime distribution according to the loss of time
     in this slice due to blocking*/
@@ -919,26 +1006,29 @@
  
     /*no more realtime execution in this period!*/
     inf->deadl_abs += inf->period;
-    if (likely(inf->block_abs)) {
+    if ( likely(inf->block_abs) )
+    {
         /*treat blocked time as consumed by the domain*/
         /*inf->cputime += now - inf->block_abs;*/
         /*penalty is time the domain would have
           had if it continued to run */
         pen = (inf->slice - inf->cputime);
-        if (pen < 0) pen = 0;
+        if ( pen < 0 )
+            pen = 0;
         /*accumulate all penalties over the periods*/
         /*inf->short_block_lost_tot += pen;*/
         /*set penalty to the current value*/
         inf->short_block_lost_tot = pen;
         /*not sure which one is better.. but seems to work well...*/
   
-        if (inf->short_block_lost_tot) {
+        if ( inf->short_block_lost_tot )
+        {
             inf->score[0] = (inf->period << 10) /
                 inf->short_block_lost_tot;
 #ifdef SEDF_STATS
             inf->pen_extra_blocks++;
 #endif
-            if (extraq_on(inf->vcpu, EXTRA_PEN_Q))
+            if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
                 /*remove domain for possible resorting!*/
                 extraq_del(inf->vcpu, EXTRA_PEN_Q);
             else
@@ -951,36 +1041,53 @@
             extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
         }
     }
+
     /*give it a fresh slice in the next period!*/
     inf->cputime = 0;
 }
-static inline void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+
+
+#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
+static void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
 {
     /* align to next future period */
     inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
         * inf->period;
     inf->cputime = 0;
 }
-
-static inline void unblock_long_cons_a (struct sedf_vcpu_info* inf,
-                                        s_time_t now) {
+#endif
+
+
+#if 0
+static void unblock_long_cons_a (struct sedf_vcpu_info* inf, s_time_t now)
+{
     /*treat the time the domain was blocked in the
-   CURRENT period as consumed by the domain*/
+     CURRENT period as consumed by the domain*/
     inf->cputime = (now - inf->deadl_abs) % inf->period; 
-    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+    if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
+    {
         /*we don't have a reasonable amount of time in our slice
           left :( => start in next period!*/
         unblock_long_vcons(inf, now);
     }
 }
-static inline void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now) {
+#endif
+
+
+static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
+{
     /*Conservative 2b*/
     /*Treat the unblocking time as a start of a new period */
     inf->deadl_abs = now + inf->period;
     inf->cputime = 0;
 }
-static inline void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now) {
-    if (likely(inf->latency)) {
+
+
+#if (UNBLOCK == UNBLOCK_ATROPOS)
+static void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now)
+{
+    if ( likely(inf->latency) )
+    {
         /*scale the slice and period accordingly to the latency hint*/
         /*reduce period temporarily to the latency hint*/
         inf->period = inf->latency;
@@ -993,18 +1100,24 @@
         inf->deadl_abs = now + inf->period;
         inf->cputime = 0;
     } 
-    else {
+    else
+    {
         /*we don't have a latency hint.. use some other technique*/
         unblock_long_cons_b(inf, now);
     }
 }
+#endif
+
+
+#if (UNBLOCK == UNBLOCK_BURST)
 /*a new idea of dealing with short blocks: burst period scaling*/
-static inline void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
+static void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
 {
     /*treat blocked time as consumed by the domain*/
     inf->cputime += now - inf->block_abs;
  
-    if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
+    if ( (inf->cputime + EXTRA_QUANTUM) <= inf->slice )
+    {
         /*if we can still use some time in the current slice
           then use it!*/
 #ifdef SEDF_STATS
@@ -1012,10 +1125,12 @@
         inf->short_cont++;
 #endif
     }
-    else {
+    else
+    {
         /*we don't have a reasonable amount of time in
           our slice left => switch to burst mode*/
-        if (likely(inf->unblock_abs)) {
+        if ( likely(inf->unblock_abs) )
+        {
             /*set the period-length to the current blocking
               interval, possible enhancements: average over last
               blocking intervals, user-specified minimum,...*/
@@ -1030,17 +1145,23 @@
             /*set new (shorter) deadline*/
             inf->deadl_abs += inf->period;
         }
-        else {
+        else
+        {
             /*in case we haven't unblocked before
               start in next period!*/
             inf->cputime=0;
             inf->deadl_abs += inf->period;
         }
     }
+
     inf->unblock_abs = now;
 }
-static inline void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now) {
-    if (unlikely(inf->latency && (inf->period > inf->latency))) {
+
+
+static void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now)
+{
+    if ( unlikely(inf->latency && (inf->period > inf->latency)) )
+    {
         /*scale the slice and period accordingly to the latency hint*/
         inf->period = inf->latency;
         /*check for overflows on multiplication*/
@@ -1052,23 +1173,28 @@
         inf->deadl_abs = now + inf->period;
         inf->cputime = 0;
     }
-    else {
+    else
+    {
         /*we don't have a latency hint.. or we are currently in 
           "burst mode": use some other technique
           NB: this should be in fact the normal way of operation,
           when we are in sync with the device!*/
         unblock_long_cons_b(inf, now);
     }
+
     inf->unblock_abs = now;
 }
+#endif /* UNBLOCK == UNBLOCK_BURST */
+
 
 #define DOMAIN_EDF   1
 #define DOMAIN_EXTRA_PEN  2
 #define DOMAIN_EXTRA_UTIL  3
 #define DOMAIN_IDLE   4
-static inline int get_run_type(struct vcpu* d) {
+static inline int get_run_type(struct vcpu* d)
+{
     struct sedf_vcpu_info* inf = EDOM_INFO(d);
-    if (is_idle_task(d->domain))
+    if (is_idle_domain(d->domain))
         return DOMAIN_IDLE;
     if (inf->status & EXTRA_RUN_PEN)
         return DOMAIN_EXTRA_PEN;
@@ -1076,6 +1202,8 @@
         return DOMAIN_EXTRA_UTIL;
     return DOMAIN_EDF;
 }
+
+
 /*Compares two domains in the relation of whether the one is allowed to
   interrupt the others execution.
   It returns true (!=0) if a switch to the other domain is good.
@@ -1085,8 +1213,10 @@
   In the same class priorities are assigned as following:
    EDF: early deadline > late deadline
    L0 extra-time: lower score > higher score*/
-static inline int should_switch(struct vcpu* cur,
-                                struct vcpu* other, s_time_t now) {
+static inline int should_switch(struct vcpu *cur,
+                                struct vcpu *other,
+                                s_time_t now)
+{
     struct sedf_vcpu_info *cur_inf, *other_inf;
     cur_inf   = EDOM_INFO(cur);
     other_inf = EDOM_INFO(other);
@@ -1119,41 +1249,51 @@
     }
     return 1;
 }
-void sedf_wake(struct vcpu *d) {
+
+void sedf_wake(struct vcpu *d)
+{
     s_time_t              now = NOW();
     struct sedf_vcpu_info* inf = EDOM_INFO(d);
 
     PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
           d->vcpu_id);
 
-    if (unlikely(is_idle_task(d->domain)))
+    if ( unlikely(is_idle_domain(d->domain)) )
         return;
    
-    if ( unlikely(__task_on_queue(d)) ) {
+    if ( unlikely(__task_on_queue(d)) )
+    {
         PRINT(3,"\tdomain %i.%i is already in some queue\n",
               d->domain->domain_id, d->vcpu_id);
         return;
     }
+
     ASSERT(!sedf_runnable(d));
     inf->status &= ~SEDF_ASLEEP;
     ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
     ASSERT(!extraq_on(d, EXTRA_PEN_Q));
  
-    if (unlikely(inf->deadl_abs == 0))
+    if ( unlikely(inf->deadl_abs == 0) )
+    {
         /*initial setup of the deadline*/
         inf->deadl_abs = now + inf->slice;
+    }
   
-    PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
-          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, 
inf->deadl_abs,
-          inf->period, now);
+    PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
+          "now= %"PRIu64")\n",
+          d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);
+
 #ifdef SEDF_STATS 
     inf->block_tot++;
 #endif
-    if (unlikely(now < PERIOD_BEGIN(inf))) {
+
+    if ( unlikely(now < PERIOD_BEGIN(inf)) )
+    {
         PRINT(4,"extratime unblock\n");
         /* unblocking in extra-time! */
 #if (EXTRA == EXTRA_BLOCK_WEIGHT)
-        if (inf->status & EXTRA_WANT_PEN_Q) {
+        if ( inf->status & EXTRA_WANT_PEN_Q )
+        {
             /*we have a domain that wants compensation
               for block penalty and did just block in
               its compensation time. Give it another
@@ -1163,8 +1303,10 @@
 #endif
         extraq_check_add_unblocked(d, 0);
     }  
-    else {  
-        if (now < inf->deadl_abs) {
+    else
+    {  
+        if ( now < inf->deadl_abs )
+        {
             PRINT(4,"short unblocking\n");
             /*short blocking*/
 #ifdef SEDF_STATS
@@ -1182,7 +1324,8 @@
 
             extraq_check_add_unblocked(d, 1);
         }
-        else {
+        else
+        {
             PRINT(4,"long unblocking\n");
             /*long unblocking*/
 #ifdef SEDF_STATS
@@ -1197,7 +1340,6 @@
             unblock_long_cons_c(inf, now);
 #elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
             unblock_long_cons_b(inf, now);
-            /*unblock_short_cons_c(inf, now);*/
 #elif (UNBLOCK == UNBLOCK_BURST)
             unblock_long_burst(inf, now);
 #endif
@@ -1205,26 +1347,33 @@
             extraq_check_add_unblocked(d, 1);
         }
     }
-    PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
-          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, 
inf->deadl_abs,
+
+    PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
+          "now= %"PRIu64")\n",
+          d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
           inf->period, now);
-    if (PERIOD_BEGIN(inf) > now) {
+
+    if ( PERIOD_BEGIN(inf) > now )
+    {
         __add_to_waitqueue_sort(d);
         PRINT(3,"added to waitq\n");
     }
-    else {
+    else
+    {
         __add_to_runqueue_sort(d);
         PRINT(3,"added to runq\n");
     }
  
 #ifdef SEDF_STATS
     /*do some statistics here...*/
-    if (inf->block_abs != 0) {
+    if ( inf->block_abs != 0 )
+    {
         inf->block_time_tot += now - inf->block_abs;
         inf->penalty_time_tot +=
             PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
     }
 #endif
+
     /*sanity check: make sure each extra-aware domain IS on the util-q!*/
     ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
     ASSERT(__task_on_queue(d));
@@ -1234,27 +1383,48 @@
     ASSERT(d->processor >= 0);
     ASSERT(d->processor < NR_CPUS);
     ASSERT(schedule_data[d->processor].curr);
-    if (should_switch(schedule_data[d->processor].curr, d, now))
+
+    if ( should_switch(schedule_data[d->processor].curr, d, now) )
         cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 }
 
-/*Print a lot of use-{full, less} information about a domains in the system*/
-static void sedf_dump_domain(struct vcpu *d) {
+
+static int sedf_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+    if ( v == current )
+        return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
+
+    vcpu_pause(v);
+    v->cpu_affinity = *affinity;
+    v->processor = first_cpu(v->cpu_affinity);
+    vcpu_unpause(v);
+
+    return 0;
+}
+
+
+/* Print a lot of useful information about a domain in the system */
+static void sedf_dump_domain(struct vcpu *d)
+{
     printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
            test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
-    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i 
xtr(%s)=%"PRIu64" ew=%hu",
+    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
+           " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
            EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
-           EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
+           EDOM_INFO(d)->weight, d->cpu_time,
+           EDOM_INFO(d)->score[EXTRA_UTIL_Q],
            (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
            EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
-    if (d->cpu_time !=0)
+    
+    if ( d->cpu_time != 0 )
         printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
                / d->cpu_time);
+
 #ifdef SEDF_STATS
-    if (EDOM_INFO(d)->block_time_tot!=0)
+    if ( EDOM_INFO(d)->block_time_tot != 0 )
         printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
                EDOM_INFO(d)->block_time_tot);
-    if (EDOM_INFO(d)->block_tot!=0)
+    if ( EDOM_INFO(d)->block_tot != 0 )
         printf("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
                "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
                EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
@@ -1271,7 +1441,8 @@
     printf("\n");
 }
 
-/*dumps all domains on hte specified cpu*/
+
+/* dumps all domains on the specified cpu */
 static void sedf_dump_cpu_state(int i)
 {
     struct list_head      *list, *queue, *tmp;
@@ -1284,7 +1455,8 @@
     queue = RUNQ(i);
     printk("RUNQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
            (unsigned long) queue->next, (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue ) {
+    list_for_each_safe ( list, tmp, queue )
+    {
         printk("%3d: ",loop++);
         d_inf = list_entry(list, struct sedf_vcpu_info, list);
         sedf_dump_domain(d_inf->vcpu);
@@ -1293,7 +1465,8 @@
     queue = WAITQ(i); loop = 0;
     printk("\nWAITQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
            (unsigned long) queue->next, (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue ) {
+    list_for_each_safe ( list, tmp, queue )
+    {
         printk("%3d: ",loop++);
         d_inf = list_entry(list, struct sedf_vcpu_info, list);
         sedf_dump_domain(d_inf->vcpu);
@@ -1303,7 +1476,8 @@
     printk("\nEXTRAQ (penalty) rq %lx   n: %lx, p: %lx\n",
            (unsigned long)queue, (unsigned long) queue->next,
            (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue ) {
+    list_for_each_safe ( list, tmp, queue )
+    {
         d_inf = list_entry(list, struct sedf_vcpu_info,
                            extralist[EXTRA_PEN_Q]);
         printk("%3d: ",loop++);
@@ -1314,7 +1488,8 @@
     printk("\nEXTRAQ (utilization) rq %lx   n: %lx, p: %lx\n",
            (unsigned long)queue, (unsigned long) queue->next,
            (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue ) {
+    list_for_each_safe ( list, tmp, queue )
+    {
         d_inf = list_entry(list, struct sedf_vcpu_info,
                            extralist[EXTRA_UTIL_Q]);
         printk("%3d: ",loop++);
@@ -1323,69 +1498,93 @@
  
     loop = 0;
     printk("\nnot on Q\n");
-    for_each_domain(d)
+
+    for_each_domain ( d )
+    {
         for_each_vcpu(d, ed)
-    {
-        if (!__task_on_queue(ed) && (ed->processor == i)) {
-            printk("%3d: ",loop++);
-            sedf_dump_domain(ed);
-        }
-    }
-}
-/*Adjusts periods and slices of the domains accordingly to their weights*/
-static inline int sedf_adjust_weights(struct sched_adjdom_cmd *cmd) {
+        {
+            if ( !__task_on_queue(ed) && (ed->processor == i) )
+            {
+                printk("%3d: ",loop++);
+                sedf_dump_domain(ed);
+            }
+        }
+    }
+}
+
+
+/* Adjusts periods and slices of the domains according to their weights. */
+static int sedf_adjust_weights(struct sched_adjdom_cmd *cmd)
+{
     struct vcpu *p;
     struct domain      *d;
     int                 sumw[NR_CPUS];
     s_time_t            sumt[NR_CPUS];
     int                 cpu;
  
-    for (cpu=0; cpu < NR_CPUS; cpu++) {
+    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+    {
         sumw[cpu] = 0;
         sumt[cpu] = 0;
     }
-    /*sum up all weights*/
-    for_each_domain(d)
-        for_each_vcpu(d, p) {
-        if (EDOM_INFO(p)->weight)
-            sumw[p->processor] += EDOM_INFO(p)->weight;
-        else {
-            /*don't modify domains who don't have a weight, but sum
-              up the time they need, projected to a WEIGHT_PERIOD,
-              so that this time is not given to the weight-driven
-              domains*/
-            /*check for overflows*/
-            ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
-                   && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
-            sumt[p->processor] += 
-                (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) / 
-                EDOM_INFO(p)->period_orig;
-        }
-    }
-    /*adjust all slices (and periods) to the new weight*/
-    for_each_domain(d) 
-        for_each_vcpu(d, p) {
-        if (EDOM_INFO(p)->weight) {
-            EDOM_INFO(p)->period_orig = 
-                EDOM_INFO(p)->period  = WEIGHT_PERIOD;
-            EDOM_INFO(p)->slice_orig  =
-                EDOM_INFO(p)->slice   = 
-                (EDOM_INFO(p)->weight *
-                 (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) / 
-                sumw[p->processor];
-        }
-    }
+
+    /* sum up all weights */
+    for_each_domain( d )
+    {
+        for_each_vcpu( d, p )
+        {
+            if ( EDOM_INFO(p)->weight )
+            {
+                sumw[p->processor] += EDOM_INFO(p)->weight;
+            }
+            else
+            {
+                /*don't modify domains who don't have a weight, but sum
+                  up the time they need, projected to a WEIGHT_PERIOD,
+                  so that this time is not given to the weight-driven
+                  domains*/
+                /*check for overflows*/
+                ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
+                       && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
+                sumt[p->processor] += 
+                    (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) / 
+                    EDOM_INFO(p)->period_orig;
+            }
+        }
+    }
+
+    /* adjust all slices (and periods) to the new weight */
+    for_each_domain( d )
+    {
+        for_each_vcpu ( d, p )
+        {
+            if ( EDOM_INFO(p)->weight )
+            {
+                EDOM_INFO(p)->period_orig = 
+                    EDOM_INFO(p)->period  = WEIGHT_PERIOD;
+                EDOM_INFO(p)->slice_orig  =
+                    EDOM_INFO(p)->slice   = 
+                    (EDOM_INFO(p)->weight *
+                     (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) / 
+                    sumw[p->processor];
+            }
+        }
+    }
+
     return 0;
 }
 
+
 /* set or fetch domain scheduling parameters */
-static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd) {
+static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
+{
     struct vcpu *v;
 
     PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
           "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
           p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
           cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
+
     if ( cmd->direction == SCHED_INFO_PUT )
     {
         /*check for sane parameters*/
@@ -1458,6 +1657,7 @@
     .sleep          = sedf_sleep,
     .wake           = sedf_wake,
     .adjdom         = sedf_adjdom,
+    .set_affinity   = sedf_set_affinity
 };
 
 /*
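
A worked example of the weight -> slice mapping in sedf_adjust_weights()
(all numbers assumed for illustration; the real WEIGHT_PERIOD and
WEIGHT_SAFETY constants are defined elsewhere in sched_sedf.c):

    static void sedf_weight_demo(void)
    {
        s_time_t period = MILLISECS(100); /* WEIGHT_PERIOD (assumed)     */
        s_time_t safety = MILLISECS(5);   /* WEIGHT_SAFETY (assumed)     */
        s_time_t sumt   = MILLISECS(10);  /* reserved by weightless doms */
        int      sumw   = 2 + 1;          /* two VCPUs, weights 2 and 1  */

        s_time_t slice_a = 2 * (period - safety - sumt) / sumw; /* ~56.7ms */
        s_time_t slice_b = 1 * (period - safety - sumt) / sumw; /* ~28.3ms */
        (void)slice_a; (void)slice_b;
    }
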
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/schedule.c
--- a/xen/common/schedule.c     Mon Jan  9 11:19:55 2006
+++ b/xen/common/schedule.c     Mon Jan  9 11:22:17 2006
@@ -100,7 +100,9 @@
     v->vcpu_id = vcpu_id;
     v->processor = cpu_id;
     atomic_set(&v->pausecnt, 0);
-    v->cpumap = CPUMAP_RUNANYWHERE;
+
+    v->cpu_affinity = is_idle_domain(d) ?
+        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
 
     d->vcpu[vcpu_id] = v;
 
@@ -143,7 +145,7 @@
     /* Initialise the per-domain timer. */
     init_ac_timer(&v->timer, dom_timer_fn, v, v->processor);
 
-    if ( is_idle_task(d) )
+    if ( is_idle_domain(d) )
     {
         schedule_data[v->processor].curr = v;
         schedule_data[v->processor].idle = v;
@@ -166,7 +168,7 @@
     unsigned long flags;
 
     spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
-    if ( likely(!domain_runnable(v)) )
+    if ( likely(!vcpu_runnable(v)) )
         SCHED_OP(sleep, v);
     spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
 
@@ -182,7 +184,7 @@
      * flag is cleared and the scheduler lock is released. We also check that
      * the domain continues to be unrunnable, in case someone else wakes it.
      */
-    while ( !domain_runnable(v) &&
+    while ( !vcpu_runnable(v) &&
             (test_bit(_VCPUF_running, &v->vcpu_flags) ||
              spin_is_locked(&schedule_data[v->processor].schedule_lock)) )
         cpu_relax();
@@ -195,15 +197,22 @@
     unsigned long flags;
 
     spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
-    if ( likely(domain_runnable(v)) )
+    if ( likely(vcpu_runnable(v)) )
     {
         SCHED_OP(wake, v);
         v->wokenup = NOW();
     }
-    clear_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
     spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
 
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
+}
+
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+    if ( cpus_empty(*affinity) )
+        return -EINVAL;
+
+    return SCHED_OP(set_affinity, v, affinity);
 }
 
 /* Block the currently-executing domain until a pertinent event occurs. */
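
The new common wrapper validates the mask once and defers the rest to the
per-scheduler set_affinity hook. A caller might use it like this (sketch;
how 'v' is obtained is left out):

    static int pin_vcpu_demo(struct vcpu *v)
    {
        cpumask_t mask = CPU_MASK_NONE;

        cpu_set(2, mask);   /* allow only physical CPU 2 */
        /* empty masks are rejected with -EINVAL; the scheduler hook may
         * additionally return -EBUSY when v == current */
        return vcpu_set_affinity(v, &mask);
    }
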
@@ -330,18 +339,23 @@
     do {
         succ = 0;
         __clear_cpu_bits(have_lock);
-        for_each_vcpu(d, v) {
+        for_each_vcpu ( d, v )
+        {
             cpu = v->processor;
-            if (!__get_cpu_bit(cpu, have_lock)) {
+            if ( !__get_cpu_bit(cpu, have_lock) )
+            {
                 /* if we don't have a lock on this CPU: acquire it*/
-                if (spin_trylock(&schedule_data[cpu].schedule_lock)) {
+                if ( spin_trylock(&schedule_data[cpu].schedule_lock) )
+                {
                     /*we have this lock!*/
                     __set_cpu_bit(cpu, have_lock);
                     succ = 1;
-                } else {
+                }
+                else
+                {
                     /*we didn,t get this lock -> free all other locks too!*/
-                    for (cpu = 0; cpu < NR_CPUS; cpu++)
-                        if (__get_cpu_bit(cpu, have_lock))
+                    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+                        if ( __get_cpu_bit(cpu, have_lock) )
                             spin_unlock(&schedule_data[cpu].schedule_lock);
                     /* and start from the beginning! */
                     succ = 0;
@@ -354,8 +368,8 @@
 
     SCHED_OP(adjdom, d, cmd);
 
-    for (cpu = 0; cpu < NR_CPUS; cpu++)
-        if (__get_cpu_bit(cpu, have_lock))
+    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+        if ( __get_cpu_bit(cpu, have_lock) )
             spin_unlock(&schedule_data[cpu].schedule_lock);
     __clear_cpu_bits(have_lock);
 
@@ -371,22 +385,20 @@
  */
 static void __enter_scheduler(void)
 {
-    struct vcpu *prev = current, *next = NULL;
-    int                 cpu = prev->processor;
-    s_time_t            now;
+    struct vcpu        *prev = current, *next = NULL;
+    int                 cpu = smp_processor_id();
+    s_time_t            now = NOW();
     struct task_slice   next_slice;
     s32                 r_time;     /* time for new dom to run */
 
+    ASSERT(!in_irq());
+
     perfc_incrc(sched_run);
-    
+
     spin_lock_irq(&schedule_data[cpu].schedule_lock);
-
-    now = NOW();
 
     rem_ac_timer(&schedule_data[cpu].s_timer);
     
-    ASSERT(!in_irq());
-
     prev->cpu_time += now - prev->lastschd;
 
     /* get policy-specific decision on scheduling... */
@@ -394,7 +406,7 @@
 
     r_time = next_slice.time;
     next = next_slice.task;
-    
+
     schedule_data[cpu].curr = next;
     
     next->lastschd = now;
@@ -411,11 +423,6 @@
              prev->domain->domain_id, now - prev->lastschd);
     TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
              next->domain->domain_id, now - next->wokenup, r_time);
-
-    clear_bit(_VCPUF_running, &prev->vcpu_flags);
-    set_bit(_VCPUF_running, &next->vcpu_flags);
-
-    perfc_incrc(sched_ctx);
 
     /*
      * Logic of wokenup field in domain struct:
@@ -425,10 +432,10 @@
      * also set here then a preempted runnable domain will get a screwed up
      * "waiting time" value next time it is scheduled.
      */
-    prev->wokenup = NOW();
+    prev->wokenup = now;
 
 #if defined(WAKE_HISTO)
-    if ( !is_idle_task(next->domain) && next->wokenup )
+    if ( !is_idle_domain(next->domain) && next->wokenup )
     {
         ulong diff = (ulong)(now - next->wokenup);
         diff /= (ulong)MILLISECS(1);
@@ -438,7 +445,7 @@
     next->wokenup = (s_time_t)0;
 #elif defined(BLOCKTIME_HISTO)
     prev->lastdeschd = now;
-    if ( !is_idle_task(next->domain) )
+    if ( !is_idle_domain(next->domain) )
     {
         ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
         if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
@@ -446,10 +453,16 @@
     }
 #endif
 
+    set_bit(_VCPUF_running, &next->vcpu_flags);
+
+    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+    perfc_incrc(sched_ctx);
+
     prev->sleep_tick = schedule_data[cpu].tick;
 
     /* Ensure that the domain has an up-to-date time base. */
-    if ( !is_idle_task(next->domain) )
+    if ( !is_idle_domain(next->domain) )
     {
         update_dom_time(next);
         if ( next->sleep_tick != schedule_data[cpu].tick )
@@ -461,17 +474,6 @@
              next->domain->domain_id, next->vcpu_id);
 
     context_switch(prev, next);
-
-    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
-    context_switch_finalise(next);
-}
-
-/* No locking needed -- pointer comparison is safe :-) */
-int idle_cpu(int cpu)
-{
-    struct vcpu *p = schedule_data[cpu].curr;
-    return p == idle_task[cpu];
 }
 
 
@@ -493,11 +495,11 @@
 static void t_timer_fn(void *unused)
 {
     struct vcpu  *v  = current;
-    unsigned int  cpu = v->processor;
+    unsigned int  cpu = smp_processor_id();
 
     schedule_data[cpu].tick++;
 
-    if ( !is_idle_task(v->domain) )
+    if ( !is_idle_domain(v->domain) )
     {
         update_dom_time(v);
         send_guest_virq(v, VIRQ_TIMER);
@@ -531,8 +533,8 @@
         init_ac_timer(&t_timer[i], t_timer_fn, NULL, i);
     }
 
-    schedule_data[0].curr = idle_task[0];
-    schedule_data[0].idle = idle_task[0];
+    schedule_data[0].curr = idle_domain[0];
+    schedule_data[0].idle = idle_domain[0];
 
     for ( i = 0; schedulers[i] != NULL; i++ )
     {
@@ -546,10 +548,10 @@
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
 
-    rc = SCHED_OP(alloc_task, idle_task[0]);
+    rc = SCHED_OP(alloc_task, idle_domain[0]);
     BUG_ON(rc < 0);
 
-    sched_add_domain(idle_task[0]);
+    sched_add_domain(idle_domain[0]);
 }
 
 /*
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/vsprintf.c
--- a/xen/common/vsprintf.c     Mon Jan  9 11:19:55 2006
+++ b/xen/common/vsprintf.c     Mon Jan  9 11:22:17 2006
@@ -12,11 +12,15 @@
 /* 
  * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@xxxxxxxxxxxxxx>
  * - changed to provide snprintf and vsnprintf functions
+ * So Feb  1 16:51:32 CET 2004 Juergen Quade <quade@xxxxxxx>
+ * - scnprintf and vscnprintf
  */
 
 #include <stdarg.h>
 #include <xen/ctype.h>
 #include <xen/lib.h>
+#include <asm/div64.h>
+#include <asm/page.h>
 
 /**
  * simple_strtoul - convert a string to an unsigned long
@@ -33,11 +37,14 @@
         if (*cp == '0') {
             base = 8;
             cp++;
-            if ((*cp == 'x') && isxdigit(cp[1])) {
+            if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
                 cp++;
                 base = 16;
             }
         }
+    } else if (base == 16) {
+        if (cp[0] == '0' && toupper(cp[1]) == 'X')
+            cp += 2;
     }
     while (isxdigit(*cp) &&
            (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
@@ -49,6 +56,8 @@
     return result;
 }
 
+EXPORT_SYMBOL(simple_strtoul);
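
With this change an explicit base of 16 tolerates a leading "0x"/"0X"
prefix, and auto-detection accepts an uppercase 'X' as well. For instance
(sketch; simple_strtoul() is declared in xen/lib.h in this tree):

    static void strtoul_demo(void)
    {
        /* all three now parse to 26 */
        unsigned long a = simple_strtoul("0x1a", NULL, 16); /* prefix skipped */
        unsigned long b = simple_strtoul("1a",   NULL, 16);
        unsigned long c = simple_strtoul("0x1a", NULL, 0);  /* auto-detected  */
        (void)a; (void)b; (void)c;
    }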
+
 /**
  * simple_strtol - convert a string to a signed long
  * @cp: The start of the string
@@ -61,6 +70,8 @@
         return -simple_strtoul(cp+1,endp,base);
     return simple_strtoul(cp,endp,base);
 }
+
+EXPORT_SYMBOL(simple_strtol);
 
 /**
  * simple_strtoull - convert a string to an unsigned long long
@@ -77,11 +88,14 @@
         if (*cp == '0') {
             base = 8;
             cp++;
-            if ((*cp == 'x') && isxdigit(cp[1])) {
+            if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
                 cp++;
                 base = 16;
             }
         }
+    } else if (base == 16) {
+        if (cp[0] == '0' && toupper(cp[1]) == 'X')
+            cp += 2;
     }
     while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
                                                                ? toupper(*cp) : *cp)-'A'+10) < base) {
@@ -92,6 +106,8 @@
         *endp = (char *)cp;
     return result;
 }
+
+EXPORT_SYMBOL(simple_strtoull);
 
 /**
  * simple_strtoll - convert a string to a signed long long
@@ -123,25 +139,25 @@
 #define SPECIAL 32              /* 0x */
 #define LARGE   64              /* use 'ABCDEF' instead of 'abcdef' */
 
-static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
+static char * number(char * buf, char * end, unsigned long long num, int base, int size, int precision, int type)
 {
     char c,sign,tmp[66];
     const char *digits;
-    const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-    const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    static const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+    static const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
     int i;
 
     digits = (type & LARGE) ? large_digits : small_digits;
     if (type & LEFT)
         type &= ~ZEROPAD;
     if (base < 2 || base > 36)
-        return buf;
+        return NULL;
     c = (type & ZEROPAD) ? '0' : ' ';
     sign = 0;
     if (type & SIGN) {
-        if (num < 0) {
+        if ((signed long long) num < 0) {
             sign = '-';
-            num = -num;
+            num = - (signed long long) num;
             size--;
         } else if (type & PLUS) {
             sign = '+';
@@ -160,6 +176,9 @@
     i = 0;
     if (num == 0)
         tmp[i++]='0';
+    else while (num != 0)
+        tmp[i++] = digits[do_div(num,base)];
+#if 0
     else 
     {
         /* XXX KAF: force unsigned mod and div. */
@@ -167,6 +186,7 @@
         unsigned int base2=(unsigned int)base;
         while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; }
     }
+#endif
     if (i > precision)
         precision = i;
     size -= precision;
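
A note on the do_div() conversion above: the macro (from asm/div64.h)
divides a 64-bit value in place and returns the remainder, so the digits
come out least-significant first, matching the tmp[] fill order. For
example, converting 255 to base 16:

    static void do_div_demo(void)
    {
        unsigned long long num = 255;
        unsigned int rem;

        rem = do_div(num, 16);  /* num -> 15, rem == 15 => digit 'f' */
        rem = do_div(num, 16);  /* num -> 0,  rem == 15 => digit 'f' */
        (void)rem;              /* the loop stops once num reaches 0 */
    }

This also sidesteps the signed 64-bit modulus and division that the
now-disabled "force unsigned mod and div" workaround existed to avoid.
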
@@ -222,14 +242,22 @@
 }
 
 /**
-* vsnprintf - Format a string and place it in a buffer
-* @buf: The buffer to place the result into
-* @size: The size of the buffer, including the trailing null space
-* @fmt: The format string to use
-* @args: Arguments for the format string
-*
-* Call this function if you are already dealing with a va_list.
-* You probably want snprintf instead.
+ * vsnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf. If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want snprintf instead.
  */
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
@@ -247,6 +275,9 @@
     int qualifier;              /* 'h', 'l', or 'L' for integer fields */
                                 /* 'z' support added 23/7/1999 S.H.    */
                                 /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+    /* Reject out-of-range values early */
+    BUG_ON((int)size < 0);
 
     str = buf;
     end = buf + size - 1;
@@ -307,17 +338,14 @@
 
         /* get the conversion qualifier */
         qualifier = -1;
-        if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+        if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+            *fmt =='Z' || *fmt == 'z') {
             qualifier = *fmt;
             ++fmt;
             if (qualifier == 'l' && *fmt == 'l') {
                 qualifier = 'L';
                 ++fmt;
             }
-        }
-        if (*fmt == 'q') {
-            qualifier = 'L';
-            ++fmt;
         }
 
         /* default base */
@@ -345,7 +373,7 @@
 
         case 's':
             s = va_arg(args, char *);
-            if (!s)
+            if ((unsigned long)s < PAGE_SIZE)
                 s = "<NULL>";
 
             len = strnlen(s, precision);
@@ -386,7 +414,7 @@
             if (qualifier == 'l') {
                 long * ip = va_arg(args, long *);
                 *ip = (str - buf);
-            } else if (qualifier == 'Z') {
+            } else if (qualifier == 'Z' || qualifier == 'z') {
                 size_t * ip = va_arg(args, size_t *);
                 *ip = (str - buf);
             } else {
@@ -437,7 +465,7 @@
             num = va_arg(args, unsigned long);
             if (flags & SIGN)
                 num = (signed long) num;
-        } else if (qualifier == 'Z') {
+        } else if (qualifier == 'Z' || qualifier == 'z') {
             num = va_arg(args, size_t);
         } else if (qualifier == 'h') {
             num = (unsigned short) va_arg(args, int);
@@ -463,12 +491,43 @@
     return str-buf;
 }
 
+EXPORT_SYMBOL(vsnprintf);
+
+/**
+ * vscnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which have been written into
+ * the @buf not including the trailing '\0'. If @size is <= 0 the function
+ * returns 0.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want scnprintf instead.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+    int i;
+
+    i=vsnprintf(buf,size,fmt,args);
+    return (i >= size) ? (size - 1) : i;
+}
+
+EXPORT_SYMBOL(vscnprintf);
+
 /**
  * snprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
  * @size: The size of the buffer, including the trailing null space
  * @fmt: The format string to use
  * @...: Arguments for the format string
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing null,
+ * as per ISO C99.  If the return is greater than or equal to
+ * @size, the resulting string is truncated.
  */
 int snprintf(char * buf, size_t size, const char *fmt, ...)
 {
@@ -481,26 +540,61 @@
     return i;
 }
 
+EXPORT_SYMBOL(snprintf);
+
+/**
+ * scnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The return value is the number of characters written into @buf not including
+ * the trailing '\0'. If @size is <= 0 the function returns 0. If the return is
+ * greater than or equal to @size, the resulting string is truncated.
+ */
+
+int scnprintf(char * buf, size_t size, const char *fmt, ...)
+{
+    va_list args;
+    int i;
+
+    va_start(args, fmt);
+    i = vsnprintf(buf, size, fmt, args);
+    va_end(args);
+    return (i >= size) ? (size - 1) : i;
+}
+EXPORT_SYMBOL(scnprintf);
+
 /**
  * vsprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
  * @fmt: The format string to use
  * @args: Arguments for the format string
  *
+ * The function returns the number of characters written
+ * into @buf. Use vsnprintf or vscnprintf in order to avoid
+ * buffer overflows.
+ *
  * Call this function if you are already dealing with a va_list.
  * You probably want sprintf instead.
  */
 int vsprintf(char *buf, const char *fmt, va_list args)
 {
-    return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args);
-}
-
+    return vsnprintf(buf, INT_MAX, fmt, args);
+}
+
+EXPORT_SYMBOL(vsprintf);
 
 /**
  * sprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
  * @fmt: The format string to use
  * @...: Arguments for the format string
+ *
+ * The function returns the number of characters written
+ * into @buf. Use snprintf or scnprintf in order to avoid
+ * buffer overflows.
  */
 int sprintf(char * buf, const char *fmt, ...)
 {
@@ -508,11 +602,12 @@
     int i;
 
     va_start(args, fmt);
-    i=vsprintf(buf,fmt,args);
+    i=vsnprintf(buf, INT_MAX, fmt, args);
     va_end(args);
     return i;
 }
 
+EXPORT_SYMBOL(sprintf);
 
 /*
  * Local variables:
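
The two return conventions documented above are easy to confuse, so here is a
minimal sketch (not part of the patch; buffer size and format are illustrative):

    void demo(void)
    {
        char buf[8];
        int n;

        n = snprintf(buf, sizeof(buf), "%s", "0123456789");
        /* n == 10: the length the full output would need (ISO C99), even
         * though buf now holds only "0123456" plus the trailing '\0'. */

        n = scnprintf(buf, sizeof(buf), "%s", "0123456789");
        /* n == 7: the characters actually stored in buf, which is what a
         * caller appending into a fixed-size buffer wants to add up. */
    }
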
diff -r 25e3c8668f1f -r 8af1199488d3 xen/drivers/char/ns16550.c
--- a/xen/drivers/char/ns16550.c        Mon Jan  9 11:19:55 2006
+++ b/xen/drivers/char/ns16550.c        Mon Jan  9 11:22:17 2006
@@ -13,6 +13,7 @@
 #include <xen/irq.h>
 #include <xen/sched.h>
 #include <xen/serial.h>
+#include <xen/iocap.h>
 #include <asm/io.h>
 
 /*
@@ -233,11 +234,11 @@
 }
 
 #ifdef CONFIG_X86
-#include <asm/physdev.h>
 static void ns16550_endboot(struct serial_port *port)
 {
     struct ns16550 *uart = port->uart;
-    physdev_modify_ioport_access_range(dom0, 0, uart->io_base, 8);
+    if ( ioports_deny_access(dom0, uart->io_base, uart->io_base + 7) != 0 )
+        BUG();
 }
 #else
 #define ns16550_endboot NULL
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-ia64/domain.h     Mon Jan  9 11:22:17 2006
@@ -10,7 +10,7 @@
 #include <asm/vmx_platform.h>
 #include <xen/list.h>
 
-extern void arch_do_createdomain(struct vcpu *);
+extern int arch_do_createdomain(struct vcpu *);
 
 extern void domain_relinquish_resources(struct domain *);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/linux-xen/asm/pal.h
--- a/xen/include/asm-ia64/linux-xen/asm/pal.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-ia64/linux-xen/asm/pal.h  Mon Jan  9 11:22:17 2006
@@ -925,7 +925,11 @@
 ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector)
 {
        struct ia64_pal_retval iprv;
+#ifdef XEN     /* fix a bug in Linux... PAL has changed */
+       PAL_CALL(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+#else
        PAL_CALL_IC_OFF(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+#endif
        if (vector)
                *vector = iprv.v0;
        *progress = iprv.v1;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h        Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-ia64/vmx.h        Mon Jan  9 11:22:17 2006
@@ -23,7 +23,7 @@
 #define _ASM_IA64_VT_H
 
 #define RR7_SWITCH_SHIFT       12      /* 4k enough */
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 extern void identify_vmx_feature(void);
 extern unsigned int vmx_enabled;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/config.h      Mon Jan  9 11:22:17 2006
@@ -248,12 +248,10 @@
 
 #ifdef CONFIG_X86_PAE
 /* Hypervisor owns top 168MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START  0xF5800000
-# define HYPERVISOR_VIRT_START   (0xF5800000UL)
+#define HYPERVISOR_VIRT_START   mk_unsigned_long(0xF5800000)
 #else
 /* Hypervisor owns top 64MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START  0xFC000000
-# define HYPERVISOR_VIRT_START   (0xFC000000UL)
+#define HYPERVISOR_VIRT_START   mk_unsigned_long(0xFC000000)
 #endif
 
 #define L2_PAGETABLE_FIRST_XEN_SLOT \
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/current.h
--- a/xen/include/asm-x86/current.h     Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/current.h     Mon Jan  9 11:22:17 2006
@@ -49,7 +49,7 @@
 #define reset_stack_and_jump(__fn)              \
     __asm__ __volatile__ (                      \
         "mov %0,%%"__OP"sp; jmp "STR(__fn)      \
-        : : "r" (guest_cpu_user_regs()) )
+        : : "r" (guest_cpu_user_regs()) : "memory" )
 
 #define schedule_tail(_ed) (((_ed)->arch.schedule_tail)(_ed))
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/domain.h      Mon Jan  9 11:22:17 2006
@@ -24,8 +24,8 @@
     /* Writable pagetables. */
     struct ptwr_info ptwr[2];
 
-    /* I/O-port access bitmap mask. */
-    u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
+    /* I/O-port admin-specified access capabilities. */
+    struct rangeset *ioport_caps;
 
     /* Shadow mode status and controls. */
     struct shadow_ops *ops;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/mm.h  Mon Jan  9 11:22:17 2006
@@ -336,11 +336,13 @@
 int  revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
 
 void cleanup_writable_pagetable(struct domain *d);
-#define sync_pagetable_state(d)                 \
-    do {                                        \
-        LOCK_BIGLOCK(d);                        \
-        cleanup_writable_pagetable(d);          \
-        UNLOCK_BIGLOCK(d);                      \
+#define sync_pagetable_state(d)                                 \
+    do {                                                        \
+        LOCK_BIGLOCK(d);                                        \
+        /* Avoid racing with ptwr_destroy(). */                 \
+        if ( !test_bit(_DOMF_dying, &(d)->domain_flags) )       \
+            cleanup_writable_pagetable(d);                      \
+        UNLOCK_BIGLOCK(d);                                      \
     } while ( 0 )
 
 int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/msr.h
--- a/xen/include/asm-x86/msr.h Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/msr.h Mon Jan  9 11:22:17 2006
@@ -12,7 +12,7 @@
        __asm__ __volatile__("rdmsr" \
                            : "=a" (a__), "=d" (b__) \
                            : "c" (msr)); \
-       val = a__ | (b__<<32); \
+       val = a__ | ((u64)b__<<32); \
 } while(0); 
 
 #define wrmsr(msr,val1,val2) \
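
The rdmsr fix above addresses a classic promotion bug: the high half is a
32-bit value, and shifting it left by 32 before any widening happens is
undefined behaviour. A sketch of the difference, assuming 32-bit halves:

    u64 combine(u32 lo, u32 hi)
    {
        /* Wrong: 'hi << 32' shifts a 32-bit operand by its full width,
         * which is undefined and loses the high bits in practice. */
        /* return lo | (hi << 32); */

        /* Right: widen to 64 bits first, then shift. */
        return lo | ((u64)hi << 32);
    }
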
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/processor.h   Mon Jan  9 11:22:17 2006
@@ -190,7 +190,7 @@
 #ifdef CONFIG_X86_HT
 extern void detect_ht(struct cpuinfo_x86 *c);
 #else
-static inline void detect_ht(struct cpuinfo_x86 *c) {}
+static always_inline void detect_ht(struct cpuinfo_x86 *c) {}
 #endif
 
 /*
@@ -209,7 +209,7 @@
 /*
  * CPUID functions returning a single datum
  */
-static inline unsigned int cpuid_eax(unsigned int op)
+static always_inline unsigned int cpuid_eax(unsigned int op)
 {
     unsigned int eax;
 
@@ -219,7 +219,7 @@
             : "bx", "cx", "dx");
     return eax;
 }
-static inline unsigned int cpuid_ebx(unsigned int op)
+static always_inline unsigned int cpuid_ebx(unsigned int op)
 {
     unsigned int eax, ebx;
 
@@ -229,7 +229,7 @@
             : "cx", "dx" );
     return ebx;
 }
-static inline unsigned int cpuid_ecx(unsigned int op)
+static always_inline unsigned int cpuid_ecx(unsigned int op)
 {
     unsigned int eax, ecx;
 
@@ -239,7 +239,7 @@
             : "bx", "dx" );
     return ecx;
 }
-static inline unsigned int cpuid_edx(unsigned int op)
+static always_inline unsigned int cpuid_edx(unsigned int op)
 {
     unsigned int eax, edx;
 
@@ -281,7 +281,7 @@
  */
 extern unsigned long mmu_cr4_features;
 
-static inline void set_in_cr4 (unsigned long mask)
+static always_inline void set_in_cr4 (unsigned long mask)
 {
     unsigned long dummy;
     mmu_cr4_features |= mask;
@@ -292,7 +292,7 @@
         : "=&r" (dummy) : "irg" (mask) );
 }
 
-static inline void clear_in_cr4 (unsigned long mask)
+static always_inline void clear_in_cr4 (unsigned long mask)
 {
     unsigned long dummy;
     mmu_cr4_features &= ~mask;
@@ -334,7 +334,7 @@
        outb((data), 0x23); \
 } while (0)
 
-static inline void __monitor(const void *eax, unsigned long ecx,
+static always_inline void __monitor(const void *eax, unsigned long ecx,
                unsigned long edx)
 {
        /* "monitor %eax,%ecx,%edx;" */
@@ -343,7 +343,7 @@
                : :"a" (eax), "c" (ecx), "d"(edx));
 }
 
-static inline void __mwait(unsigned long eax, unsigned long ecx)
+static always_inline void __mwait(unsigned long eax, unsigned long ecx)
 {
        /* "mwait %eax,%ecx;" */
        asm volatile(
@@ -460,7 +460,7 @@
 };
 
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
+static always_inline void rep_nop(void)
 {
     __asm__ __volatile__ ( "rep;nop" : : : "memory" );
 }
@@ -471,7 +471,7 @@
 #ifdef         CONFIG_MPENTIUMIII
 
 #define ARCH_HAS_PREFETCH
-extern inline void prefetch(const void *x)
+extern always_inline void prefetch(const void *x)
 {
     __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
 }
@@ -482,12 +482,12 @@
 #define ARCH_HAS_PREFETCHW
 #define ARCH_HAS_SPINLOCK_PREFETCH
 
-extern inline void prefetch(const void *x)
+extern always_inline void prefetch(const void *x)
 {
     __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
 }
 
-extern inline void prefetchw(const void *x)
+extern always_inline void prefetchw(const void *x)
 {
     __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
 }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/shadow.h      Mon Jan  9 11:22:17 2006
@@ -341,10 +341,10 @@
 #if SHADOW_VERBOSE_DEBUG
 #define SH_LOG(_f, _a...)                                               \
     printk("DOM%uP%u: SH_LOG(%d): " _f "\n",                            \
-       current->domain->domain_id , current->processor, __LINE__ , ## _a )
+       current->domain->domain_id , smp_processor_id(), __LINE__ , ## _a )
 #define SH_VLOG(_f, _a...)                                              \
     printk("DOM%uP%u: SH_VLOG(%d): " _f "\n",                           \
-           current->domain->domain_id, current->processor, __LINE__ , ## _a )
+           current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
 #else
 #define SH_LOG(_f, _a...) ((void)0)
 #define SH_VLOG(_f, _a...) ((void)0)
@@ -353,7 +353,7 @@
 #if SHADOW_VVERBOSE_DEBUG
 #define SH_VVLOG(_f, _a...)                                             \
     printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n",                          \
-           current->domain->domain_id, current->processor, __LINE__ , ## _a )
+           current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
 #else
 #define SH_VVLOG(_f, _a...) ((void)0)
 #endif
@@ -361,7 +361,7 @@
 #if SHADOW_VVVERBOSE_DEBUG
 #define SH_VVVLOG(_f, _a...)                                            \
     printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n",                         \
-           current->domain->domain_id, current->processor, __LINE__ , ## _a )
+           current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
 #else
 #define SH_VVVLOG(_f, _a...) ((void)0)
 #endif
@@ -369,7 +369,7 @@
 #if FULLSHADOW_DEBUG
 #define FSH_LOG(_f, _a...)                                              \
     printk("DOM%uP%u: FSH_LOG(%d): " _f "\n",                           \
-           current->domain->domain_id, current->processor, __LINE__ , ## _a )
+           current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
 #else
 #define FSH_LOG(_f, _a...) ((void)0)
 #endif
@@ -591,7 +591,7 @@
         if ( need_flush )
         {
             perfc_incrc(update_hl2e_invlpg);
-            flush_tlb_one_mask(v->domain->cpumask,
+            flush_tlb_one_mask(v->domain->domain_dirty_cpumask,
                                &linear_pg_table[l1_linear_offset(va)]);
         }
     }
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx.h Mon Jan  9 11:22:17 2006
@@ -26,7 +26,7 @@
 #include <asm/vmx_vmcs.h>
 #include <asm/i387.h>
 
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 extern int hvm_enabled;
 
@@ -38,7 +38,6 @@
 
 extern void arch_vmx_do_launch(struct vcpu *);
 extern void arch_vmx_do_resume(struct vcpu *);
-extern void arch_vmx_do_relaunch(struct vcpu *);
 
 extern unsigned int cpu_rev;
 
@@ -506,7 +505,7 @@
 
 static inline unsigned int vmx_get_vcpu_nr(struct domain *d)
 {
-    return d->arch.vmx_platform.nr_vcpu;
+    return d->arch.vmx_platform.nr_vcpus;
 }
 
 static inline shared_iopage_t *get_sp(struct domain *d)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_intercept.h
--- a/xen/include/asm-x86/vmx_intercept.h       Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_intercept.h       Mon Jan  9 11:22:17 2006
@@ -6,7 +6,7 @@
 #include <xen/lib.h>
 #include <xen/time.h>
 #include <xen/errno.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #define MAX_IO_HANDLER              8
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_platform.h
--- a/xen/include/asm-x86/vmx_platform.h        Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_platform.h        Mon Jan  9 11:22:17 2006
@@ -33,10 +33,10 @@
     (((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
 
 #define operand_size(operand)   \
-      ((operand >> 24) & 0xFF)
+    ((operand >> 24) & 0xFF)
 
 #define operand_index(operand)  \
-      ((operand >> 16) & 0xFF)
+    ((operand >> 16) & 0xFF)
 
 /* for instruction.operand[].size */
 #define BYTE    1
@@ -81,13 +81,13 @@
 
 struct vmx_platform {
     unsigned long          shared_page_va;
-    unsigned int           nr_vcpu;
-    unsigned int           lapic_enable;
+    unsigned int           nr_vcpus;
+    unsigned int           apic_enabled;
 
     struct vmx_virpit      vmx_pit;
     struct vmx_io_handler  vmx_io_handler;
     struct vmx_virpic      vmx_pic;
-    struct vmx_vioapic      vmx_vioapic;
+    struct vmx_vioapic     vmx_vioapic;
     unsigned char          round_info[256];
     spinlock_t             round_robin_lock;
     int                    interrupt_request;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_vlapic.h
--- a/xen/include/asm-x86/vmx_vlapic.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_vlapic.h  Mon Jan  9 11:22:17 2006
@@ -21,7 +21,7 @@
 #define VMX_VLAPIC_H
 
 #include <asm/msr.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 static inline int __fls(uint32_t word)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_vmcs.h    Mon Jan  9 11:22:17 2006
@@ -23,7 +23,7 @@
 #include <asm/vmx_cpu.h>
 #include <asm/vmx_platform.h>
 #include <asm/vmx_vlapic.h>
-#include <public/vmx_assist.h>
+#include <public/hvm/vmx_assist.h>
 
 extern int start_vmx(void);
 extern void stop_vmx(void);
@@ -86,7 +86,8 @@
 #define PC_DEBUG_PORT   0x80
 
 struct arch_vmx_struct {
-    struct vmcs_struct      *vmcs;  /* VMCS pointer in virtual */
+    struct vmcs_struct      *vmcs;  /* VMCS pointer in virtual. */
+    unsigned int            launch_cpu; /* VMCS is valid on this CPU. */
     unsigned long           flags;  /* VMCS flags */
     unsigned long           cpu_cr0; /* copy of guest CR0 */
     unsigned long           cpu_shadow_cr0; /* copy of guest read shadow CR0 */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/x86_emulate.h Mon Jan  9 11:22:17 2006
@@ -18,10 +18,11 @@
  * special treatment or emulation (*_emulated).
  * 
  * The emulator assumes that an instruction accesses only one 'emulated memory'
- * location, and that this is one of its data operands. Instruction fetches and
+ * location, that this location is the given linear faulting address (cr2), and
+ * that this is one of the instruction's data operands. Instruction fetches and
  * stack operations are assumed never to access emulated memory. The emulator
  * automatically deduces which operand of a string-move operation is accessing
- * emulated memory, and requires that the other operand accesses normal memory.
+ * emulated memory, and assumes that the other operand accesses normal memory.
  * 
  * NOTES:
  *  1. The emulator isn't very smart about emulated vs. standard memory.
@@ -36,6 +37,7 @@
  *     then immediately bail.
  *  3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
  *     cmpxchg8b_emulated need support 8-byte accesses.
+ *  4. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
  */
 /* Access completed successfully: continue emulation as normal. */
 #define X86EMUL_CONTINUE        0
@@ -141,14 +143,27 @@
 
 struct cpu_user_regs;
 
+/* Execution mode, passed to the emulator. */
+#define X86EMUL_MODE_REAL     0 /* Real mode.             */
+#define X86EMUL_MODE_PROT16   2 /* 16-bit protected mode. */
+#define X86EMUL_MODE_PROT32   4 /* 32-bit protected mode. */
+#define X86EMUL_MODE_PROT64   8 /* 64-bit (long) mode.    */
+
+/* Host execution mode. */
+#if defined(__i386__)
+#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
+#elif defined(__x86_64__)
+#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
+#endif
+
 /*
  * x86_emulate_memop: Emulate an instruction that faulted attempting to
  *                    read/write a 'special' memory area.
  *  @regs: Register state at time of fault.
- *  @cr2:  Linear faulting address.
+ *  @cr2:  Linear faulting address within an emulated/special memory area.
  *  @ops:  Interface to access special memory.
- *  @mode: Current execution mode, represented by the default size of memory
- *         addresses, in bytes. Valid values are 2, 4 and 8 (x86/64 only).
+ *  @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
+ * Returns -1 on failure, 0 on success.
  */
 extern int
 x86_emulate_memop(
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/arch-x86_32.h  Mon Jan  9 11:22:17 2006
@@ -49,10 +49,15 @@
  * machine->physical mapping table starts at this address, read-only.
  */
 #ifdef CONFIG_X86_PAE
-# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#define __HYPERVISOR_VIRT_START 0xF5800000
 #else
-# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#define __HYPERVISOR_VIRT_START 0xFC000000
 #endif
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
 #ifndef machine_to_phys_mapping
 #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
 #endif
@@ -137,7 +142,7 @@
     unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
 } arch_vcpu_info_t;
 
-#endif
+#endif /* !__ASSEMBLY__ */
 
 #endif
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/arch-x86_64.h  Mon Jan  9 11:22:17 2006
@@ -59,9 +59,12 @@
 /* And the trap vector is... */
 #define TRAP_INSTR "syscall"
 
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+
 #ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START (0xFFFF800000000000UL)
-#define HYPERVISOR_VIRT_END   (0xFFFF880000000000UL)
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
 #endif
 
 /* Maximum number of virtual CPUs in multi-processor guests. */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/dom0_ops.h     Mon Jan  9 11:22:17 2006
@@ -94,14 +94,14 @@
     xen_domain_handle_t handle;
 } dom0_getdomaininfo_t;
 
-#define DOM0_SETDOMAININFO      13
+#define DOM0_SETVCPUCONTEXT   13
 typedef struct {
     /* IN variables. */
     domid_t               domain;
     uint32_t              vcpu;
     /* IN/OUT parameters */
     vcpu_guest_context_t *ctxt;
-} dom0_setdomaininfo_t;
+} dom0_setvcpucontext_t;
 
 #define DOM0_MSR              15
 typedef struct {
@@ -163,13 +163,13 @@
 /* 
  * Set which physical cpus a vcpu can execute on.
  */
-#define DOM0_PINCPUDOMAIN     20
+#define DOM0_SETVCPUAFFINITY  20
 typedef struct {
     /* IN variables. */
     domid_t   domain;
     uint32_t  vcpu;
     cpumap_t  cpumap;
-} dom0_pincpudomain_t;
+} dom0_setvcpuaffinity_t;
 
 /* Get trace buffers machine base address */
 #define DOM0_TBUFCONTROL       21
@@ -410,6 +410,21 @@
     uint8_t enable;
 } dom0_setdebugging_t;
 
+#define DOM0_IRQ_PERMISSION 46
+typedef struct {
+    domid_t domain;          /* domain to be affected */
+    uint8_t pirq;
+    uint8_t allow_access;    /* flag to specify enable/disable of IRQ access */
+} dom0_irq_permission_t;
+
+#define DOM0_IOMEM_PERMISSION 47
+typedef struct {
+    domid_t  domain;          /* domain to be affected */
+    unsigned long first_pfn;  /* first page (physical page number) in range */
+    unsigned long nr_pfns;    /* number of pages in range (>0) */
+    uint8_t allow_access;     /* allow (!0) or deny (0) access to range? */
+} dom0_iomem_permission_t;
+ 
 typedef struct {
     uint32_t cmd;
     uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
@@ -421,13 +436,13 @@
         dom0_getmemlist_t        getmemlist;
         dom0_schedctl_t          schedctl;
         dom0_adjustdom_t         adjustdom;
-        dom0_setdomaininfo_t     setdomaininfo;
+        dom0_setvcpucontext_t    setvcpucontext;
         dom0_getdomaininfo_t     getdomaininfo;
         dom0_getpageframeinfo_t  getpageframeinfo;
         dom0_msr_t               msr;
         dom0_settime_t           settime;
         dom0_readconsole_t       readconsole;
-        dom0_pincpudomain_t      pincpudomain;
+        dom0_setvcpuaffinity_t   setvcpuaffinity;
         dom0_tbufcontrol_t       tbufcontrol;
         dom0_physinfo_t          physinfo;
         dom0_sched_id_t          sched_id;
@@ -448,6 +463,8 @@
         dom0_max_vcpus_t         max_vcpus;
         dom0_setdomainhandle_t   setdomainhandle;        
         dom0_setdebugging_t      setdebugging;
+        dom0_irq_permission_t    irq_permission;
+        dom0_iomem_permission_t  iomem_permission;
         uint8_t                  pad[128];
     } u;
 } dom0_op_t;
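
To see how the two new sub-ops slot into the union, a hypothetical caller
might fill a dom0_op_t like this (the domid, pfn range and submission step
are illustrative, not from the patch):

    dom0_op_t op;

    memset(&op, 0, sizeof(op));
    op.cmd = DOM0_IOMEM_PERMISSION;
    op.interface_version = DOM0_INTERFACE_VERSION;
    op.u.iomem_permission.domain       = 7;        /* example domain    */
    op.u.iomem_permission.first_pfn    = 0xfebf0;  /* example MMIO page */
    op.u.iomem_permission.nr_pfns      = 16;
    op.u.iomem_permission.allow_access = 1;        /* !0 => allow       */
    /* ...then hand 'op' to the dom0_op hypercall as usual. */
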
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/xen.h
--- a/xen/include/public/xen.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/xen.h  Mon Jan  9 11:22:17 2006
@@ -426,6 +426,15 @@
 
 typedef uint8_t xen_domain_handle_t[16];
 
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __XEN_PUBLIC_XEN_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/bitmap.h
--- a/xen/include/xen/bitmap.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/bitmap.h  Mon Jan  9 11:22:17 2006
@@ -41,6 +41,8 @@
  * bitmap_weight(src, nbits)                   Hamming Weight: number set bits
  * bitmap_shift_right(dst, src, n, nbits)      *dst = *src >> n
  * bitmap_shift_left(dst, src, n, nbits)       *dst = *src << n
+ * bitmap_scnprintf(buf, len, src, nbits)      Print bitmap src to buf
+ * bitmap_scnlistprintf(buf, len, src, nbits)  Print bitmap src as list to buf
  */
 
 /*
@@ -93,6 +95,10 @@
                        const unsigned long *bitmap2, int bits);
 extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
+extern int bitmap_scnprintf(char *buf, unsigned int len,
+                       const unsigned long *src, int nbits);
+extern int bitmap_scnlistprintf(char *buf, unsigned int len,
+                       const unsigned long *src, int nbits);
 extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
 extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
 extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
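
The two new helpers differ only in output format. A sketch of the expected
results for a bitmap with bits 0-3 and 8 set (nbits = 16), following the
Linux conventions these are lifted from:

    bitmap_scnprintf(buf, len, src, 16);      /* hex mask form: "010f"  */
    bitmap_scnlistprintf(buf, len, src, 16);  /* list form:     "0-3,8" */
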
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/compiler.h
--- a/xen/include/xen/compiler.h        Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/compiler.h        Mon Jan  9 11:22:17 2006
@@ -19,4 +19,10 @@
 #define __attribute_used__ __attribute__((__unused__))
 #endif
 
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define __must_check __attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
+
 #endif /* __LINUX_COMPILER_H */
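
A short sketch of what the new __must_check annotation does on GCC 3.4 and
later (the function name is illustrative):

    int __must_check grab_lock(int id);

    void demo(void)
    {
        grab_lock(1);              /* warning: ignoring return value */
        if (grab_lock(2) == 0)     /* fine: the result is examined   */
            return;
    }
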
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/config.h
--- a/xen/include/xen/config.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/config.h  Mon Jan  9 11:22:17 2006
@@ -43,4 +43,13 @@
 #define __STR(...) #__VA_ARGS__
 #define STR(...) __STR(__VA_ARGS__)
 
+#ifndef __ASSEMBLY__
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+#else /* __ASSEMBLY__ */
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+#endif /* !__ASSEMBLY__ */
+
 #endif /* __XEN_CONFIG_H__ */
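
The point of mk_unsigned_long() is that one definition can now be shared
between C and assembly. A sketch of the expansion in each context:

    #define EXAMPLE_ADDR mk_unsigned_long(0xFC000000)

    /* C (no __ASSEMBLY__):  EXAMPLE_ADDR -> 0xFC000000UL  */
    /* assembly:             EXAMPLE_ADDR -> 0xFC000000    */
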
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/cpumask.h Mon Jan  9 11:22:17 2006
@@ -8,8 +8,8 @@
  * See detailed comments in the file xen/bitmap.h describing the
  * data type on which these cpumasks are based.
  *
- * For details of cpumask_scnprintf() and cpumask_parse(),
- * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ * For details of cpumask_scnprintf() and cpulist_scnprintf(),
+ * see bitmap_scnprintf() and bitmap_scnlistprintf() in lib/bitmap.c.
  *
  * The available cpumask operations are:
  *
@@ -36,8 +36,8 @@
  * void cpus_shift_right(dst, src, n)  Shift right
  * void cpus_shift_left(dst, src, n)   Shift left
  *
- * int first_cpu(mask)                 Number lowest set bit, or >= NR_CPUS
- * int next_cpu(cpu, mask)             Next cpu past 'cpu', or >= NR_CPUS
+ * int first_cpu(mask)                 Number lowest set bit, or NR_CPUS
+ * int next_cpu(cpu, mask)             Next cpu past 'cpu', or NR_CPUS
  *
  * cpumask_t cpumask_of_cpu(cpu)       Return cpumask with bit 'cpu' set
  * CPU_MASK_ALL                                Initializer - all bits set
@@ -45,7 +45,7 @@
  * unsigned long *cpus_addr(mask)      Array of unsigned long's in mask
  *
  * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
- * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask
+ * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
  *
  * for_each_cpu_mask(cpu, mask)                for-loop cpu over mask
  *
@@ -207,13 +207,13 @@
 #define first_cpu(src) __first_cpu(&(src), NR_CPUS)
 static inline int __first_cpu(const cpumask_t *srcp, int nbits)
 {
-       return find_first_bit(srcp->bits, nbits);
+       return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
 }
 
 #define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS)
 static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
 {
-       return find_next_bit(srcp->bits, nbits, n+1);
+       return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
 }
 
 #define cpumask_of_cpu(cpu)                                            \
@@ -259,7 +259,6 @@
 
 #define cpus_addr(src) ((src).bits)
 
-/*
 #define cpumask_scnprintf(buf, len, src) \
                        __cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
 static inline int __cpumask_scnprintf(char *buf, int len,
@@ -268,14 +267,13 @@
        return bitmap_scnprintf(buf, len, srcp->bits, nbits);
 }
 
-#define cpumask_parse(ubuf, ulen, src) \
-                       __cpumask_parse((ubuf), (ulen), &(src), NR_CPUS)
-static inline int __cpumask_parse(const char __user *buf, int len,
-                                       cpumask_t *dstp, int nbits)
-{
-       return bitmap_parse(buf, len, dstp->bits, nbits);
-}
-*/
+#define cpulist_scnprintf(buf, len, src) \
+                       __cpulist_scnprintf((buf), (len), &(src), NR_CPUS)
+static inline int __cpulist_scnprintf(char *buf, int len,
+                                       const cpumask_t *srcp, int nbits)
+{
+       return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
+}
 
 #if NR_CPUS > 1
 #define for_each_cpu_mask(cpu, mask)           \
@@ -368,7 +366,7 @@
        for_each_cpu_mask(cpu, (mask))          \
                if (cpu_online(cpu))            \
                        break;                  \
-       min_t(int, NR_CPUS, cpu);               \
+       cpu;                                    \
 })
 
 #define for_each_cpu(cpu)        for_each_cpu_mask((cpu), cpu_possible_map)
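
With first_cpu()/next_cpu() now clamped, callers can rely on getting exactly
NR_CPUS at the end of a mask rather than an arbitrary value >= NR_CPUS. A
sketch of the iterator semantics:

    cpumask_t mask = cpumask_of_cpu(2);
    int cpu;

    cpu = first_cpu(mask);        /* 2 */
    cpu = next_cpu(cpu, mask);    /* NR_CPUS: no further bits are set */

    for_each_cpu_mask(cpu, mask)  /* visits only cpu == 2 */
        printk("cpu%d\n", cpu);
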
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/domain.h  Mon Jan  9 11:22:17 2006
@@ -13,12 +13,10 @@
 
 extern void free_vcpu_struct(struct vcpu *v);
 
-extern void arch_do_createdomain(struct vcpu *v);
+extern int arch_do_createdomain(struct vcpu *v);
 
-extern int  arch_set_info_guest(
+extern int arch_set_info_guest(
     struct vcpu *v, struct vcpu_guest_context *c);
-
-extern void vcpu_migrate_cpu(struct vcpu *v, int newcpu);
 
 extern void free_perdomain_pt(struct domain *d);
 
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h     Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/lib.h     Mon Jan  9 11:22:17 2006
@@ -53,10 +53,16 @@
 /* vsprintf.c */
 extern int sprintf(char * buf, const char * fmt, ...)
     __attribute__ ((format (printf, 2, 3)));
-extern int vsprintf(char *buf, const char *, va_list);
+extern int vsprintf(char *buf, const char *, va_list)
+    __attribute__ ((format (printf, 2, 0)));
 extern int snprintf(char * buf, size_t size, const char * fmt, ...)
     __attribute__ ((format (printf, 3, 4)));
-extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+    __attribute__ ((format (printf, 3, 0)));
+extern int scnprintf(char * buf, size_t size, const char * fmt, ...)
+    __attribute__ ((format (printf, 3, 4)));
+extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+    __attribute__ ((format (printf, 3, 0)));
 
 long simple_strtol(
     const char *cp,char **endp, unsigned int base);
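
The third argument of 0 in the new format attributes is the convention for
va_list variants: GCC checks the format string itself but has no variadic
arguments to type-check. A sketch with an illustrative function:

    extern int my_vlog(int level, const char *fmt, va_list ap)
        __attribute__ ((format (printf, 2, 0)));
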
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/sched-if.h        Mon Jan  9 11:22:17 2006
@@ -13,8 +13,8 @@
 
 struct schedule_data {
     spinlock_t          schedule_lock;  /* spinlock protecting curr        */
-    struct vcpu *curr;           /* current task                    */
-    struct vcpu *idle;           /* idle task for this cpu          */
+    struct vcpu        *curr;           /* current task                    */
+    struct vcpu        *idle;           /* idle task for this cpu          */
     void               *sched_priv;
     struct ac_timer     s_timer;        /* scheduling timer                */
     unsigned long       tick;           /* current periodic 'tick'         */
@@ -39,6 +39,7 @@
     void         (*rem_task)       (struct vcpu *);
     void         (*sleep)          (struct vcpu *);
     void         (*wake)           (struct vcpu *);
+    int          (*set_affinity)   (struct vcpu *, cpumask_t *);
     struct task_slice (*do_schedule) (s_time_t);
     int          (*control)        (struct sched_ctl_cmd *);
     int          (*adjdom)         (struct domain *,
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/sched.h   Mon Jan  9 11:22:17 2006
@@ -11,6 +11,7 @@
 #include <xen/time.h>
 #include <xen/ac_timer.h>
 #include <xen/grant_table.h>
+#include <xen/rangeset.h>
 #include <asm/domain.h>
 
 extern unsigned long volatile jiffies;
@@ -50,8 +51,6 @@
 int  evtchn_init(struct domain *d);
 void evtchn_destroy(struct domain *d);
 
-#define CPUMAP_RUNANYWHERE 0xFFFFFFFF
-
 struct vcpu 
 {
     int              vcpu_id;
@@ -79,7 +78,11 @@
 
     atomic_t         pausecnt;
 
-    cpumap_t         cpumap;        /* which cpus this domain can run on */
+    /* Bitmask of CPUs on which this VCPU may run. */
+    cpumask_t        cpu_affinity;
+
+    /* Bitmask of CPUs which are holding onto this VCPU's state. */
+    cpumask_t        vcpu_dirty_cpumask;
 
     struct arch_vcpu arch;
 };
@@ -109,6 +112,9 @@
 
     struct domain   *next_in_list;
     struct domain   *next_in_hashbucket;
+
+    struct list_head rangesets;
+    spinlock_t       rangesets_lock;
 
     /* Event channel information. */
     struct evtchn   *evtchn[NR_EVTCHN_BUCKETS];
@@ -125,6 +131,10 @@
     u16              pirq_to_evtchn[NR_PIRQS];
     u32              pirq_mask[NR_PIRQS/32];
 
+    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
+    struct rangeset *iomem_caps;
+    struct rangeset *irq_caps;
+
     unsigned long    domain_flags;
     unsigned long    vm_assist;
 
@@ -133,7 +143,7 @@
     struct vcpu *vcpu[MAX_VIRT_CPUS];
 
     /* Bitmask of CPUs which are holding onto this domain's state. */
-    cpumask_t        cpumask;
+    cpumask_t        domain_dirty_cpumask;
 
     struct arch_domain arch;
 
@@ -165,9 +175,9 @@
 extern struct domain idle0_domain;
 extern struct vcpu idle0_vcpu;
 
-extern struct vcpu *idle_task[NR_CPUS];
+extern struct vcpu *idle_domain[NR_CPUS];
 #define IDLE_DOMAIN_ID   (0x7FFFU)
-#define is_idle_task(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
+#define is_idle_domain(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
 
 struct vcpu *alloc_vcpu(
     struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
@@ -215,7 +225,7 @@
     unsigned long image_start, unsigned long image_len, 
     unsigned long initrd_start, unsigned long initrd_len,
     char *cmdline);
-extern int set_info_guest(struct domain *d, dom0_setdomaininfo_t *);
+extern int set_info_guest(struct domain *d, dom0_setvcpucontext_t *);
 
 struct domain *find_domain_by_id(domid_t dom);
 extern void domain_destruct(struct domain *d);
@@ -261,36 +271,27 @@
 extern void sync_vcpu_execstate(struct vcpu *v);
 
 /*
- * Called by the scheduler to switch to another VCPU. On entry, although
- * VCPUF_running is no longer asserted for @prev, its context is still running
- * on the local CPU and is not committed to memory. The local scheduler lock
- * is therefore still held, and interrupts are disabled, because the local CPU
- * is in an inconsistent state.
- * 
- * The callee must ensure that the local CPU is no longer running in @prev's
- * context, and that the context is saved to memory, before returning.
- * Alternatively, if implementing lazy context switching, it suffices to ensure
- * that invoking sync_vcpu_execstate() will switch and commit @prev's state.
+ * Called by the scheduler to switch to another VCPU. This function must
+ * call context_saved(@prev) when the local CPU is no longer running in
+ * @prev's context, and that context is saved to memory. Alternatively, if
+ * implementing lazy context switching, it suffices to ensure that invoking
+ * sync_vcpu_execstate() will switch and commit @prev's state.
  */
 extern void context_switch(
     struct vcpu *prev, 
     struct vcpu *next);
 
 /*
- * On some architectures (notably x86) it is not possible to entirely load
- * @next's context with interrupts disabled. These may implement a function to
- * finalise loading the new context after interrupts are re-enabled. This
- * function is not given @prev and is not permitted to access it.
- */
-extern void context_switch_finalise(
-    struct vcpu *next);
+ * As described above, context_switch() must call this function when the
+ * local CPU is no longer running in @prev's context, and @prev's context is
+ * saved to memory. Alternatively, if implementing lazy context switching,
+ * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
+ */
+#define context_saved(prev) (clear_bit(_VCPUF_running, &(prev)->vcpu_flags))
 
 /* Called by the scheduler to continue running the current VCPU. */
 extern void continue_running(
     struct vcpu *same);
-
-/* Is CPU 'cpu' idle right now? */
-int idle_cpu(int cpu);
 
 void startup_cpu_idle_loop(void);
 
@@ -356,17 +357,11 @@
  /* Currently running on a CPU? */
 #define _VCPUF_running         3
 #define VCPUF_running          (1UL<<_VCPUF_running)
- /* Disables auto-migration between CPUs. */
-#define _VCPUF_cpu_pinned      4
-#define VCPUF_cpu_pinned       (1UL<<_VCPUF_cpu_pinned)
- /* Domain migrated between CPUs. */
-#define _VCPUF_cpu_migrated    5
-#define VCPUF_cpu_migrated     (1UL<<_VCPUF_cpu_migrated)
  /* Initialization completed. */
-#define _VCPUF_initialised     6
+#define _VCPUF_initialised     4
 #define VCPUF_initialised      (1UL<<_VCPUF_initialised)
  /* VCPU is not-runnable */
-#define _VCPUF_down            7
+#define _VCPUF_down            5
 #define VCPUF_down             (1UL<<_VCPUF_down)
 
 /*
@@ -378,32 +373,25 @@
  /* Is this domain privileged? */
 #define _DOMF_privileged       1
 #define DOMF_privileged        (1UL<<_DOMF_privileged)
- /* May this domain do IO to physical devices? */
-#define _DOMF_physdev_access   2
-#define DOMF_physdev_access    (1UL<<_DOMF_physdev_access)
  /* Guest shut itself down for some reason. */
-#define _DOMF_shutdown         3
+#define _DOMF_shutdown         2
 #define DOMF_shutdown          (1UL<<_DOMF_shutdown)
- /* Guest is in process of shutting itself down (becomes DOMF_shutdown). */
-#define _DOMF_shuttingdown     4
-#define DOMF_shuttingdown      (1UL<<_DOMF_shuttingdown)
  /* Death rattle. */
-#define _DOMF_dying            5
+#define _DOMF_dying            3
 #define DOMF_dying             (1UL<<_DOMF_dying)
  /* Domain is paused by controller software. */
-#define _DOMF_ctrl_pause       6
+#define _DOMF_ctrl_pause       4
 #define DOMF_ctrl_pause        (1UL<<_DOMF_ctrl_pause)
  /* Domain is being debugged by controller software. */
-#define _DOMF_debugging        7
+#define _DOMF_debugging        5
 #define DOMF_debugging         (1UL<<_DOMF_debugging)
 
 
-static inline int domain_runnable(struct vcpu *v)
+static inline int vcpu_runnable(struct vcpu *v)
 {
     return ( (atomic_read(&v->pausecnt) == 0) &&
              !(v->vcpu_flags & (VCPUF_blocked|VCPUF_down)) &&
-             !(v->domain->domain_flags &
-               (DOMF_shutdown|DOMF_shuttingdown|DOMF_ctrl_pause)) );
+             !(v->domain->domain_flags & (DOMF_shutdown|DOMF_ctrl_pause)) );
 }
 
 void vcpu_pause(struct vcpu *v);
@@ -414,6 +402,8 @@
 void domain_unpause_by_systemcontroller(struct domain *d);
 void cpu_init(void);
 
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
+
 static inline void vcpu_unblock(struct vcpu *v)
 {
     if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
@@ -422,8 +412,6 @@
 
 #define IS_PRIV(_d)                                         \
     (test_bit(_DOMF_privileged, &(_d)->domain_flags))
-#define IS_CAPABLE_PHYSDEV(_d)                              \
-    (test_bit(_DOMF_physdev_access, &(_d)->domain_flags))
 
 #define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))
 
diff -r 25e3c8668f1f -r 8af1199488d3 tools/guest-headers/Makefile
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/tools/guest-headers/Makefile      Mon Jan  9 11:22:17 2006
@@ -0,0 +1,11 @@
+
+XEN_ROOT=../..
+linuxsparsetree = $(XEN_ROOT)/linux-2.6-xen-sparse
+
+check:
+
+install:
+       mkdir -p $(DESTDIR)/usr/include/xen/linux
+	install -m0644 $(linuxsparsetree)/include/asm-xen/linux-public/*.h $(DESTDIR)/usr/include/xen/linux
+
+clean:
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/process-linux-xen.c
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/process-linux-xen.c       Mon Jan  9 11:22:17 2006
@@ -0,0 +1,848 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * 04/11/17 Ashok Raj  <ashok.raj@xxxxxxxxx> Added CPU Hotplug Support
+ */
+#ifdef XEN
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/symbols.h>
+#include <xen/smp.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/unwind.h>
+#else
+#define __KERNEL_SYSCALLS__    /* see <asm/unistd.h> */
+#include <linux/config.h>
+
+#include <linux/cpu.h>
+#include <linux/pm.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/thread_info.h>
+#include <linux/unistd.h>
+#include <linux/efi.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/kprobes.h>
+
+#include <asm/cpu.h>
+#include <asm/delay.h>
+#include <asm/elf.h>
+#include <asm/ia32.h>
+#include <asm/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#include <asm/user.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#include "sigframe.h"
+
+void (*ia64_mark_idle)(int);
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+#endif
+
+void
+ia64_do_show_stack (struct unw_frame_info *info, void *arg)
+{
+       unsigned long ip, sp, bsp;
+	char buf[128];			/* don't make it so big that it overflows the stack! */
+
+       printk("\nCall Trace:\n");
+       do {
+               unw_get_ip(info, &ip);
+               if (ip == 0)
+                       break;
+
+               unw_get_sp(info, &sp);
+               unw_get_bsp(info, &bsp);
+               snprintf(buf, sizeof(buf),
+                        " [<%016lx>] %%s\n"
+			 "                                sp=%016lx bsp=%016lx\n",
+                        ip, sp, bsp);
+               print_symbol(buf, ip);
+       } while (unw_unwind(info) >= 0);
+}
+
+void
+show_stack (struct task_struct *task, unsigned long *sp)
+{
+       if (!task)
+               unw_init_running(ia64_do_show_stack, NULL);
+       else {
+               struct unw_frame_info info;
+
+               unw_init_from_blocked_task(&info, task);
+               ia64_do_show_stack(&info, NULL);
+       }
+}
+
+#ifndef XEN
+void
+dump_stack (void)
+{
+       show_stack(NULL, NULL);
+}
+
+EXPORT_SYMBOL(dump_stack);
+#endif
+
+#ifdef XEN
+void
+show_registers(struct pt_regs *regs)
+#else
+void
+show_regs (struct pt_regs *regs)
+#endif
+{
+       unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+#ifndef XEN
+       print_modules();
+	printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
+       printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
+              regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
+#else
+       printk("\nCPU %d\n", smp_processor_id());
+       printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
+              regs->cr_ipsr, regs->cr_ifs, ip);
+#endif
+       print_symbol("ip is at %s\n", ip);
+       printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+              regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+       printk("rnat: %016lx bsps: %016lx pr  : %016lx\n",
+              regs->ar_rnat, regs->ar_bspstore, regs->pr);
+       printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+              regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+       printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0, regs->b6, regs->b7);
+       printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
+              regs->f6.u.bits[1], regs->f6.u.bits[0],
+              regs->f7.u.bits[1], regs->f7.u.bits[0]);
+       printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
+              regs->f8.u.bits[1], regs->f8.u.bits[0],
+              regs->f9.u.bits[1], regs->f9.u.bits[0]);
+       printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+              regs->f10.u.bits[1], regs->f10.u.bits[0],
+              regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1, regs->r2, regs->r3);
+	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
+	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
+	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
+	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
+	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
+	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
+	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
+	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
+
+       if (user_mode(regs)) {
+               /* print the stacked registers */
+               unsigned long val, *bsp, ndirty;
+               int i, sof, is_nat = 0;
+
+               sof = regs->cr_ifs & 0x7f;      /* size of frame */
+               ndirty = (regs->loadrs >> 19);
+		bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
+               for (i = 0; i < sof; ++i) {
+			get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i));
+			printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
+                              ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
+               }
+       } else
+               show_stack(NULL, NULL);
+}
+
+#ifndef XEN
+void
+do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+{
+       if (fsys_mode(current, &scr->pt)) {
+		/* defer signal-handling etc. until we return to privilege-level 0.  */
+               if (!ia64_psr(&scr->pt)->lp)
+                       ia64_psr(&scr->pt)->lp = 1;
+               return;
+       }
+
+#ifdef CONFIG_PERFMON
+       if (current->thread.pfm_needs_checking)
+               pfm_handle_work();
+#endif
+
+       /* deal with pending signal delivery */
+       if (test_thread_flag(TIF_SIGPENDING))
+               ia64_do_signal(oldset, scr, in_syscall);
+}
+
+static int pal_halt        = 1;
+static int can_do_pal_halt = 1;
+
+static int __init nohalt_setup(char * str)
+{
+       pal_halt = can_do_pal_halt = 0;
+       return 1;
+}
+__setup("nohalt", nohalt_setup);
+
+void
+update_pal_halt_status(int status)
+{
+       can_do_pal_halt = pal_halt && status;
+}
+
+/*
+ * We use this if we don't have any better idle routine..
+ */
+void
+default_idle (void)
+{
+       local_irq_enable();
+       while (!need_resched())
+               if (can_do_pal_halt)
+                       safe_halt();
+               else
+                       cpu_relax();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+       extern void ia64_cpu_local_tick (void);
+       unsigned int this_cpu = smp_processor_id();
+
+       /* Ack it */
+       __get_cpu_var(cpu_state) = CPU_DEAD;
+
+       max_xtp();
+       local_irq_disable();
+       idle_domain_exit();
+       ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
+       /*
+        * The above is a point of no-return, the processor is
+        * expected to be in SAL loop now.
+        */
+       BUG();
+}
+#else
+static inline void play_dead(void)
+{
+       BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void cpu_idle_wait(void)
+{
+       unsigned int cpu, this_cpu = get_cpu();
+       cpumask_t map;
+
+       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+       put_cpu();
+
+       cpus_clear(map);
+       for_each_online_cpu(cpu) {
+               per_cpu(cpu_idle_state, cpu) = 1;
+               cpu_set(cpu, map);
+       }
+
+       __get_cpu_var(cpu_idle_state) = 0;
+
+       wmb();
+       do {
+               ssleep(1);
+               for_each_online_cpu(cpu) {
+			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+                               cpu_clear(cpu, map);
+               }
+               cpus_and(map, map, cpu_online_map);
+       } while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+void __attribute__((noreturn))
+cpu_idle (void)
+{
+       void (*mark_idle)(int) = ia64_mark_idle;
+
+       /* endless idle loop with no priority at all */
+       while (1) {
+#ifdef CONFIG_SMP
+               if (!need_resched())
+                       min_xtp();
+#endif
+               while (!need_resched()) {
+                       void (*idle)(void);
+
+                       if (__get_cpu_var(cpu_idle_state))
+                               __get_cpu_var(cpu_idle_state) = 0;
+
+                       rmb();
+                       if (mark_idle)
+                               (*mark_idle)(1);
+
+                       idle = pm_idle;
+                       if (!idle)
+                               idle = default_idle;
+                       (*idle)();
+               }
+
+               if (mark_idle)
+                       (*mark_idle)(0);
+
+#ifdef CONFIG_SMP
+               normal_xtp();
+#endif
+               schedule();
+               check_pgt_cache();
+               if (cpu_is_offline(smp_processor_id()))
+                       play_dead();
+       }
+}
+
+void
+ia64_save_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+       unsigned long info;
+#endif
+
+       if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+               ia64_save_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+       if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+               pfm_save_regs(task);
+
+       info = __get_cpu_var(pfm_syst_info);
+       if (info & PFM_CPUINFO_SYST_WIDE)
+               pfm_syst_wide_update_task(task, info, 0);
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+       if (IS_IA32_PROCESS(ia64_task_regs(task)))
+               ia32_save_state(task);
+#endif
+}
+
+void
+ia64_load_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+       unsigned long info;
+#endif
+
+       if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+               ia64_load_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+       if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+               pfm_load_regs(task);
+
+       info = __get_cpu_var(pfm_syst_info);
+       if (info & PFM_CPUINFO_SYST_WIDE) 
+               pfm_syst_wide_update_task(task, info, 1);
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+       if (IS_IA32_PROCESS(ia64_task_regs(task)))
+               ia32_load_state(task);
+#endif
+}
+
+/*
+ * Copy the state of an ia-64 thread.
+ *
+ * We get here through the following  call chain:
+ *
+ *     from user-level:        from kernel:
+ *
+ *     <clone syscall>         <some kernel call frames>
+ *     sys_clone                  :
+ *     do_fork                 do_fork
+ *     copy_thread             copy_thread
+ *
+ * This means that the stack layout is as follows:
+ *
+ *     +---------------------+ (highest addr)
+ *     |   struct pt_regs    |
+ *     +---------------------+
+ *     | struct switch_stack |
+ *     +---------------------+
+ *     |                     |
+ *     |    memory stack     |
+ *     |                     | <-- sp (lowest addr)
+ *     +---------------------+
+ *
+ * Observe that we copy the unat values that are in pt_regs and switch_stack.  Spilling an
+ * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
+ * with N=(X & 0x1ff)/8.  Thus, copying the unat value preserves the NaT bits ONLY if the
+ * pt_regs structure in the parent is congruent to that of the child, modulo 512.  Since
+ * the stack is page aligned and the page size is at least 4KB, this is always the case,
+ * so there is nothing to worry about.
+ */
+int
+copy_thread (int nr, unsigned long clone_flags,
+            unsigned long user_stack_base, unsigned long user_stack_size,
+            struct task_struct *p, struct pt_regs *regs)
+{
+       extern char ia64_ret_from_clone, ia32_ret_from_clone;
+       struct switch_stack *child_stack, *stack;
+       unsigned long rbs, child_rbs, rbs_size;
+       struct pt_regs *child_ptregs;
+       int retval = 0;
+
+#ifdef CONFIG_SMP
+       /*
+        * For SMP idle threads, fork_by_hand() calls do_fork with
+        * NULL regs.
+        */
+       if (!regs)
+               return 0;
+#endif
+
+       stack = ((struct switch_stack *) regs) - 1;
+
+	child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
+       child_stack = (struct switch_stack *) child_ptregs - 1;
+
+       /* copy parent's switch_stack & pt_regs to child: */
+	memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+       rbs = (unsigned long) current + IA64_RBS_OFFSET;
+       child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
+       rbs_size = stack->ar_bspstore - rbs;
+
+       /* copy the parent's register backing store to the child: */
+       memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+
+       if (likely(user_mode(child_ptregs))) {
+               if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
+			child_ptregs->r13 = regs->r16;	/* see sys_clone2() in entry.S */
+               if (user_stack_base) {
+                       child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+                       child_ptregs->ar_bspstore = user_stack_base;
+                       child_ptregs->ar_rnat = 0;
+                       child_ptregs->loadrs = 0;
+               }
+       } else {
+               /*
+                * Note: we simply preserve the relative position of
+                * the stack pointer here.  There is no need to
+                * allocate a scratch area here, since that will have
+                * been taken care of by the caller of sys_clone()
+                * already.
+                */
+               child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
+               child_ptregs->r13 = (unsigned long) p;          /* set `current' pointer */
+       }
+       child_stack->ar_bspstore = child_rbs + rbs_size;
+       if (IS_IA32_PROCESS(regs))
+               child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
+       else
+               child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+       /* copy parts of thread_struct: */
+       p->thread.ksp = (unsigned long) child_stack - 16;
+
+       /* stop some PSR bits from being inherited.
+        * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+        * therefore we must specify them explicitly here and not include them in
+        * IA64_PSR_BITS_TO_CLEAR.
+        */
+       child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+                                & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+       /*
+        * NOTE: The calling convention considers all floating point
+        * registers in the high partition (fph) to be scratch.  Since
+        * the only way to get to this point is through a system call,
+        * we know that the values in fph are all dead.  Hence, there
+        * is no need to inherit the fph state from the parent to the
+        * child and all we have to do is to make sure that
+        * IA64_THREAD_FPH_VALID is cleared in the child.
+        *
+        * XXX We could push this optimization a bit further by
+        * clearing IA64_THREAD_FPH_VALID on ANY system call.
+        * However, it's not clear this is worth doing.  Also, it
+        * would be a slight deviation from the normal Linux system
+        * call behavior where scratch registers are preserved across
+        * system calls (unless used by the system call itself).
+        */
+#      define THREAD_FLAGS_TO_CLEAR    (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
+                                        | IA64_THREAD_PM_VALID)
+#      define THREAD_FLAGS_TO_SET      0
+       p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
+                          | THREAD_FLAGS_TO_SET);
+       ia64_drop_fpu(p);       /* don't pick up stale state from a CPU's fph */
+#ifdef CONFIG_IA32_SUPPORT
+       /*
+        * If we're cloning an IA32 task then save the IA32 extra
+        * state from the current task to the new task
+        */
+       if (IS_IA32_PROCESS(ia64_task_regs(current))) {
+               ia32_save_state(p);
+               if (clone_flags & CLONE_SETTLS)
+                       retval = ia32_clone_tls(p, child_ptregs);
+
+               /* Copy partially mapped page list */
+               if (!retval)
+                       retval = ia32_copy_partial_page_list(p, clone_flags);
+       }
+#endif
+
+#ifdef CONFIG_PERFMON
+       if (current->thread.pfm_context)
+               pfm_inherit(p, child_ptregs);
+#endif
+       return retval;
+}
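Aside: the ar.unat rule quoted in the comment before copy_thread() (bit N = (X & 0x1ff)/8)
is easy to sanity-check in isolation. A minimal host-side sketch, with an illustrative
helper name that is not part of the patch:

    #include <assert.h>

    /* Bit of ar.unat that holds the NaT bit for a spill to address x. */
    static unsigned unat_bit(unsigned long x)
    {
            return (x & 0x1ff) / 8;
    }

    int main(void)
    {
            /* Addresses congruent modulo 512 share the same unat bit, which
             * is why copying the parent's unat word is safe when the parent
             * and child pt_regs are 512-congruent. */
            assert(unat_bit(0xe000000000008120UL) == unat_bit(0xe000000000010120UL));
            return 0;
    }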
+
+static void
+do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+       unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm;
+       elf_greg_t *dst = arg;
+       struct pt_regs *pt;
+       char nat;
+       int i;
+
+       memset(dst, 0, sizeof(elf_gregset_t));  /* don't leak any kernel bits to user-level */
+
+       if (unw_unwind_to_user(info) < 0)
+               return;
+
+       unw_get_sp(info, &sp);
+       pt = (struct pt_regs *) (sp + 16);
+
+       urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
+
+       if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
+               return;
+
+       ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
+                 &ar_rnat);
+
+       /*
+        * coredump format:
+        *      r0-r31
+        *      NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+        *      predicate registers (p0-p63)
+        *      b0-b7
+        *      ip cfm user-mask
+        *      ar.rsc ar.bsp ar.bspstore ar.rnat
+        *      ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+        */
+
+       /* r0 is zero */
+       for (i = 1, mask = (1UL << i); i < 32; ++i) {
+               unw_get_gr(info, i, &dst[i], &nat);
+               if (nat)
+                       nat_bits |= mask;
+               mask <<= 1;
+       }
+       dst[32] = nat_bits;
+       unw_get_pr(info, &dst[33]);
+
+       for (i = 0; i < 8; ++i)
+               unw_get_br(info, i, &dst[34 + i]);
+
+       unw_get_rp(info, &ip);
+       dst[42] = ip + ia64_psr(pt)->ri;
+       dst[43] = cfm;
+       dst[44] = pt->cr_ipsr & IA64_PSR_UM;
+
+       unw_get_ar(info, UNW_AR_RSC, &dst[45]);
+       /*
+        * For bsp and bspstore, unw_get_ar() would return the kernel
+        * addresses, but we need the user-level addresses instead:
+        */
+       dst[46] = urbs_end;     /* note: by convention PT_AR_BSP points to the end of the urbs! */
+       dst[47] = pt->ar_bspstore;
+       dst[48] = ar_rnat;
+       unw_get_ar(info, UNW_AR_CCV, &dst[49]);
+       unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
+       unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
+       dst[52] = pt->ar_pfs;   /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */
+       unw_get_ar(info, UNW_AR_LC, &dst[53]);
+       unw_get_ar(info, UNW_AR_EC, &dst[54]);
+       unw_get_ar(info, UNW_AR_CSD, &dst[55]);
+       unw_get_ar(info, UNW_AR_SSD, &dst[56]);
+}
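Aside: the packing of per-register NaT flags into the single word at dst[32] above can
be rendered standalone. A hedged sketch (the helper name and test values are
illustrative only):

    #include <stdio.h>

    /* Bit i of the result is set iff register r_i carried a NaT;
     * r0 is hardwired to zero and therefore never a NaT. */
    static unsigned long collect_nat_bits(const char nat[32])
    {
            unsigned long mask, nat_bits = 0;
            int i;

            for (i = 1, mask = (1UL << i); i < 32; ++i) {
                    if (nat[i])
                            nat_bits |= mask;
                    mask <<= 1;
            }
            return nat_bits;
    }

    int main(void)
    {
            char nat[32] = { 0 };

            nat[4] = nat[7] = 1;
            printf("nat_bits = 0x%lx\n", collect_nat_bits(nat)); /* prints 0x90 */
            return 0;
    }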
+
+void
+do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+       elf_fpreg_t *dst = arg;
+       int i;
+
+       memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */
+
+       if (unw_unwind_to_user(info) < 0)
+               return;
+
+       /* f0 is 0.0, f1 is 1.0 */
+
+       for (i = 2; i < 32; ++i)
+               unw_get_fr(info, i, dst + i);
+
+       ia64_flush_fph(task);
+       if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
+               memcpy(dst + 32, task->thread.fph, 96*16);
+}
+
+void
+do_copy_regs (struct unw_frame_info *info, void *arg)
+{
+       do_copy_task_regs(current, info, arg);
+}
+
+void
+do_dump_fpu (struct unw_frame_info *info, void *arg)
+{
+       do_dump_task_fpu(current, info, arg);
+}
+
+int
+dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
+{
+       struct unw_frame_info tcore_info;
+
+       if (current == task) {
+               unw_init_running(do_copy_regs, regs);
+       } else {
+               memset(&tcore_info, 0, sizeof(tcore_info));
+               unw_init_from_blocked_task(&tcore_info, task);
+               do_copy_task_regs(task, &tcore_info, regs);
+       }
+       return 1;
+}
+
+void
+ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
+{
+       unw_init_running(do_copy_regs, dst);
+}
+
+int
+dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
+{
+       struct unw_frame_info tcore_info;
+
+       if (current == task) {
+               unw_init_running(do_dump_fpu, dst);
+       } else {
+               memset(&tcore_info, 0, sizeof(tcore_info));
+               unw_init_from_blocked_task(&tcore_info, task);
+               do_dump_task_fpu(task, &tcore_info, dst);
+       }
+       return 1;
+}
+
+int
+dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
+{
+       unw_init_running(do_dump_fpu, dst);
+       return 1;       /* f0-f31 are always valid so we always return 1 */
+}
+
+long
+sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp,
+           struct pt_regs *regs)
+{
+       char *fname;
+       int error;
+
+       fname = getname(filename);
+       error = PTR_ERR(fname);
+       if (IS_ERR(fname))
+               goto out;
+       error = do_execve(fname, argv, envp, regs);
+       putname(fname);
+out:
+       return error;
+}
+
+pid_t
+kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
+{
+       extern void start_kernel_thread (void);
+       unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
+       struct {
+               struct switch_stack sw;
+               struct pt_regs pt;
+       } regs;
+
+       memset(&regs, 0, sizeof(regs));
+       regs.pt.cr_iip = helper_fptr[0];        /* set entry point (IP) */
+       regs.pt.r1 = helper_fptr[1];            /* set GP */
+       regs.pt.r9 = (unsigned long) fn;        /* 1st argument */
+       regs.pt.r11 = (unsigned long) arg;      /* 2nd argument */
+       /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read.  */
+       regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+       regs.pt.cr_ifs = 1UL << 63;             /* mark as valid, empty frame */
+       regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
+       regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
+       regs.sw.pr = (1 << PRED_KERNEL_STACK);
+       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
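Aside: kernel_thread() above relies on the ia64 convention that a function pointer names
a two-word function descriptor rather than raw code, which is why it reads
helper_fptr[0] and helper_fptr[1]. A sketch of that layout (the struct name is invented
for illustration; ia64-specific, so not portable host code):

    struct ia64_fdesc {
            unsigned long ip;       /* entry point, copied into cr_iip above */
            unsigned long gp;       /* global pointer, copied into r1 above */
    };

    /* Equivalent to the helper_fptr[0]/helper_fptr[1] accesses above. */
    static void unpack_fdesc(const void *fn, unsigned long *ip, unsigned long *gp)
    {
            const struct ia64_fdesc *fd = fn;

            *ip = fd->ip;
            *gp = fd->gp;
    }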
+
+/* This gets called from kernel_thread() via ia64_invoke_thread_helper().  */
+int
+kernel_thread_helper (int (*fn)(void *), void *arg)
+{
+#ifdef CONFIG_IA32_SUPPORT
+       if (IS_IA32_PROCESS(ia64_task_regs(current))) {
+               /* A kernel thread is always a 64-bit process. */
+               current->thread.map_base  = DEFAULT_MAP_BASE;
+               current->thread.task_size = DEFAULT_TASK_SIZE;
+               ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
+               ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
+       }
+#endif
+       return (*fn)(arg);
+}
+
+/*
+ * Flush thread state.  This is called when a thread does an execve().
+ */
+void
+flush_thread (void)
+{
+       /*
+        * Remove function-return probe instances associated with this task
+        * and put them back on the free list. Do not insert an exit probe for
+        * this function, it will be disabled by kprobe_flush_task if you do.
+        */
+       kprobe_flush_task(current);
+
+       /* drop floating-point and debug-register state if it exists: */
+       current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
+       ia64_drop_fpu(current);
+       if (IS_IA32_PROCESS(ia64_task_regs(current)))
+               ia32_drop_partial_page_list(current);
+}
+
+/*
+ * Clean up state associated with current thread.  This is called when
+ * the thread calls exit().
+ */
+void
+exit_thread (void)
+{
+
+       /*
+        * Remove function-return probe instances associated with this task
+        * and put them back on the free list. Do not insert an exit probe for
+        * this function, it will be disabled by kprobe_flush_task if you do.
+        */
+       kprobe_flush_task(current);
+
+       ia64_drop_fpu(current);
+#ifdef CONFIG_PERFMON
+       /* if needed, stop monitoring and flush state to perfmon context */
+       if (current->thread.pfm_context)
+               pfm_exit_thread(current);
+
+       /* free debug register resources */
+       if (current->thread.flags & IA64_THREAD_DBG_VALID)
+               pfm_release_debug_registers(current);
+#endif
+       if (IS_IA32_PROCESS(ia64_task_regs(current)))
+               ia32_drop_partial_page_list(current);
+}
+
+unsigned long
+get_wchan (struct task_struct *p)
+{
+       struct unw_frame_info info;
+       unsigned long ip;
+       int count = 0;
+
+       /*
+        * Note: p may not be a blocked task (it could be current or
+        * another process running on some other CPU.  Rather than
+        * trying to determine if p is really blocked, we just assume
+        * it's blocked and rely on the unwind routines to fail
+        * gracefully if the process wasn't really blocked after all.
+        * --davidm 99/12/15
+        */
+       unw_init_from_blocked_task(&info, p);
+       do {
+               if (unw_unwind(&info) < 0)
+                       return 0;
+               unw_get_ip(&info, &ip);
+               if (!in_sched_functions(ip))
+                       return ip;
+       } while (count++ < 16);
+       return 0;
+}
+
+void
+cpu_halt (void)
+{
+       pal_power_mgmt_info_u_t power_info[8];
+       unsigned long min_power;
+       int i, min_power_state;
+
+       if (ia64_pal_halt_info(power_info) != 0)
+               return;
+
+       min_power_state = 0;
+       min_power = power_info[0].pal_power_mgmt_info_s.power_consumption;
+       for (i = 1; i < 8; ++i)
+               if (power_info[i].pal_power_mgmt_info_s.im
+                   && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) {
+                       min_power = power_info[i].pal_power_mgmt_info_s.power_consumption;
+                       min_power_state = i;
+               }
+
+       while (1)
+               ia64_pal_halt(min_power_state);
+}
+
+void
+machine_restart (char *restart_cmd)
+{
+       (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
+}
+
+void
+machine_halt (void)
+{
+       cpu_halt();
+}
+
+void
+machine_power_off (void)
+{
+       if (pm_power_off)
+               pm_power_off();
+       machine_halt();
+}
+#endif // !XEN
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind.c
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind.c  Mon Jan  9 11:22:17 2006
@@ -0,0 +1,2332 @@
+/*
+ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2003 Fenghua Yu <fenghua.yu@xxxxxxxxx>
+ *     - Change pt_regs_off() to make it less dependent on pt_regs structure.
+ */
+/*
+ * This file implements call frame unwind support for the Linux
+ * kernel.  Parsing and processing the unwind information is
+ * time-consuming, so this implementation translates the unwind
+ * descriptors into unwind scripts.  These scripts are very simple
+ * (basically a sequence of assignments) and efficient to execute.
+ * They are cached for later re-use.  Each script is specific for a
+ * given instruction pointer address and the set of predicate values
+ * that the script depends on (most unwind descriptors are
+ * unconditional and scripts often do not depend on predicates at
+ * all).  This code is based on the unwind conventions described in
+ * the "IA-64 Software Conventions and Runtime Architecture" manual.
+ *
+ * SMP conventions:
+ *     o updates to the global unwind data (in structure "unw") are serialized
+ *       by the unw.lock spinlock
+ *     o each unwind script has its own read-write lock; a thread must acquire
+ *       a read lock before executing a script and must acquire a write lock
+ *       before modifying a script
+ *     o if both the unw.lock spinlock and a script's read-write lock must be
+ *       acquired, then the read-write lock must be acquired first.
+ */
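Aside: the third rule above fixes a lock order (the global unw.lock before any
per-script lock). A user-space analogue of the correct ordering, using pthreads purely
for illustration and not part of the patch:

    #include <pthread.h>

    static pthread_mutex_t unw_lock = PTHREAD_MUTEX_INITIALIZER;      /* stands in for unw.lock */
    static pthread_rwlock_t script_lock = PTHREAD_RWLOCK_INITIALIZER; /* one per cached script */

    static void modify_script(void)
    {
            /* Global lock first, then the script's write lock -- never the
             * reverse, or we could deadlock against a writer that already
             * holds the global lock. */
            pthread_mutex_lock(&unw_lock);
            pthread_rwlock_wrlock(&script_lock);
            /* ... rewrite the cached unwind script ... */
            pthread_rwlock_unlock(&script_lock);
            pthread_mutex_unlock(&unw_lock);
    }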
+#ifdef XEN
+#include <xen/types.h>
+#include <xen/elf.h>
+#include <xen/kernel.h>
+#include <xen/sched.h>
+#include <xen/xmalloc.h>
+#include <xen/spinlock.h>
+
+// workaround
+#ifdef CONFIG_SMP
+#define write_trylock(lock)    _raw_write_trylock(lock)
+#else
+#define write_trylock(lock)    ({1;})
+#endif
+
+#else
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#endif
+
+#include <asm/unwind.h>
+
+#include <asm/delay.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "entry.h"
+#include "unwind_i.h"
+
+#define UNW_LOG_CACHE_SIZE     7       /* each unw_script is ~256 bytes in size */
+#define UNW_CACHE_SIZE         (1 << UNW_LOG_CACHE_SIZE)
+
+#define UNW_LOG_HASH_SIZE      (UNW_LOG_CACHE_SIZE + 1)
+#define UNW_HASH_SIZE          (1 << UNW_LOG_HASH_SIZE)
+
+#define UNW_STATS      0       /* WARNING: this disables interrupts for long time-spans!! */
+
+#ifdef UNW_DEBUG
+  static unsigned int unw_debug_level = UNW_DEBUG;
+#  define UNW_DEBUG_ON(n)      unw_debug_level >= n
+   /* Do not code a printk level, not all debug lines end in newline */
+#  define UNW_DPRINT(n, ...)  if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
+#  define inline
+#else /* !UNW_DEBUG */
+#  define UNW_DEBUG_ON(n)  0
+#  define UNW_DPRINT(n, ...)
+#endif /* UNW_DEBUG */
+
+#if UNW_STATS
+# define STAT(x...)    x
+#else
+# define STAT(x...)
+#endif
+
+#ifdef XEN
+#define alloc_reg_state()      xmalloc(struct unw_reg_state)
+#define free_reg_state(usr)    xfree(usr)
+#define alloc_labeled_state()  xmalloc(struct unw_labeled_state)
+#define free_labeled_state(usr)        xfree(usr)
+#else
+#define alloc_reg_state()      kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC)
+#define free_reg_state(usr)    kfree(usr)
+#define alloc_labeled_state()  kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC)
+#define free_labeled_state(usr)        kfree(usr)
+#endif
+
+typedef unsigned long unw_word;
+typedef unsigned char unw_hash_index_t;
+
+static struct {
+       spinlock_t lock;                        /* spinlock for unwind data */
+
+       /* list of unwind tables (one per load-module) */
+       struct unw_table *tables;
+
+       unsigned long r0;                       /* constant 0 for r0 */
+
+       /* table of registers that prologues can save (and order in which they're saved): */
+       const unsigned char save_order[8];
+
+       /* maps a preserved register index (preg_index) to corresponding switch_stack offset: */
+       unsigned short sw_off[sizeof(struct unw_frame_info) / 8];
+
+       unsigned short lru_head;                /* index of least-recently used script */
+       unsigned short lru_tail;                /* index of most-recently used script */
+
+       /* index into unw_frame_info for preserved register i */
+       unsigned short preg_index[UNW_NUM_REGS];
+
+       short pt_regs_offsets[32];
+
+       /* unwind table for the kernel: */
+       struct unw_table kernel_table;
+
+       /* unwind table describing the gate page (kernel code that is mapped into user space): */
+       size_t gate_table_size;
+       unsigned long *gate_table;
+
+       /* hash table that maps instruction pointer to script index: */
+       unsigned short hash[UNW_HASH_SIZE];
+
+       /* script cache: */
+       struct unw_script cache[UNW_CACHE_SIZE];
+
+# ifdef UNW_DEBUG
+       const char *preg_name[UNW_NUM_REGS];
+# endif
+# if UNW_STATS
+       struct {
+               struct {
+                       int lookups;
+                       int hinted_hits;
+                       int normal_hits;
+                       int collision_chain_traversals;
+               } cache;
+               struct {
+                       unsigned long build_time;
+                       unsigned long run_time;
+                       unsigned long parse_time;
+                       int builds;
+                       int news;
+                       int collisions;
+                       int runs;
+               } script;
+               struct {
+                       unsigned long init_time;
+                       unsigned long unwind_time;
+                       int inits;
+                       int unwinds;
+               } api;
+       } stat;
+# endif
+} unw = {
+       .tables = &unw.kernel_table,
+       .lock = SPIN_LOCK_UNLOCKED,
+       .save_order = {
+               UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
+               UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
+       },
+       .preg_index = {
+               offsetof(struct unw_frame_info, pri_unat_loc)/8,        /* PRI_UNAT_GR */
+               offsetof(struct unw_frame_info, pri_unat_loc)/8,        /* PRI_UNAT_MEM */
+               offsetof(struct unw_frame_info, bsp_loc)/8,
+               offsetof(struct unw_frame_info, bspstore_loc)/8,
+               offsetof(struct unw_frame_info, pfs_loc)/8,
+               offsetof(struct unw_frame_info, rnat_loc)/8,
+               offsetof(struct unw_frame_info, psp)/8,
+               offsetof(struct unw_frame_info, rp_loc)/8,
+               offsetof(struct unw_frame_info, r4)/8,
+               offsetof(struct unw_frame_info, r5)/8,
+               offsetof(struct unw_frame_info, r6)/8,
+               offsetof(struct unw_frame_info, r7)/8,
+               offsetof(struct unw_frame_info, unat_loc)/8,
+               offsetof(struct unw_frame_info, pr_loc)/8,
+               offsetof(struct unw_frame_info, lc_loc)/8,
+               offsetof(struct unw_frame_info, fpsr_loc)/8,
+               offsetof(struct unw_frame_info, b1_loc)/8,
+               offsetof(struct unw_frame_info, b2_loc)/8,
+               offsetof(struct unw_frame_info, b3_loc)/8,
+               offsetof(struct unw_frame_info, b4_loc)/8,
+               offsetof(struct unw_frame_info, b5_loc)/8,
+               offsetof(struct unw_frame_info, f2_loc)/8,
+               offsetof(struct unw_frame_info, f3_loc)/8,
+               offsetof(struct unw_frame_info, f4_loc)/8,
+               offsetof(struct unw_frame_info, f5_loc)/8,
+               offsetof(struct unw_frame_info, fr_loc[16 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[17 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[18 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[19 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[20 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[21 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[22 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[23 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[24 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[25 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[26 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[27 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[28 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[29 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[30 - 16])/8,
+               offsetof(struct unw_frame_info, fr_loc[31 - 16])/8,
+       },
+       .pt_regs_offsets = {
+               [0] = -1,
+               offsetof(struct pt_regs,  r1),
+               offsetof(struct pt_regs,  r2),
+               offsetof(struct pt_regs,  r3),
+               [4] = -1, [5] = -1, [6] = -1, [7] = -1,
+               offsetof(struct pt_regs,  r8),
+               offsetof(struct pt_regs,  r9),
+               offsetof(struct pt_regs, r10),
+               offsetof(struct pt_regs, r11),
+               offsetof(struct pt_regs, r12),
+               offsetof(struct pt_regs, r13),
+               offsetof(struct pt_regs, r14),
+               offsetof(struct pt_regs, r15),
+               offsetof(struct pt_regs, r16),
+               offsetof(struct pt_regs, r17),
+               offsetof(struct pt_regs, r18),
+               offsetof(struct pt_regs, r19),
+               offsetof(struct pt_regs, r20),
+               offsetof(struct pt_regs, r21),
+               offsetof(struct pt_regs, r22),
+               offsetof(struct pt_regs, r23),
+               offsetof(struct pt_regs, r24),
+               offsetof(struct pt_regs, r25),
+               offsetof(struct pt_regs, r26),
+               offsetof(struct pt_regs, r27),
+               offsetof(struct pt_regs, r28),
+               offsetof(struct pt_regs, r29),
+               offsetof(struct pt_regs, r30),
+               offsetof(struct pt_regs, r31),
+       },
+       .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 },
+#ifdef UNW_DEBUG
+       .preg_name = {
+               "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", 
"ar.rnat", "psp", "rp",
+               "r4", "r5", "r6", "r7",
+               "ar.unat", "pr", "ar.lc", "ar.fpsr",
+               "b1", "b2", "b3", "b4", "b5",
+               "f2", "f3", "f4", "f5",
+               "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+               "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+       }
+#endif
+};
+
+static inline int
+read_only (void *addr)
+{
+       return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0);
+}
+
+/*
+ * Returns offset of rREG in struct pt_regs.
+ */
+static inline unsigned long
+pt_regs_off (unsigned long reg)
+{
+       short off = -1;
+
+       if (reg < ARRAY_SIZE(unw.pt_regs_offsets))
+               off = unw.pt_regs_offsets[reg];
+
+       if (off < 0) {
+               UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __FUNCTION__, reg);
+               off = 0;
+       }
+       return (unsigned long) off;
+}
+
+static inline struct pt_regs *
+get_scratch_regs (struct unw_frame_info *info)
+{
+       if (!info->pt) {
+               /* This should not happen with valid unwind info.  */
+               UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __FUNCTION__);
+               if (info->flags & UNW_FLAG_INTERRUPT_FRAME)
+                       info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1);
+               else
+                       info->pt = info->sp - 16;
+       }
+       UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp, info->pt);
+       return (struct pt_regs *) info->pt;
+}
+
+/* Unwind accessors.  */
+
+int
+unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write)
+{
+       unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat;
+       struct unw_ireg *ireg;
+       struct pt_regs *pt;
+
+       if ((unsigned) regnum - 1 >= 127) {
+               if (regnum == 0 && !write) {
+                       *val = 0;       /* read r0 always returns 0 */
+                       *nat = 0;
+                       return 0;
+               }
+               UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n",
+                          __FUNCTION__, regnum);
+               return -1;
+       }
+
+       if (regnum < 32) {
+               if (regnum >= 4 && regnum <= 7) {
+                       /* access a preserved register */
+                       ireg = &info->r4 + (regnum - 4);
+                       addr = ireg->loc;
+                       if (addr) {
+                               nat_addr = addr + ireg->nat.off;
+                               switch (ireg->nat.type) {
+                                     case UNW_NAT_VAL:
+                                       /* simulate getf.sig/setf.sig */
+                                       if (write) {
+                                               if (*nat) {
+                                                       /* write NaTVal and be done with it */
+                                                       addr[0] = 0;
+                                                       addr[1] = 0x1fffe;
+                                                       return 0;
+                                               }
+                                               addr[1] = 0x1003e;
+                                       } else {
+                                               if (addr[0] == 0 && addr[1] == 0x1ffe) {
+                                                       /* return NaT and be done with it */
+                                                       *val = 0;
+                                                       *nat = 1;
+                                                       return 0;
+                                               }
+                                       }
+                                       /* fall through */
+                                     case UNW_NAT_NONE:
+                                       dummy_nat = 0;
+                                       nat_addr = &dummy_nat;
+                                       break;
+
+                                     case UNW_NAT_MEMSTK:
+                                       nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+                                       break;
+
+                                     case UNW_NAT_REGSTK:
+                                       nat_addr = ia64_rse_rnat_addr(addr);
+                                       if ((unsigned long) addr < info->regstk.limit
+                                           || (unsigned long) addr >= info->regstk.top)
+                                       {
+                                               UNW_DPRINT(0, "unwind.%s: %p outside of regstk "
+                                                       "[0x%lx-0x%lx)\n",
+                                                       __FUNCTION__, (void *) addr,
+                                                       info->regstk.limit,
+                                                       info->regstk.top);
+                                               return -1;
+                                       }
+                                       if ((unsigned long) nat_addr >= info->regstk.top)
+                                               nat_addr = &info->sw->ar_rnat;
+                                       nat_mask = (1UL << ia64_rse_slot_num(addr));
+                                       break;
+                               }
+                       } else {
+                               addr = &info->sw->r4 + (regnum - 4);
+                               nat_addr = &info->sw->ar_unat;
+                               nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+                       }
+               } else {
+                       /* access a scratch register */
+                       pt = get_scratch_regs(info);
+                       addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum));
+                       if (info->pri_unat_loc)
+                               nat_addr = info->pri_unat_loc;
+                       else
+                               nat_addr = &info->sw->caller_unat;
+                       nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+               }
+       } else {
+               /* access a stacked register */
+               addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32);
+               nat_addr = ia64_rse_rnat_addr(addr);
+               if ((unsigned long) addr < info->regstk.limit
+                   || (unsigned long) addr >= info->regstk.top)
+               {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside "
+                                  "of rbs\n",  __FUNCTION__);
+                       return -1;
+               }
+               if ((unsigned long) nat_addr >= info->regstk.top)
+                       nat_addr = &info->sw->ar_rnat;
+               nat_mask = (1UL << ia64_rse_slot_num(addr));
+       }
+
+       if (write) {
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+                               __FUNCTION__);
+               } else {
+                       *addr = *val;
+                       if (*nat)
+                               *nat_addr |= nat_mask;
+                       else
+                               *nat_addr &= ~nat_mask;
+               }
+       } else {
+               if ((*nat_addr & nat_mask) == 0) {
+                       *val = *addr;
+                       *nat = 0;
+               } else {
+                       *val = 0;       /* if register is a NaT, *addr may contain kernel data! */
+                       *nat = 1;
+               }
+       }
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_gr);
+
+int
+unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
+{
+       unsigned long *addr;
+       struct pt_regs *pt;
+
+       switch (regnum) {
+               /* scratch: */
+             case 0: pt = get_scratch_regs(info); addr = &pt->b0; break;
+             case 6: pt = get_scratch_regs(info); addr = &pt->b6; break;
+             case 7: pt = get_scratch_regs(info); addr = &pt->b7; break;
+
+               /* preserved: */
+             case 1: case 2: case 3: case 4: case 5:
+               addr = *(&info->b1_loc + (regnum - 1));
+               if (!addr)
+                       addr = &info->sw->b1 + (regnum - 1);
+               break;
+
+             default:
+               UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n",
+                          __FUNCTION__, regnum);
+               return -1;
+       }
+       if (write)
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+                               __FUNCTION__);
+               } else
+                       *addr = *val;
+       else
+               *val = *addr;
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_br);
+
+int
+unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write)
+{
+       struct ia64_fpreg *addr = NULL;
+       struct pt_regs *pt;
+
+       if ((unsigned) (regnum - 2) >= 126) {
+               UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n",
+                          __FUNCTION__, regnum);
+               return -1;
+       }
+
+       if (regnum <= 5) {
+               addr = *(&info->f2_loc + (regnum - 2));
+               if (!addr)
+                       addr = &info->sw->f2 + (regnum - 2);
+       } else if (regnum <= 15) {
+               if (regnum <= 11) {
+                       pt = get_scratch_regs(info);
+                       addr = &pt->f6  + (regnum - 6);
+               }
+               else
+                       addr = &info->sw->f12 + (regnum - 12);
+       } else if (regnum <= 31) {
+               addr = info->fr_loc[regnum - 16];
+               if (!addr)
+                       addr = &info->sw->f16 + (regnum - 16);
+       } else {
+               struct task_struct *t = info->task;
+
+               if (write)
+                       ia64_sync_fph(t);
+               else
+                       ia64_flush_fph(t);
+#ifdef XEN
+               addr = t->arch._thread.fph + (regnum - 32);
+#else
+               addr = t->thread.fph + (regnum - 32);
+#endif
+       }
+
+       if (write)
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+                               __FUNCTION__);
+               } else
+                       *addr = *val;
+       else
+               *val = *addr;
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_fr);
+
+int
+unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
+{
+       unsigned long *addr;
+       struct pt_regs *pt;
+
+       switch (regnum) {
+             case UNW_AR_BSP:
+               addr = info->bsp_loc;
+               if (!addr)
+                       addr = &info->sw->ar_bspstore;
+               break;
+
+             case UNW_AR_BSPSTORE:
+               addr = info->bspstore_loc;
+               if (!addr)
+                       addr = &info->sw->ar_bspstore;
+               break;
+
+             case UNW_AR_PFS:
+               addr = info->pfs_loc;
+               if (!addr)
+                       addr = &info->sw->ar_pfs;
+               break;
+
+             case UNW_AR_RNAT:
+               addr = info->rnat_loc;
+               if (!addr)
+                       addr = &info->sw->ar_rnat;
+               break;
+
+             case UNW_AR_UNAT:
+               addr = info->unat_loc;
+               if (!addr)
+                       addr = &info->sw->caller_unat;
+               break;
+
+             case UNW_AR_LC:
+               addr = info->lc_loc;
+               if (!addr)
+                       addr = &info->sw->ar_lc;
+               break;
+
+             case UNW_AR_EC:
+               if (!info->cfm_loc)
+                       return -1;
+               if (write)
+                       *info->cfm_loc =
+                               (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52);
+               else
+                       *val = (*info->cfm_loc >> 52) & 0x3f;
+               return 0;
+
+             case UNW_AR_FPSR:
+               addr = info->fpsr_loc;
+               if (!addr)
+                       addr = &info->sw->ar_fpsr;
+               break;
+
+             case UNW_AR_RSC:
+               pt = get_scratch_regs(info);
+               addr = &pt->ar_rsc;
+               break;
+
+             case UNW_AR_CCV:
+               pt = get_scratch_regs(info);
+               addr = &pt->ar_ccv;
+               break;
+
+             case UNW_AR_CSD:
+               pt = get_scratch_regs(info);
+               addr = &pt->ar_csd;
+               break;
+
+             case UNW_AR_SSD:
+               pt = get_scratch_regs(info);
+               addr = &pt->ar_ssd;
+               break;
+
+             default:
+               UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n",
+                          __FUNCTION__, regnum);
+               return -1;
+       }
+
+       if (write) {
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+                               __FUNCTION__);
+               } else
+                       *addr = *val;
+       } else
+               *val = *addr;
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_ar);
+
+int
+unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write)
+{
+       unsigned long *addr;
+
+       addr = info->pr_loc;
+       if (!addr)
+               addr = &info->sw->pr;
+
+       if (write) {
+               if (read_only(addr)) {
+                       UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
+                               __FUNCTION__);
+               } else
+                       *addr = *val;
+       } else
+               *val = *addr;
+       return 0;
+}
+EXPORT_SYMBOL(unw_access_pr);
+
+
+/* Routines to manipulate the state stack.  */
+
+static inline void
+push (struct unw_state_record *sr)
+{
+       struct unw_reg_state *rs;
+
+       rs = alloc_reg_state();
+       if (!rs) {
+               printk(KERN_ERR "unwind: cannot stack reg state!\n");
+               return;
+       }
+       memcpy(rs, &sr->curr, sizeof(*rs));
+       sr->curr.next = rs;
+}
+
+static void
+pop (struct unw_state_record *sr)
+{
+       struct unw_reg_state *rs = sr->curr.next;
+
+       if (!rs) {
+               printk(KERN_ERR "unwind: stack underflow!\n");
+               return;
+       }
+       memcpy(&sr->curr, rs, sizeof(*rs));
+       free_reg_state(rs);
+}
+
+/* Make a copy of the state stack.  Non-recursive to avoid stack overflows.  */
+static struct unw_reg_state *
+dup_state_stack (struct unw_reg_state *rs)
+{
+       struct unw_reg_state *copy, *prev = NULL, *first = NULL;
+
+       while (rs) {
+               copy = alloc_reg_state();
+               if (!copy) {
+                       printk(KERN_ERR "unwind.dup_state_stack: out of memory\n");
+                       return NULL;
+               }
+               memcpy(copy, rs, sizeof(*copy));
+               if (first)
+                       prev->next = copy;
+               else
+                       first = copy;
+               rs = rs->next;
+               prev = copy;
+       }
+       return first;
+}
+
+/* Free all stacked register states (but not RS itself).  */
+static void
+free_state_stack (struct unw_reg_state *rs)
+{
+       struct unw_reg_state *p, *next;
+
+       for (p = rs->next; p != NULL; p = next) {
+               next = p->next;
+               free_reg_state(p);
+       }
+       rs->next = NULL;
+}
+
+/* Unwind decoder routines */
+
+static enum unw_register_index __attribute_const__
+decode_abreg (unsigned char abreg, int memory)
+{
+       switch (abreg) {
+             case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04);
+             case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22);
+             case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30);
+             case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41);
+             case 0x60: return UNW_REG_PR;
+             case 0x61: return UNW_REG_PSP;
+             case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR;
+             case 0x63: return UNW_REG_RP;
+             case 0x64: return UNW_REG_BSP;
+             case 0x65: return UNW_REG_BSPSTORE;
+             case 0x66: return UNW_REG_RNAT;
+             case 0x67: return UNW_REG_UNAT;
+             case 0x68: return UNW_REG_FPSR;
+             case 0x69: return UNW_REG_PFS;
+             case 0x6a: return UNW_REG_LC;
+             default:
+               break;
+       }
+       UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg);
+       return UNW_REG_LC;
+}
+
+static void
+set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val)
+{
+       reg->val = val;
+       reg->where = where;
+       if (reg->when == UNW_WHEN_NEVER)
+               reg->when = when;
+}
+
+static void
+alloc_spill_area (unsigned long *offp, unsigned long regsize,
+                 struct unw_reg_info *lo, struct unw_reg_info *hi)
+{
+       struct unw_reg_info *reg;
+
+       for (reg = hi; reg >= lo; --reg) {
+               if (reg->where == UNW_WHERE_SPILL_HOME) {
+                       reg->where = UNW_WHERE_PSPREL;
+                       *offp -= regsize;
+                       reg->val = *offp;
+               }
+       }
+}
+
+static inline void
+spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t)
+{
+       struct unw_reg_info *reg;
+
+       for (reg = *regp; reg <= lim; ++reg) {
+               if (reg->where == UNW_WHERE_SPILL_HOME) {
+                       reg->when = t;
+                       *regp = reg + 1;
+                       return;
+               }
+       }
+       UNW_DPRINT(0, "unwind.%s: excess spill!\n",  __FUNCTION__);
+}
+
+static inline void
+finish_prologue (struct unw_state_record *sr)
+{
+       struct unw_reg_info *reg;
+       unsigned long off;
+       int i;
+
+       /*
+        * First, resolve implicit register save locations (see Section "11.4.2.3 Rules
+        * for Using Unwind Descriptors", rule 3):
+        */
+       for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) {
+               reg = sr->curr.reg + unw.save_order[i];
+               if (reg->where == UNW_WHERE_GR_SAVE) {
+                       reg->where = UNW_WHERE_GR;
+                       reg->val = sr->gr_save_loc++;
+               }
+       }
+
+       /*
+        * Next, compute when the fp, general, and branch registers get
+        * saved.  This must come before alloc_spill_area() because
+        * we need to know which registers are spilled to their home
+        * locations.
+        */
+       if (sr->imask) {
+               unsigned char kind, mask = 0, *cp = sr->imask;
+               int t;
+               static const unsigned char limit[3] = {
+                       UNW_REG_F31, UNW_REG_R7, UNW_REG_B5
+               };
+               struct unw_reg_info *(regs[3]);
+
+               regs[0] = sr->curr.reg + UNW_REG_F2;
+               regs[1] = sr->curr.reg + UNW_REG_R4;
+               regs[2] = sr->curr.reg + UNW_REG_B1;
+
+               for (t = 0; t < sr->region_len; ++t) {
+                       if ((t & 3) == 0)
+                               mask = *cp++;
+                       kind = (mask >> 2*(3-(t & 3))) & 3;
+                       if (kind > 0)
+                               spill_next_when(&regs[kind - 1], sr->curr.reg + limit[kind - 1],
+                                               sr->region_start + t);
+               }
+       }
+       /*
+        * Next, lay out the memory stack spill area:
+        */
+       if (sr->any_spills) {
+               off = sr->spill_offset;
+               alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31);
+               alloc_spill_area(&off,  8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5);
+               alloc_spill_area(&off,  8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7);
+       }
+}
+
+/*
+ * Region header descriptors.
+ */
+
+static void
+desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave,
+              struct unw_state_record *sr)
+{
+       int i, region_start;
+
+       if (!(sr->in_body || sr->first_region))
+               finish_prologue(sr);
+       sr->first_region = 0;
+
+       /* check if we're done: */
+       if (sr->when_target < sr->region_start + sr->region_len) {
+               sr->done = 1;
+               return;
+       }
+
+       region_start = sr->region_start + sr->region_len;
+
+       for (i = 0; i < sr->epilogue_count; ++i)
+               pop(sr);
+       sr->epilogue_count = 0;
+       sr->epilogue_start = UNW_WHEN_NEVER;
+
+       sr->region_start = region_start;
+       sr->region_len = rlen;
+       sr->in_body = body;
+
+       if (!body) {
+               push(sr);
+
+               for (i = 0; i < 4; ++i) {
+                       if (mask & 0x8)
+                               set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR,
+                                       sr->region_start + sr->region_len - 1, grsave++);
+                       mask <<= 1;
+               }
+               sr->gr_save_loc = grsave;
+               sr->any_spills = 0;
+               sr->imask = NULL;
+               sr->spill_offset = 0x10;        /* default to psp+16 */
+       }
+}
+
+/*
+ * Prologue descriptors.
+ */
+
+static inline void
+desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr)
+{
+       if (abi == 3 && context == 'i') {
+               sr->flags |= UNW_FLAG_INTERRUPT_FRAME;
+               UNW_DPRINT(3, "unwind.%s: interrupt frame\n",  __FUNCTION__);
+       }
+       else
+               UNW_DPRINT(0, "unwind.%s: ignoring unwabi(abi=0x%x,context=0x%x)\n",
+                               __FUNCTION__, abi, context);
+}
+
+static inline void
+desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 5; ++i) {
+               if (brmask & 1)
+                       set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR,
+                               sr->region_start + sr->region_len - 1, gr++);
+               brmask >>= 1;
+       }
+}
+
+static inline void
+desc_br_mem (unsigned char brmask, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 5; ++i) {
+               if (brmask & 1) {
+                       set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME,
+                               sr->region_start + sr->region_len - 1, 0);
+                       sr->any_spills = 1;
+               }
+               brmask >>= 1;
+       }
+}
+
+static inline void
+desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 4; ++i) {
+               if ((grmask & 1) != 0) {
+                       set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
+                               sr->region_start + sr->region_len - 1, 0);
+                       sr->any_spills = 1;
+               }
+               grmask >>= 1;
+       }
+       for (i = 0; i < 20; ++i) {
+               if ((frmask & 1) != 0) {
+                       int base = (i < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4;
+                       set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME,
+                               sr->region_start + sr->region_len - 1, 0);
+                       sr->any_spills = 1;
+               }
+               frmask >>= 1;
+       }
+}
+
+static inline void
+desc_fr_mem (unsigned char frmask, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 4; ++i) {
+               if ((frmask & 1) != 0) {
+                       set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME,
+                               sr->region_start + sr->region_len - 1, 0);
+                       sr->any_spills = 1;
+               }
+               frmask >>= 1;
+       }
+}
+
+static inline void
+desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 4; ++i) {
+               if ((grmask & 1) != 0)
+                       set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR,
+                               sr->region_start + sr->region_len - 1, gr++);
+               grmask >>= 1;
+       }
+}
+
+static inline void
+desc_gr_mem (unsigned char grmask, struct unw_state_record *sr)
+{
+       int i;
+
+       for (i = 0; i < 4; ++i) {
+               if ((grmask & 1) != 0) {
+                       set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
+                               sr->region_start + sr->region_len - 1, 0);
+                       sr->any_spills = 1;
+               }
+               grmask >>= 1;
+       }
+}
+
+static inline void
+desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr)
+{
+       set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE,
+               sr->region_start + min_t(int, t, sr->region_len - 1), 16*size);
+}
+
+static inline void
+desc_mem_stack_v (unw_word t, struct unw_state_record *sr)
+{
+       sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1);
+}
+
+static inline void
+desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr)
+{
+       set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst);
+}
+
+static inline void
+desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr)
+{
+       set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1,
+               0x10 - 4*pspoff);
+}
+
+static inline void
+desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr)
+{
+       set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1,
+               4*spoff);
+}
+
+static inline void
+desc_rp_br (unsigned char dst, struct unw_state_record *sr)
+{
+       sr->return_link_reg = dst;
+}
+
+static inline void
+desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr)
+{
+       struct unw_reg_info *reg = sr->curr.reg + regnum;
+
+       if (reg->where == UNW_WHERE_NONE)
+               reg->where = UNW_WHERE_GR_SAVE;
+       reg->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+}
+
+static inline void
+desc_spill_base (unw_word pspoff, struct unw_state_record *sr)
+{
+       sr->spill_offset = 0x10 - 4*pspoff;
+}
+
+static inline unsigned char *
+desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr)
+{
+       sr->imask = imaskp;
+       return imaskp + (2*sr->region_len + 7)/8;
+}
+
+/*
+ * Body descriptors.
+ */
+static inline void
+desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr)
+{
+       sr->epilogue_start = sr->region_start + sr->region_len - 1 - t;
+       sr->epilogue_count = ecount + 1;
+}
+
+static inline void
+desc_copy_state (unw_word label, struct unw_state_record *sr)
+{
+       struct unw_labeled_state *ls;
+
+       for (ls = sr->labeled_states; ls; ls = ls->next) {
+               if (ls->label == label) {
+                       free_state_stack(&sr->curr);
+                       memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr));
+                       sr->curr.next = dup_state_stack(ls->saved_state.next);
+                       return;
+               }
+       }
+       printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label);
+}
+
+static inline void
+desc_label_state (unw_word label, struct unw_state_record *sr)
+{
+       struct unw_labeled_state *ls;
+
+       ls = alloc_labeled_state();
+       if (!ls) {
+               printk(KERN_ERR "unwind.desc_label_state(): out of memory\n");
+               return;
+       }
+       ls->label = label;
+       memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state));
+       ls->saved_state.next = dup_state_stack(sr->curr.next);
+
+       /* insert into list of labeled states: */
+       ls->next = sr->labeled_states;
+       sr->labeled_states = ls;
+}
+
+/*
+ * General descriptors.
+ */
+
+static inline int
+desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr)
+{
+       if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1))
+               return 0;
+       if (qp > 0) {
+               if ((sr->pr_val & (1UL << qp)) == 0)
+                       return 0;
+               sr->pr_mask |= (1UL << qp);
+       }
+       return 1;
+}
+
+static inline void
+desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr)
+{
+       struct unw_reg_info *r;
+
+       if (!desc_is_active(qp, t, sr))
+               return;
+
+       r = sr->curr.reg + decode_abreg(abreg, 0);
+       r->where = UNW_WHERE_NONE;
+       r->when = UNW_WHEN_NEVER;
+       r->val = 0;
+}
+
+static inline void
+desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x,
+                    unsigned char ytreg, struct unw_state_record *sr)
+{
+       enum unw_where where = UNW_WHERE_GR;
+       struct unw_reg_info *r;
+
+       if (!desc_is_active(qp, t, sr))
+               return;
+
+       if (x)
+               where = UNW_WHERE_BR;
+       else if (ytreg & 0x80)
+               where = UNW_WHERE_FR;
+
+       r = sr->curr.reg + decode_abreg(abreg, 0);
+       r->where = where;
+       r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+       r->val = (ytreg & 0x7f);
+}
+
+static inline void
+desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff,
+                    struct unw_state_record *sr)
+{
+       struct unw_reg_info *r;
+
+       if (!desc_is_active(qp, t, sr))
+               return;
+
+       r = sr->curr.reg + decode_abreg(abreg, 1);
+       r->where = UNW_WHERE_PSPREL;
+       r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+       r->val = 0x10 - 4*pspoff;
+}
+
+static inline void
+desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff,
+                      struct unw_state_record *sr)
+{
+       struct unw_reg_info *r;
+
+       if (!desc_is_active(qp, t, sr))
+               return;
+
+       r = sr->curr.reg + decode_abreg(abreg, 1);
+       r->where = UNW_WHERE_SPREL;
+       r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+       r->val = 4*spoff;
+}
+
+#define UNW_DEC_BAD_CODE(code)                 printk(KERN_ERR "unwind: unknown code 0x%02x\n", \
+                                                      code);
+
+/*
+ * region headers:
+ */
+#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg)    desc_prologue(0,r,m,gr,arg)
+#define UNW_DEC_PROLOGUE(fmt,b,r,arg)          desc_prologue(b,r,0,32,arg)
+/*
+ * prologue descriptors:
+ */
+#define UNW_DEC_ABI(fmt,a,c,arg)               desc_abi(a,c,arg)
+#define UNW_DEC_BR_GR(fmt,b,g,arg)             desc_br_gr(b,g,arg)
+#define UNW_DEC_BR_MEM(fmt,b,arg)              desc_br_mem(b,arg)
+#define UNW_DEC_FRGR_MEM(fmt,g,f,arg)          desc_frgr_mem(g,f,arg)
+#define UNW_DEC_FR_MEM(fmt,f,arg)              desc_fr_mem(f,arg)
+#define UNW_DEC_GR_GR(fmt,m,g,arg)             desc_gr_gr(m,g,arg)
+#define UNW_DEC_GR_MEM(fmt,m,arg)              desc_gr_mem(m,arg)
+#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg)       desc_mem_stack_f(t,s,arg)
+#define UNW_DEC_MEM_STACK_V(fmt,t,arg)         desc_mem_stack_v(t,arg)
+#define UNW_DEC_REG_GR(fmt,r,d,arg)            desc_reg_gr(r,d,arg)
+#define UNW_DEC_REG_PSPREL(fmt,r,o,arg)                desc_reg_psprel(r,o,arg)
+#define UNW_DEC_REG_SPREL(fmt,r,o,arg)         desc_reg_sprel(r,o,arg)
+#define UNW_DEC_REG_WHEN(fmt,r,t,arg)          desc_reg_when(r,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)     desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)    desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg)
+#define UNW_DEC_PRIUNAT_GR(fmt,r,arg)          desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg)
+#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg)      desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg)       desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_RP_BR(fmt,d,arg)               desc_rp_br(d,arg)
+#define UNW_DEC_SPILL_BASE(fmt,o,arg)          desc_spill_base(o,arg)
+#define UNW_DEC_SPILL_MASK(fmt,m,arg)          (m = desc_spill_mask(m,arg))
+/*
+ * body descriptors:
+ */
+#define UNW_DEC_EPILOGUE(fmt,t,c,arg)          desc_epilogue(t,c,arg)
+#define UNW_DEC_COPY_STATE(fmt,l,arg)          desc_copy_state(l,arg)
+#define UNW_DEC_LABEL_STATE(fmt,l,arg)         desc_label_state(l,arg)
+/*
+ * general unwind descriptors:
+ */
+#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg)   desc_spill_reg_p(p,t,a,x,y,arg)
+#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg)       desc_spill_reg_p(0,t,a,x,y,arg)
+#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg)  desc_spill_psprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg)      desc_spill_psprel_p(0,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg)   desc_spill_sprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg)       desc_spill_sprel_p(0,t,a,o,arg)
+#define UNW_DEC_RESTORE_P(f,p,t,a,arg)         desc_restore_p(p,t,a,arg)
+#define UNW_DEC_RESTORE(f,t,a,arg)             desc_restore_p(0,t,a,arg)
+
+#include "unwind_decoder.c"
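The #include above binds the generic descriptor decoder: unwind_decoder.c only calls the UNW_DEC_* action macros, so each client (this unwinder, or objdump) defines the macros first and then textually includes the template. A minimal standalone sketch of the same pattern, with hypothetical names, not part of the patch:

#include <stdio.h>

/* client binds the action macro before pulling in the template */
#define DEC_ACTION(code, arg)   printf("descriptor 0x%02x\n", (code))

/* --- template body; in real use this would sit in its own file and be
 *     textually #included, exactly like unwind_decoder.c above --- */
static const unsigned char *decode_one(const unsigned char *dp, void *arg)
{
        unsigned char code = *dp++;

        DEC_ACTION(code, arg);          /* expands to the client's handler */
        return dp;
}
/* --- end template --- */

int main(void)
{
        static const unsigned char buf[] = { 0x60, 0xe8 };
        const unsigned char *dp = buf;

        while (dp < buf + sizeof(buf))
                dp = decode_one(dp, NULL);
        return 0;
}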
+
+
+/* Unwind scripts. */
+
+static inline unw_hash_index_t
+hash (unsigned long ip)
+{
+#      define hashmagic        0x9e3779b97f4a7c16UL    /* based on (sqrt(5)/2-1)*2^64 */
+
+       return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE);
+#undef hashmagic
+}
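hash() is multiplicative (Fibonacci) hashing: the four slot bits of the ip are shifted out, the rest is multiplied by the golden-ratio constant, and the top UNW_LOG_HASH_SIZE bits select the bucket. A standalone sketch, with the table size assumed to be 2^8 purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define LOG_HASH_SIZE   8       /* assumed; the real value lives in unwind_i.h */

static unsigned hash_ip(uint64_t ip)
{
        const uint64_t hashmagic = 0x9e3779b97f4a7c16ULL;

        /* the low 4 bits only encode the slot within a bundle */
        return (unsigned) (((ip >> 4) * hashmagic) >> (64 - LOG_HASH_SIZE));
}

int main(void)
{
        /* neighbouring bundles scatter across buckets thanks to the multiply */
        printf("%u %u\n", hash_ip(0xa000000000010000ULL),
                          hash_ip(0xa000000000010010ULL));
        return 0;
}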
+
+static inline long
+cache_match (struct unw_script *script, unsigned long ip, unsigned long pr)
+{
+       read_lock(&script->lock);
+       if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0)
+               /* keep the read lock... */
+               return 1;
+       read_unlock(&script->lock);
+       return 0;
+}
+
+static inline struct unw_script *
+script_lookup (struct unw_frame_info *info)
+{
+       struct unw_script *script = unw.cache + info->hint;
+       unsigned short index;
+       unsigned long ip, pr;
+
+       if (UNW_DEBUG_ON(0))
+               return NULL;    /* Always regenerate scripts in debug mode */
+
+       STAT(++unw.stat.cache.lookups);
+
+       ip = info->ip;
+       pr = info->pr;
+
+       if (cache_match(script, ip, pr)) {
+               STAT(++unw.stat.cache.hinted_hits);
+               return script;
+       }
+
+       index = unw.hash[hash(ip)];
+       if (index >= UNW_CACHE_SIZE)
+               return NULL;
+
+       script = unw.cache + index;
+       while (1) {
+               if (cache_match(script, ip, pr)) {
+                       /* update hint; no locking required as single-word writes are atomic */
+                       STAT(++unw.stat.cache.normal_hits);
+                       unw.cache[info->prev_script].hint = script - unw.cache;
+                       return script;
+               }
+               if (script->coll_chain >= UNW_HASH_SIZE)
+                       return NULL;
+               script = unw.cache + script->coll_chain;
+               STAT(++unw.stat.cache.collision_chain_traversals);
+       }
+}
+
+/*
+ * On returning, a write lock for the SCRIPT is still being held.
+ */
+static inline struct unw_script *
+script_new (unsigned long ip)
+{
+       struct unw_script *script, *prev, *tmp;
+       unw_hash_index_t index;
+       unsigned short head;
+
+       STAT(++unw.stat.script.news);
+
+       /*
+        * Can't (easily) use cmpxchg() here because of ABA problem
+        * that is intrinsic in cmpxchg()...
+        */
+       head = unw.lru_head;
+       script = unw.cache + head;
+       unw.lru_head = script->lru_chain;
+
+       /*
+        * We'd deadlock here if we interrupted a thread that is holding a read lock on
+        * script->lock.  Thus, if the write_trylock() fails, we simply bail out.  The
+        * alternative would be to disable interrupts whenever we hold a read-lock, but
+        * that seems silly.
+        */
+       if (!write_trylock(&script->lock))
+               return NULL;
+
+       /* re-insert script at the tail of the LRU chain: */
+       unw.cache[unw.lru_tail].lru_chain = head;
+       unw.lru_tail = head;
+
+       /* remove the old script from the hash table (if it's there): */
+       if (script->ip) {
+               index = hash(script->ip);
+               tmp = unw.cache + unw.hash[index];
+               prev = NULL;
+               while (1) {
+                       if (tmp == script) {
+                               if (prev)
+                                       prev->coll_chain = tmp->coll_chain;
+                               else
+                                       unw.hash[index] = tmp->coll_chain;
+                               break;
+                       } else
+                               prev = tmp;
+                       if (tmp->coll_chain >= UNW_CACHE_SIZE)
+                               /* old script wasn't in the hash-table */
+                               break;
+                       tmp = unw.cache + tmp->coll_chain;
+               }
+       }
+
+       /* enter new script in the hash table */
+       index = hash(ip);
+       script->coll_chain = unw.hash[index];
+       unw.hash[index] = script - unw.cache;
+
+       script->ip = ip;        /* set new IP while we're holding the locks */
+
+       STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions);
+
+       script->flags = 0;
+       script->hint = 0;
+       script->count = 0;
+       return script;
+}
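script_new() recycles the least-recently-used cache slot: it pops the LRU head, relinks it at the tail (it is now the most recently used), and rehashes it under the new ip, all through unsigned-short indices instead of pointers. A reduced sketch of just the index-linked LRU move, with field names assumed:

#include <stdio.h>

#define CACHE_SIZE 32

struct entry { unsigned short lru_chain; };     /* next entry toward the LRU tail */

static struct entry cache[CACHE_SIZE];
static unsigned short lru_head;                 /* least recently used */
static unsigned short lru_tail;                 /* most recently used */

/* take the coldest slot and make it the hottest, as script_new() does */
static unsigned short lru_reuse(void)
{
        unsigned short head = lru_head;

        lru_head = cache[head].lru_chain;       /* unlink from the front */
        cache[lru_tail].lru_chain = head;       /* relink at the back */
        lru_tail = head;
        return head;                            /* caller reinitializes this slot */
}

int main(void)
{
        int i;

        for (i = 1; i < CACHE_SIZE; ++i)        /* same init as unw_init() below */
                cache[i].lru_chain = i - 1;
        lru_head = CACHE_SIZE - 1;
        lru_tail = 0;
        printf("reused slot %u, new head %u\n",
               (unsigned) lru_reuse(), (unsigned) lru_head);
        return 0;
}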
+
+static void
+script_finalize (struct unw_script *script, struct unw_state_record *sr)
+{
+       script->pr_mask = sr->pr_mask;
+       script->pr_val = sr->pr_val;
+       /*
+        * We could down-grade our write-lock on script->lock here but
+        * the rwlock API doesn't offer atomic lock downgrading, so
+        * we'll just keep the write-lock and release it later when
+        * we're done using the script.
+        */
+}
+
+static inline void
+script_emit (struct unw_script *script, struct unw_insn insn)
+{
+       if (script->count >= UNW_MAX_SCRIPT_LEN) {
+               UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n",
+                       __FUNCTION__, UNW_MAX_SCRIPT_LEN);
+               return;
+       }
+       script->insn[script->count++] = insn;
+}
+
+static inline void
+emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script)
+{
+       struct unw_reg_info *r = sr->curr.reg + i;
+       enum unw_insn_opcode opc;
+       struct unw_insn insn;
+       unsigned long val = 0;
+
+       switch (r->where) {
+             case UNW_WHERE_GR:
+               if (r->val >= 32) {
+                       /* register got spilled to a stacked register */
+                       opc = UNW_INSN_SETNAT_TYPE;
+                       val = UNW_NAT_REGSTK;
+               } else
+                       /* register got spilled to a scratch register */
+                       opc = UNW_INSN_SETNAT_MEMSTK;
+               break;
+
+             case UNW_WHERE_FR:
+               opc = UNW_INSN_SETNAT_TYPE;
+               val = UNW_NAT_VAL;
+               break;
+
+             case UNW_WHERE_BR:
+               opc = UNW_INSN_SETNAT_TYPE;
+               val = UNW_NAT_NONE;
+               break;
+
+             case UNW_WHERE_PSPREL:
+             case UNW_WHERE_SPREL:
+               opc = UNW_INSN_SETNAT_MEMSTK;
+               break;
+
+             default:
+               UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n",
+                          __FUNCTION__, r->where);
+               return;
+       }
+       insn.opc = opc;
+       insn.dst = unw.preg_index[i];
+       insn.val = val;
+       script_emit(script, insn);
+}
+
+static void
+compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
+{
+       struct unw_reg_info *r = sr->curr.reg + i;
+       enum unw_insn_opcode opc;
+       unsigned long val, rval;
+       struct unw_insn insn;
+       long need_nat_info;
+
+       if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target)
+               return;
+
+       opc = UNW_INSN_MOVE;
+       val = rval = r->val;
+       need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7);
+
+       switch (r->where) {
+             case UNW_WHERE_GR:
+               if (rval >= 32) {
+                       opc = UNW_INSN_MOVE_STACKED;
+                       val = rval - 32;
+               } else if (rval >= 4 && rval <= 7) {
+                       if (need_nat_info) {
+                               opc = UNW_INSN_MOVE2;
+                               need_nat_info = 0;
+                       }
+                       val = unw.preg_index[UNW_REG_R4 + (rval - 4)];
+               } else if (rval == 0) {
+                       opc = UNW_INSN_MOVE_CONST;
+                       val = 0;
+               } else {
+                       /* register got spilled to a scratch register */
+                       opc = UNW_INSN_MOVE_SCRATCH;
+                       val = pt_regs_off(rval);
+               }
+               break;
+
+             case UNW_WHERE_FR:
+               if (rval <= 5)
+                       val = unw.preg_index[UNW_REG_F2  + (rval -  2)];
+               else if (rval >= 16 && rval <= 31)
+                       val = unw.preg_index[UNW_REG_F16 + (rval - 16)];
+               else {
+                       opc = UNW_INSN_MOVE_SCRATCH;
+                       if (rval <= 11)
+                               val = offsetof(struct pt_regs, f6) + 16*(rval - 6);
+                       else
+                               UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n",
+                                          __FUNCTION__, rval);
+               }
+               break;
+
+             case UNW_WHERE_BR:
+               if (rval >= 1 && rval <= 5)
+                       val = unw.preg_index[UNW_REG_B1 + (rval - 1)];
+               else {
+                       opc = UNW_INSN_MOVE_SCRATCH;
+                       if (rval == 0)
+                               val = offsetof(struct pt_regs, b0);
+                       else if (rval == 6)
+                               val = offsetof(struct pt_regs, b6);
+                       else
+                               val = offsetof(struct pt_regs, b7);
+               }
+               break;
+
+             case UNW_WHERE_SPREL:
+               opc = UNW_INSN_ADD_SP;
+               break;
+
+             case UNW_WHERE_PSPREL:
+               opc = UNW_INSN_ADD_PSP;
+               break;
+
+             default:
+               UNW_DPRINT(0, "unwind.%s: register %u has unexpected `where' value of %u\n",
+                          __FUNCTION__, i, r->where);
+               break;
+       }
+       insn.opc = opc;
+       insn.dst = unw.preg_index[i];
+       insn.val = val;
+       script_emit(script, insn);
+       if (need_nat_info)
+               emit_nat_info(sr, i, script);
+
+       if (i == UNW_REG_PSP) {
+               /*
+                * info->psp must contain the _value_ of the previous
+                * sp, not its save location.  We get this by
+                * dereferencing the value we just stored in
+                * info->psp:
+                */
+               insn.opc = UNW_INSN_LOAD;
+               insn.dst = insn.val = unw.preg_index[UNW_REG_PSP];
+               script_emit(script, insn);
+       }
+}
+
+static inline const struct unw_table_entry *
+lookup (struct unw_table *table, unsigned long rel_ip)
+{
+       const struct unw_table_entry *e = NULL;
+       unsigned long lo, hi, mid;
+
+       /* do a binary search for right entry: */
+       for (lo = 0, hi = table->length; lo < hi; ) {
+               mid = (lo + hi) / 2;
+               e = &table->array[mid];
+               if (rel_ip < e->start_offset)
+                       hi = mid;
+               else if (rel_ip >= e->end_offset)
+                       lo = mid + 1;
+               else
+                       break;
+       }
+       if (rel_ip < e->start_offset || rel_ip >= e->end_offset)
+               return NULL;
+       return e;
+}
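lookup() does a binary search over sorted, disjoint [start_offset, end_offset) intervals; the final range test is what rejects a rel_ip that falls into a gap between functions. The same logic as a self-contained sketch:

#include <stdio.h>

struct range { unsigned long start, end; };     /* half-open: [start, end) */

static const struct range *find(const struct range *a, unsigned long n,
                                unsigned long ip)
{
        unsigned long lo = 0, hi = n, mid;

        while (lo < hi) {
                mid = (lo + hi) / 2;
                if (ip < a[mid].start)
                        hi = mid;
                else if (ip >= a[mid].end)
                        lo = mid + 1;
                else
                        return &a[mid];         /* ip inside this interval */
        }
        return NULL;                            /* ip fell into a gap or off the ends */
}

int main(void)
{
        static const struct range tbl[] = { {0x00, 0x40}, {0x40, 0x90}, {0xa0, 0xf0} };

        printf("%d %d\n", find(tbl, 3, 0x50) != NULL, find(tbl, 3, 0x95) != NULL);
        return 0;
}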
+
+/*
+ * Build an unwind script that unwinds from state OLD_STATE to the
+ * entrypoint of the function that called OLD_STATE.
+ */
+static inline struct unw_script *
+build_script (struct unw_frame_info *info)
+{
+       const struct unw_table_entry *e = NULL;
+       struct unw_script *script = NULL;
+       struct unw_labeled_state *ls, *next;
+       unsigned long ip = info->ip;
+       struct unw_state_record sr;
+       struct unw_table *table;
+       struct unw_reg_info *r;
+       struct unw_insn insn;
+       u8 *dp, *desc_end;
+       u64 hdr;
+       int i;
+       STAT(unsigned long start, parse_start;)
+
+       STAT(++unw.stat.script.builds; start = ia64_get_itc());
+
+       /* build state record */
+       memset(&sr, 0, sizeof(sr));
+       for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
+               r->when = UNW_WHEN_NEVER;
+       sr.pr_val = info->pr;
+
+       UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip);
+       script = script_new(ip);
+       if (!script) {
+               UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __FUNCTION__);
+               STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+               return NULL;
+       }
+       unw.cache[info->prev_script].hint = script - unw.cache;
+
+       /* search the kernel's and the modules' unwind tables for IP: */
+
+       STAT(parse_start = ia64_get_itc());
+
+       for (table = unw.tables; table; table = table->next) {
+               if (ip >= table->start && ip < table->end) {
+                       e = lookup(table, ip - table->segment_base);
+                       break;
+               }
+       }
+       if (!e) {
+               /* no info, return default unwinder (leaf proc, no mem stack, no saved regs)  */
+               UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n",
+                       __FUNCTION__, ip, unw.cache[info->prev_script].ip);
+               sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+               sr.curr.reg[UNW_REG_RP].when = -1;
+               sr.curr.reg[UNW_REG_RP].val = 0;
+               compile_reg(&sr, UNW_REG_RP, script);
+               script_finalize(script, &sr);
+               STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+               STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+               return script;
+       }
+
+       sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16
+                         + (ip & 0xfUL));
+       hdr = *(u64 *) (table->segment_base + e->info_offset);
+       dp =   (u8 *)  (table->segment_base + e->info_offset + 8);
+       desc_end = dp + 8*UNW_LENGTH(hdr);
+
+       while (!sr.done && dp < desc_end)
+               dp = unw_decode(dp, sr.in_body, &sr);
+
+       if (sr.when_target > sr.epilogue_start) {
+               /*
+                * sp has been restored and all values on the memory stack below
+                * psp also have been restored.
+                */
+               sr.curr.reg[UNW_REG_PSP].val = 0;
+               sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE;
+               sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER;
+               for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
+                       if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10)
+                           || r->where == UNW_WHERE_SPREL)
+                       {
+                               r->val = 0;
+                               r->where = UNW_WHERE_NONE;
+                               r->when = UNW_WHEN_NEVER;
+                       }
+       }
+
+       script->flags = sr.flags;
+
+       /*
+        * If RP didn't get saved, generate entry for the return link
+        * register.
+        */
+       if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) {
+               sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+               sr.curr.reg[UNW_REG_RP].when = -1;
+               sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg;
+               UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n",
+                          __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where,
+                          sr.curr.reg[UNW_REG_RP].val);
+       }
+
+#ifdef UNW_DEBUG
+       UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n",
+               __FUNCTION__, table->segment_base + e->start_offset, sr.when_target);
+       for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) {
+               if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) {
+                       UNW_DPRINT(1, "  %s <- ", unw.preg_name[r - 
sr.curr.reg]);
+                       switch (r->where) {
+                             case UNW_WHERE_GR:     UNW_DPRINT(1, "r%lu", r->val); break;
+                             case UNW_WHERE_FR:     UNW_DPRINT(1, "f%lu", r->val); break;
+                             case UNW_WHERE_BR:     UNW_DPRINT(1, "b%lu", r->val); break;
+                             case UNW_WHERE_SPREL:  UNW_DPRINT(1, "[sp+0x%lx]", r->val); break;
+                             case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break;
+                             case UNW_WHERE_NONE:
+                               UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val);
+                               break;
+
+                             default:
+                               UNW_DPRINT(1, "BADWHERE(%d)", r->where);
+                               break;
+                       }
+                       UNW_DPRINT(1, "\t\t%d\n", r->when);
+               }
+       }
+#endif
+
+       STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+
+       /* translate state record into unwinder instructions: */
+
+       /*
+        * First, set psp if we're dealing with a fixed-size frame;
+        * subsequent instructions may depend on this value.
+        */
+       if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when
+           && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE)
+           && sr.curr.reg[UNW_REG_PSP].val != 0) {
+               /* new psp is sp plus frame size */
+               insn.opc = UNW_INSN_ADD;
+               insn.dst = offsetof(struct unw_frame_info, psp)/8;
+               insn.val = sr.curr.reg[UNW_REG_PSP].val;        /* frame size */
+               script_emit(script, insn);
+       }
+
+       /* determine where the primary UNaT is: */
+       if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
+               i = UNW_REG_PRI_UNAT_MEM;
+       else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when)
+               i = UNW_REG_PRI_UNAT_GR;
+       else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
+               i = UNW_REG_PRI_UNAT_MEM;
+       else
+               i = UNW_REG_PRI_UNAT_GR;
+
+       compile_reg(&sr, i, script);
+
+       for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i)
+               compile_reg(&sr, i, script);
+
+       /* free labeled register states & stack: */
+
+       STAT(parse_start = ia64_get_itc());
+       for (ls = sr.labeled_states; ls; ls = next) {
+               next = ls->next;
+               free_state_stack(&ls->saved_state);
+               free_labeled_state(ls);
+       }
+       free_state_stack(&sr.curr);
+       STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+
+       script_finalize(script, &sr);
+       STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+       return script;
+}
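A note on the sr.when_target expression near the top of build_script(): IA-64 packs three instruction slots into each 16-byte bundle, and the low four bits of an unwind ip encode the slot, so the expression converts a byte address into a count of slots from the function entry. A worked standalone example:

#include <stdio.h>

static unsigned long when_target(unsigned long ip, unsigned long proc_start)
{
        /* 3 slots per 16-byte bundle; ip & 0xf is the slot within the bundle */
        return 3 * ((ip & ~0xfUL) - proc_start) / 16 + (ip & 0xfUL);
}

int main(void)
{
        unsigned long start = 0xa000000000010000UL;

        /* bundle 1, slot 1: slots 0..2 were bundle 0, so this is slot 4 */
        printf("%lu\n", when_target(start + 0x11, start));
        return 0;
}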
+
+/*
+ * Apply the unwinding actions represented by OPS and update SR to
+ * reflect the state that existed upon entry to the function that this
+ * unwinder represents.
+ */
+static inline void
+run_script (struct unw_script *script, struct unw_frame_info *state)
+{
+       struct unw_insn *ip, *limit, next_insn;
+       unsigned long opc, dst, val, off;
+       unsigned long *s = (unsigned long *) state;
+       STAT(unsigned long start;)
+
+       STAT(++unw.stat.script.runs; start = ia64_get_itc());
+       state->flags = script->flags;
+       ip = script->insn;
+       limit = script->insn + script->count;
+       next_insn = *ip;
+
+       while (ip++ < limit) {
+               opc = next_insn.opc;
+               dst = next_insn.dst;
+               val = next_insn.val;
+               next_insn = *ip;
+
+         redo:
+               switch (opc) {
+                     case UNW_INSN_ADD:
+                       s[dst] += val;
+                       break;
+
+                     case UNW_INSN_MOVE2:
+                       if (!s[val])
+                               goto lazy_init;
+                       s[dst+1] = s[val+1];
+                       s[dst] = s[val];
+                       break;
+
+                     case UNW_INSN_MOVE:
+                       if (!s[val])
+                               goto lazy_init;
+                       s[dst] = s[val];
+                       break;
+
+                     case UNW_INSN_MOVE_SCRATCH:
+                       if (state->pt) {
+                               s[dst] = (unsigned long) get_scratch_regs(state) + val;
+                       } else {
+                               s[dst] = 0;
+                               UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n",
+                                          __FUNCTION__, dst, val);
+                       }
+                       break;
+
+                     case UNW_INSN_MOVE_CONST:
+                       if (val == 0)
+                               s[dst] = (unsigned long) &unw.r0;
+                       else {
+                               s[dst] = 0;
+                               UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n",
+                                          __FUNCTION__, val);
+                       }
+                       break;
+
+
+                     case UNW_INSN_MOVE_STACKED:
+                       s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp,
+                                                                   val);
+                       break;
+
+                     case UNW_INSN_ADD_PSP:
+                       s[dst] = state->psp + val;
+                       break;
+
+                     case UNW_INSN_ADD_SP:
+                       s[dst] = state->sp + val;
+                       break;
+
+                     case UNW_INSN_SETNAT_MEMSTK:
+                       if (!state->pri_unat_loc)
+                               state->pri_unat_loc = &state->sw->caller_unat;
+                       /* register off. is a multiple of 8, so the least 3 bits (type) are 0 */
+                       s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK;
+                       break;
+
+                     case UNW_INSN_SETNAT_TYPE:
+                       s[dst+1] = val;
+                       break;
+
+                     case UNW_INSN_LOAD:
+#ifdef UNW_DEBUG
+                       if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0
+#ifndef XEN
+                           || s[val] < TASK_SIZE
+#endif
+                               )
+                       {
+                               UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n",
+                                          __FUNCTION__, s[val]);
+                               break;
+                       }
+#endif
+                       s[dst] = *(unsigned long *) s[val];
+                       break;
+               }
+       }
+       STAT(unw.stat.script.run_time += ia64_get_itc() - start);
+       return;
+
+  lazy_init:
+       off = unw.sw_off[val];
+       s[val] = (unsigned long) state->sw + off;
+       if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7))
+               /*
+                * We're initializing a general register: init NaT info, too.  Note that
+                * the offset is a multiple of 8 which gives us the 3 bits needed for
+                * the type field.
+                */
+               s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK;
+       goto redo;
+}
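run_script() addresses the frame info as a flat array of 8-byte words (s = (unsigned long *) state), so an instruction's dst is simply offsetof(field)/8 and the NaT word for a preserved general register rides at s[dst+1]. A sketch of that addressing against an assumed stand-in struct (LP64 assumed, as on ia64):

#include <stdio.h>
#include <stddef.h>

struct frame {                          /* stand-in for struct unw_frame_info */
        unsigned long ip;
        unsigned long sp;
        unsigned long *r4_loc;          /* save-location slot ...           */
        unsigned long r4_nat;           /* ... NaT word directly behind it  */
};

int main(void)
{
        struct frame f = { 0 };
        unsigned long *s = (unsigned long *) &f;
        size_t dst = offsetof(struct frame, r4_loc) / 8;        /* word index */

        s[dst] = 0x1234;                /* like UNW_INSN_MOVE writing a location */
        s[dst + 1] = 7;                 /* like UNW_INSN_SETNAT_TYPE */
        printf("%lx %lu\n", (unsigned long) f.r4_loc, f.r4_nat);
        return 0;
}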
+
+static int
+find_save_locs (struct unw_frame_info *info)
+{
+       int have_write_lock = 0;
+       struct unw_script *scr;
+       unsigned long flags = 0;
+
+       if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf))
+#ifndef XEN
+           || info->ip < TASK_SIZE
+#endif
+               ) {
+               /* don't let obviously bad addresses pollute the cache */
+               /* FIXME: should really be level 0 but it occurs too often. KAO */
+               UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __FUNCTION__, info->ip);
+               info->rp_loc = NULL;
+               return -1;
+       }
+
+       scr = script_lookup(info);
+       if (!scr) {
+               spin_lock_irqsave(&unw.lock, flags);
+               scr = build_script(info);
+               if (!scr) {
+                       spin_unlock_irqrestore(&unw.lock, flags);
+                       UNW_DPRINT(0,
+                                  "unwind.%s: failed to locate/build unwind script for ip %lx\n",
+                                  __FUNCTION__, info->ip);
+                       return -1;
+               }
+               have_write_lock = 1;
+       }
+       info->hint = scr->hint;
+       info->prev_script = scr - unw.cache;
+
+       run_script(scr, info);
+
+       if (have_write_lock) {
+               write_unlock(&scr->lock);
+               spin_unlock_irqrestore(&unw.lock, flags);
+       } else
+               read_unlock(&scr->lock);
+       return 0;
+}
+
+int
+unw_unwind (struct unw_frame_info *info)
+{
+       unsigned long prev_ip, prev_sp, prev_bsp;
+       unsigned long ip, pr, num_regs;
+       STAT(unsigned long start, flags;)
+       int retval;
+
+       STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc());
+
+       prev_ip = info->ip;
+       prev_sp = info->sp;
+       prev_bsp = info->bsp;
+
+       /* restore the ip */
+       if (!info->rp_loc) {
+               /* FIXME: should really be level 0 but it occurs too often. KAO */
+               UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n",
+                          __FUNCTION__, info->ip);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+               return -1;
+       }
+       ip = info->ip = *info->rp_loc;
+       if (ip < GATE_ADDR) {
+               UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+               return -1;
+       }
+
+       /* restore the cfm: */
+       if (!info->pfs_loc) {
+               UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+               return -1;
+       }
+       info->cfm_loc = info->pfs_loc;
+
+       /* restore the bsp: */
+       pr = info->pr;
+       num_regs = 0;
+       if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) {
+               info->pt = info->sp + 16;
+               if ((pr & (1UL << PRED_NON_SYSCALL)) != 0)
+                       num_regs = *info->cfm_loc & 0x7f;               /* size of frame */
+               info->pfs_loc =
+                       (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs));
+               UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __FUNCTION__, info->pt);
+       } else
+               num_regs = (*info->cfm_loc >> 7) & 0x7f;        /* size of locals */
+       info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs);
+       if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) {
+               UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n",
+                       __FUNCTION__, info->bsp, info->regstk.limit, info->regstk.top);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+               return -1;
+       }
+
+       /* restore the sp: */
+       info->sp = info->psp;
+       if (info->sp < info->memstk.top || info->sp > info->memstk.limit) {
+               UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n",
+                       __FUNCTION__, info->sp, info->memstk.top, info->memstk.limit);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+               return -1;
+       }
+
+       if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) {
+               UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n",
+                          __FUNCTION__, ip);
+               STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+               return -1;
+       }
+
+       /* as we unwind, the saved ar.unat becomes the primary unat: */
+       info->pri_unat_loc = info->unat_loc;
+
+       /* finally, restore the predicates: */
+       unw_get_pr(info, &info->pr);
+
+       retval = find_save_locs(info);
+       STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+       return retval;
+}
+EXPORT_SYMBOL(unw_unwind);
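The 0x7f masks in unw_unwind() unpack the saved cfm/ar.pfs image: bits 0-6 give the size of frame (sof) and bits 7-13 the size of locals (sol), which is the number of stacked registers to step back over. A one-line worked example:

#include <stdio.h>

int main(void)
{
        unsigned long cfm = (5UL << 7) | 8UL;   /* sol = 5 locals, sof = 8 total */

        printf("sof=%lu sol=%lu\n", cfm & 0x7f, (cfm >> 7) & 0x7f);
        return 0;
}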
+
+int
+unw_unwind_to_user (struct unw_frame_info *info)
+{
+       unsigned long ip, sp, pr = 0;
+
+       while (unw_unwind(info) >= 0) {
+               unw_get_sp(info, &sp);
+               if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
+                   < IA64_PT_REGS_SIZE) {
+                       UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n",
+                                  __FUNCTION__);
+                       break;
+               }
+               if (unw_is_intr_frame(info) &&
+                   (pr & (1UL << PRED_USER_STACK)))
+                       return 0;
+               if (unw_get_pr (info, &pr) < 0) {
+                       unw_get_rp(info, &ip);
+                       UNW_DPRINT(0, "unwind.%s: failed to read "
+                                  "predicate register (ip=0x%lx)\n",
+                               __FUNCTION__, ip);
+                       return -1;
+               }
+       }
+       unw_get_ip(info, &ip);
+       UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
+                  __FUNCTION__, ip);
+       return -1;
+}
+EXPORT_SYMBOL(unw_unwind_to_user);
+
+static void
+init_frame_info (struct unw_frame_info *info, struct task_struct *t,
+                struct switch_stack *sw, unsigned long stktop)
+{
+       unsigned long rbslimit, rbstop, stklimit;
+       STAT(unsigned long start, flags;)
+
+       STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc());
+
+       /*
+        * Subtle stuff here: we _could_ unwind through the switch_stack frame but we
+        * don't want to do that because it would be slow as each preserved register would
+        * have to be processed.  Instead, what we do here is zero out the frame info and
+        * start the unwind process at the function that created the switch_stack frame.
+        * When a preserved value in switch_stack needs to be accessed, run_script() will
+        * initialize the appropriate pointer on demand.
+        */
+       memset(info, 0, sizeof(*info));
+
+       rbslimit = (unsigned long) t + IA64_RBS_OFFSET;
+       rbstop   = sw->ar_bspstore;
+       if (rbstop - (unsigned long) t >= IA64_STK_OFFSET)
+               rbstop = rbslimit;
+
+       stklimit = (unsigned long) t + IA64_STK_OFFSET;
+       if (stktop <= rbstop)
+               stktop = rbstop;
+
+       info->regstk.limit = rbslimit;
+       info->regstk.top   = rbstop;
+       info->memstk.limit = stklimit;
+       info->memstk.top   = stktop;
+       info->task = t;
+       info->sw  = sw;
+       info->sp = info->psp = stktop;
+       info->pr = sw->pr;
+       UNW_DPRINT(3, "unwind.%s:\n"
+                  "  task   0x%lx\n"
+                  "  rbs = [0x%lx-0x%lx)\n"
+                  "  stk = [0x%lx-0x%lx)\n"
+                  "  pr     0x%lx\n"
+                  "  sw     0x%lx\n"
+                  "  sp     0x%lx\n",
+                  __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
+                  info->pr, (unsigned long) info->sw, info->sp);
+       STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags));
+}
+
+void
+unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw)
+{
+       unsigned long sol;
+
+       init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16);
+       info->cfm_loc = &sw->ar_pfs;
+       sol = (*info->cfm_loc >> 7) & 0x7f;
+       info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol);
+       info->ip = sw->b0;
+       UNW_DPRINT(3, "unwind.%s:\n"
+                  "  bsp    0x%lx\n"
+                  "  sol    0x%lx\n"
+                  "  ip     0x%lx\n",
+                  __FUNCTION__, info->bsp, sol, info->ip);
+       find_save_locs(info);
+}
+
+EXPORT_SYMBOL(unw_init_frame_info);
+
+void
+unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t)
+{
+#ifdef XEN
+       struct switch_stack *sw = (struct switch_stack *) (t->arch._thread.ksp + 16);
+#else
+       struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
+#endif
+
+       UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__);
+       unw_init_frame_info(info, t, sw);
+}
+EXPORT_SYMBOL(unw_init_from_blocked_task);
+
+static void
+init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base,
+                  unsigned long gp, const void *table_start, const void *table_end)
+{
+       const struct unw_table_entry *start = table_start, *end = table_end;
+
+       table->name = name;
+       table->segment_base = segment_base;
+       table->gp = gp;
+       table->start = segment_base + start[0].start_offset;
+       table->end = segment_base + end[-1].end_offset;
+       table->array = start;
+       table->length = end - start;
+}
+
+#ifndef XEN
+void *
+unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp,
+                     const void *table_start, const void *table_end)
+{
+       const struct unw_table_entry *start = table_start, *end = table_end;
+       struct unw_table *table;
+       unsigned long flags;
+
+       if (end - start <= 0) {
+               UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n",
+                          __FUNCTION__);
+               return NULL;
+       }
+
+       table = kmalloc(sizeof(*table), GFP_USER);
+       if (!table)
+               return NULL;
+
+       init_unwind_table(table, name, segment_base, gp, table_start, table_end);
+
+       spin_lock_irqsave(&unw.lock, flags);
+       {
+               /* keep kernel unwind table at the front (it's searched most commonly): */
+               table->next = unw.tables->next;
+               unw.tables->next = table;
+       }
+       spin_unlock_irqrestore(&unw.lock, flags);
+
+       return table;
+}
+
+void
+unw_remove_unwind_table (void *handle)
+{
+       struct unw_table *table, *prev;
+       struct unw_script *tmp;
+       unsigned long flags;
+       long index;
+
+       if (!handle) {
+               UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n",
+                          __FUNCTION__);
+               return;
+       }
+
+       table = handle;
+       if (table == &unw.kernel_table) {
+               UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a "
+                          "no-can-do!\n", __FUNCTION__);
+               return;
+       }
+
+       spin_lock_irqsave(&unw.lock, flags);
+       {
+               /* first, delete the table: */
+
+               for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next)
+                       if (prev->next == table)
+                               break;
+               if (!prev) {
+                       UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n",
+                                  __FUNCTION__, (void *) table);
+                       spin_unlock_irqrestore(&unw.lock, flags);
+                       return;
+               }
+               prev->next = table->next;
+       }
+       spin_unlock_irqrestore(&unw.lock, flags);
+
+       /* next, remove hash table entries for this table */
+
+       for (index = 0; index < UNW_HASH_SIZE; ++index) {
+               tmp = unw.cache + unw.hash[index];
+               if (unw.hash[index] >= UNW_CACHE_SIZE
+                   || tmp->ip < table->start || tmp->ip >= table->end)
+                       continue;
+
+               write_lock(&tmp->lock);
+               {
+                       if (tmp->ip >= table->start && tmp->ip < table->end) {
+                               unw.hash[index] = tmp->coll_chain;
+                               tmp->ip = 0;
+                       }
+               }
+               write_unlock(&tmp->lock);
+       }
+
+       kfree(table);
+}
+
+static int __init
+create_gate_table (void)
+{
+       const struct unw_table_entry *entry, *start, *end;
+       unsigned long *lp, segbase = GATE_ADDR;
+       size_t info_size, size;
+       char *info;
+       Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+       int i;
+
+       for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr)
+               if (phdr->p_type == PT_IA_64_UNWIND) {
+                       punw = phdr;
+                       break;
+               }
+
+       if (!punw) {
+               printk("%s: failed to find gate DSO's unwind table!\n", __FUNCTION__);
+               return 0;
+       }
+
+       start = (const struct unw_table_entry *) punw->p_vaddr;
+       end = (struct unw_table_entry *) ((char *) start + punw->p_memsz);
+       size  = 0;
+
+       unw_add_unwind_table("linux-gate.so", segbase, 0, start, end);
+
+       for (entry = start; entry < end; ++entry)
+               size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
+       size += 8;      /* reserve space for "end of table" marker */
+
+       unw.gate_table = kmalloc(size, GFP_KERNEL);
+       if (!unw.gate_table) {
+               unw.gate_table_size = 0;
+               printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __FUNCTION__);
+               return 0;
+       }
+       unw.gate_table_size = size;
+
+       lp = unw.gate_table;
+       info = (char *) unw.gate_table + size;
+
+       for (entry = start; entry < end; ++entry, lp += 3) {
+               info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
+               info -= info_size;
+               memcpy(info, (char *) segbase + entry->info_offset, info_size);
+
+               lp[0] = segbase + entry->start_offset;          /* start */
+               lp[1] = segbase + entry->end_offset;            /* end */
+               lp[2] = info - (char *) unw.gate_table;         /* info */
+       }
+       *lp = 0;        /* end-of-table marker */
+       return 0;
+}
+
+__initcall(create_gate_table);
+#endif // !XEN
+
+void __init
+unw_init (void)
+{
+       extern char __gp[];
+       extern void unw_hash_index_t_is_too_narrow (void);
+       long i, off;
+
+       if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
+               unw_hash_index_t_is_too_narrow();
+
+       unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT);
+       unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
+       unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
+       unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
+       unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT);
+       unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
+       unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
+       unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
+       for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8)
+               unw.sw_off[unw.preg_index[i]] = off;
+       for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8)
+               unw.sw_off[unw.preg_index[i]] = off;
+       for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16)
+               unw.sw_off[unw.preg_index[i]] = off;
+       for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16)
+               unw.sw_off[unw.preg_index[i]] = off;
+
+       for (i = 0; i < UNW_CACHE_SIZE; ++i) {
+               if (i > 0)
+                       unw.cache[i].lru_chain = (i - 1);
+               unw.cache[i].coll_chain = -1;
+               rwlock_init(&unw.cache[i].lock);
+       }
+       unw.lru_head = UNW_CACHE_SIZE - 1;
+       unw.lru_tail = 0;
+
+       init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp,
+                         __start_unwind, __end_unwind);
+}
+
+/*
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ *     This system call has been deprecated.  The new and improved way to get
+ *     at the kernel's unwind info is via the gate DSO.  The address of the
+ *     ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR.
+ *
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ * This system call copies the unwind data into the buffer pointed to by BUF and returns
+ * the size of the unwind data.  If BUF_SIZE is smaller than the size of the unwind data
+ * or if BUF is NULL, nothing is copied, but the system call still returns the size of the
+ * unwind data.
+ *
+ * The first portion of the unwind data contains an unwind table and the rest contains the
+ * associated unwind info (in no particular order).  The unwind table consists of a table
+ * of entries of the form:
+ *
+ *     u64 start;      (64-bit address of start of function)
+ *     u64 end;        (64-bit address of end of function)
+ *     u64 info;       (BUF-relative offset to unwind info)
+ *
+ * The end of the unwind table is indicated by an entry with a START address of zero.
+ *
+ * Please see the IA-64 Software Conventions and Runtime Architecture manual for details
+ * on the format of the unwind info.
+ *
+ * ERRORS
+ *     EFAULT  BUF points outside your accessible address space.
+ */
+asmlinkage long
+sys_getunwind (void __user *buf, size_t buf_size)
+{
+       if (buf && buf_size >= unw.gate_table_size)
+               if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0)
+                       return -EFAULT;
+       return unw.gate_table_size;
+}
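Per the comment above, a caller sizes the buffer with a NULL first call and then fetches the data. A hypothetical user-side sketch; __NR_getunwind is a placeholder here (the real number is ia64-specific), so this is illustrative only:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_getunwind
#define __NR_getunwind 0        /* placeholder: substitute the real ia64 number */
#endif

int main(void)
{
        long size = syscall(__NR_getunwind, NULL, 0);   /* query size only */
        void *buf;

        if (size <= 0)
                return 1;
        buf = malloc(size);
        if (buf && syscall(__NR_getunwind, buf, size) == size)
                printf("got %ld bytes of unwind data\n", size);
        free(buf);
        return 0;
}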
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind_decoder.c
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind_decoder.c  Mon Jan  9 11:22:17 2006
@@ -0,0 +1,459 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * Generic IA-64 unwind info decoder.
+ *
+ * This file is used both by the Linux kernel and objdump.  Please keep
+ * the two copies of this file in sync.
+ *
+ * You need to customize the decoder by defining the following
+ * macros/constants before including this file:
+ *
+ *  Types:
+ *     unw_word        Unsigned integer type with at least 64 bits 
+ *
+ *  Register names:
+ *     UNW_REG_BSP
+ *     UNW_REG_BSPSTORE
+ *     UNW_REG_FPSR
+ *     UNW_REG_LC
+ *     UNW_REG_PFS
+ *     UNW_REG_PR
+ *     UNW_REG_RNAT
+ *     UNW_REG_PSP
+ *     UNW_REG_RP
+ *     UNW_REG_UNAT
+ *
+ *  Decoder action macros:
+ *     UNW_DEC_BAD_CODE(code)
+ *     UNW_DEC_ABI(fmt,abi,context,arg)
+ *     UNW_DEC_BR_GR(fmt,brmask,gr,arg)
+ *     UNW_DEC_BR_MEM(fmt,brmask,arg)
+ *     UNW_DEC_COPY_STATE(fmt,label,arg)
+ *     UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
+ *     UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
+ *     UNW_DEC_FR_MEM(fmt,frmask,arg)
+ *     UNW_DEC_GR_GR(fmt,grmask,gr,arg)
+ *     UNW_DEC_GR_MEM(fmt,grmask,arg)
+ *     UNW_DEC_LABEL_STATE(fmt,label,arg)
+ *     UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
+ *     UNW_DEC_MEM_STACK_V(fmt,t,arg)
+ *     UNW_DEC_PRIUNAT_GR(fmt,r,arg)
+ *     UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
+ *     UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
+ *     UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg)
+ *     UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg)
+ *     UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
+ *     UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
+ *     UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
+ *     UNW_DEC_REG_REG(fmt,src,dst,arg)
+ *     UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
+ *     UNW_DEC_REG_WHEN(fmt,reg,t,arg)
+ *     UNW_DEC_RESTORE(fmt,t,abreg,arg)
+ *     UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
+ *     UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
+ *     UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
+ *     UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
+ *     UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ *     UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
+ *     UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
+ *     UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
+ *     UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ */
+
+static unw_word
+unw_decode_uleb128 (unsigned char **dpp)
+{
+  unsigned shift = 0;
+  unw_word byte, result = 0;
+  unsigned char *bp = *dpp;
+
+  while (1)
+    {
+      byte = *bp++;
+      result |= (byte & 0x7f) << shift;
+      if ((byte & 0x80) == 0)
+       break;
+      shift += 7;
+    }
+  *dpp = bp;
+  return result;
+}
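unw_decode_uleb128() reads the standard little-endian base-128 varint: seven payload bits per byte, least significant group first, high bit set on every byte except the last. A standalone copy with a worked example (0xe8 0x07 decodes to 1000):

#include <stdio.h>

static unsigned long uleb128(const unsigned char **dpp)
{
        unsigned shift = 0;
        unsigned long byte, result = 0;
        const unsigned char *bp = *dpp;

        do {
                byte = *bp++;
                result |= (byte & 0x7f) << shift;       /* 7 payload bits, LSB first */
                shift += 7;
        } while (byte & 0x80);                          /* high bit means "more" */
        *dpp = bp;
        return result;
}

int main(void)
{
        static const unsigned char buf[] = { 0xe8, 0x07 };      /* 0x68 | 0x80, then 0x07 */
        const unsigned char *p = buf;

        printf("%lu\n", uleb128(&p));   /* prints 1000 */
        return 0;
}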
+
+static unsigned char *
+unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, abreg;
+  unw_word t, off;
+
+  byte1 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  off = unw_decode_uleb128 (&dp);
+  abreg = (byte1 & 0x7f);
+  if (byte1 & 0x80)
+         UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
+  else
+         UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, abreg, x, ytreg;
+  unw_word t;
+
+  byte1 = *dp++; byte2 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  abreg = (byte1 & 0x7f);
+  ytreg = byte2;
+  x = (byte1 >> 7) & 1;
+  if ((byte1 & 0x80) == 0 && ytreg == 0)
+    UNW_DEC_RESTORE(X2, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, abreg, qp;
+  unw_word t, off;
+
+  byte1 = *dp++; byte2 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+  off = unw_decode_uleb128 (&dp);
+
+  qp = (byte1 & 0x3f);
+  abreg = (byte2 & 0x7f);
+
+  if (byte1 & 0x80)
+    UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
+  else
+    UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg;
+  unw_word t;
+
+  byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+  t = unw_decode_uleb128 (&dp);
+
+  qp = (byte1 & 0x3f);
+  abreg = (byte2 & 0x7f);
+  x = (byte2 >> 7) & 1;
+  ytreg = byte3;
+
+  if ((byte2 & 0x80) == 0 && byte3 == 0)
+    UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
+  else
+    UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  int body = (code & 0x20) != 0;
+  unw_word rlen;
+
+  rlen = (code & 0x1f);
+  UNW_DEC_PROLOGUE(R1, body, rlen, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char byte1, mask, grsave;
+  unw_word rlen;
+
+  byte1 = *dp++;
+
+  mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+  grsave = (byte1 & 0x7f);
+  rlen = unw_decode_uleb128 (&dp);
+  UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word rlen;
+
+  rlen = unw_decode_uleb128 (&dp);
+  UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char brmask = (code & 0x1f);
+
+  UNW_DEC_BR_MEM(P1, brmask, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
+{
+  if ((code & 0x10) == 0)
+    {
+      unsigned char byte1 = *dp++;
+
+      UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
+                   (byte1 & 0x7f), arg);
+    }
+  else if ((code & 0x08) == 0)
+    {
+      unsigned char byte1 = *dp++, r, dst;
+
+      r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+      dst = (byte1 & 0x7f);
+      switch (r)
+       {
+       case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
+       case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
+       case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
+       case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
+       case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
+       case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
+       case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
+       case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
+       case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
+       case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
+       case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
+       case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
+       default: UNW_DEC_BAD_CODE(r); break;
+       }
+    }
+  else if ((code & 0x7) == 0)
+    UNW_DEC_SPILL_MASK(P4, dp, arg);
+  else if ((code & 0x7) == 1)
+    {
+      unw_word grmask, frmask, byte1, byte2, byte3;
+
+      byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+      grmask = ((byte1 >> 4) & 0xf);
+      frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
+      UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
+    }
+  else
+    UNW_DEC_BAD_CODE(code);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
+{
+  int gregs = (code & 0x10) != 0;
+  unsigned char mask = (code & 0x0f);
+
+  if (gregs)
+    UNW_DEC_GR_MEM(P6, mask, arg);
+  else
+    UNW_DEC_FR_MEM(P6, mask, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unsigned char r, byte1, byte2;
+  unw_word t, size;
+
+  if ((code & 0x10) == 0)
+    {
+      r = (code & 0xf);
+      t = unw_decode_uleb128 (&dp);
+      switch (r)
+       {
+       case 0:
+         size = unw_decode_uleb128 (&dp);
+         UNW_DEC_MEM_STACK_F(P7, t, size, arg);
+         break;
+
+       case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
+       case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
+       case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
+       case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
+       case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
+       case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
+       case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
+       case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
+       case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
+       case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
+       case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
+       case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
+       case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
+       case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
+       case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
+       default: UNW_DEC_BAD_CODE(r); break;
+       }
+    }
+  else
+    {
+      switch (code & 0xf)
+       {
+       case 0x0: /* p8 */
+         {
+           r = *dp++;
+           t = unw_decode_uleb128 (&dp);
+           switch (r)
+             {
+             case  1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
+             case  2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
+             case  3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
+             case  4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
+             case  5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
+             case  6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
+             case  7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
+             case  8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
+             case  9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
+             case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
+             case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+             case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+             case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
+             case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
+             case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
+             case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
+             case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
+             case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
+             case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
+             default: UNW_DEC_BAD_CODE(r); break;
+           }
+         }
+         break;
+
+       case 0x1:
+         byte1 = *dp++; byte2 = *dp++;
+         UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
+         break;
+
+       case 0xf: /* p10 */
+         byte1 = *dp++; byte2 = *dp++;
+         UNW_DEC_ABI(P10, byte1, byte2, arg);
+         break;
+
+       case 0x9:
+         return unw_decode_x1 (dp, code, arg);
+
+       case 0xa:
+         return unw_decode_x2 (dp, code, arg);
+
+       case 0xb:
+         return unw_decode_x3 (dp, code, arg);
+
+       case 0xc:
+         return unw_decode_x4 (dp, code, arg);
+
+       default:
+         UNW_DEC_BAD_CODE(code);
+         break;
+       }
+    }
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word label = (code & 0x1f);
+
+  if ((code & 0x20) != 0)
+    UNW_DEC_COPY_STATE(B1, label, arg);
+  else
+    UNW_DEC_LABEL_STATE(B1, label, arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word t;
+
+  t = unw_decode_uleb128 (&dp);
+  UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg);
+  return dp;
+}
+
+static unsigned char *
+unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+  unw_word t, ecount, label;
+
+  if ((code & 0x10) == 0)
+    {
+      t = unw_decode_uleb128 (&dp);
+      ecount = unw_decode_uleb128 (&dp);
+      UNW_DEC_EPILOGUE(B3, t, ecount, arg);
+    }
+  else if ((code & 0x07) == 0)
+    {
+      label = unw_decode_uleb128 (&dp);
+      if ((code & 0x08) != 0)
+       UNW_DEC_COPY_STATE(B4, label, arg);
+      else
+       UNW_DEC_LABEL_STATE(B4, label, arg);
+    }
+  else
+    switch (code & 0x7)
+      {
+      case 1: return unw_decode_x1 (dp, code, arg);
+      case 2: return unw_decode_x2 (dp, code, arg);
+      case 3: return unw_decode_x3 (dp, code, arg);
+      case 4: return unw_decode_x4 (dp, code, arg);
+      default: UNW_DEC_BAD_CODE(code); break;
+      }
+  return dp;
+}
+
+typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
+
+static unw_decoder unw_decode_table[2][8] =
+{
+  /* prologue table: */
+  {
+    unw_decode_r1,     /* 0 */
+    unw_decode_r1,
+    unw_decode_r2,
+    unw_decode_r3,
+    unw_decode_p1,     /* 4 */
+    unw_decode_p2_p5,
+    unw_decode_p6,
+    unw_decode_p7_p10
+  },
+  {
+    unw_decode_r1,     /* 0 */
+    unw_decode_r1,
+    unw_decode_r2,
+    unw_decode_r3,
+    unw_decode_b1,     /* 4 */
+    unw_decode_b1,
+    unw_decode_b2,
+    unw_decode_b3_x4
+  }
+};
+
+/*
+ * Decode one descriptor and return address of next descriptor.
+ */
+static inline unsigned char *
+unw_decode (unsigned char *dp, int inside_body, void *arg)
+{
+  unw_decoder decoder;
+  unsigned char code;
+
+  code = *dp++;
+  decoder = unw_decode_table[inside_body][code >> 5];
+  dp = (*decoder) (dp, code, arg);
+  return dp;
+}
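
For readers following the new decoder: a minimal, illustrative driver loop (not part of this patch; the termination condition is an assumption based on the unw_state_record fields introduced in unwind_i.h below) showing how unw_decode() is meant to be iterated:

static void walk_descriptors(unsigned char *dp, unsigned char *desc_end,
                             struct unw_state_record *sr)
{
    /* Descriptors are variable-length; unw_decode() consumes exactly one
     * and returns a pointer to the next.  The dispatch table (prologue
     * vs. body) is selected by the state record's in_body flag. */
    while (!sr->done && dp < desc_end)
        dp = unw_decode(dp, sr->in_body, sr);
}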
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind_i.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind_i.h        Mon Jan  9 11:22:17 2006
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * Kernel unwind support.
+ */
+
+#define UNW_VER(x)             ((x) >> 48)
+#define UNW_FLAG_MASK          0x0000ffff00000000
+#define UNW_FLAG_OSMASK                0x0000f00000000000
+#define UNW_FLAG_EHANDLER(x)   ((x) & 0x0000000100000000L)
+#define UNW_FLAG_UHANDLER(x)   ((x) & 0x0000000200000000L)
+#define UNW_LENGTH(x)          ((x) & 0x00000000ffffffffL)
+
+enum unw_register_index {
+       /* primary unat: */
+       UNW_REG_PRI_UNAT_GR,
+       UNW_REG_PRI_UNAT_MEM,
+
+       /* register stack */
+       UNW_REG_BSP,                                    /* register stack pointer */
+       UNW_REG_BSPSTORE,
+       UNW_REG_PFS,                                    /* previous function state */
+       UNW_REG_RNAT,
+       /* memory stack */
+       UNW_REG_PSP,                                    /* previous memory stack pointer */
+       /* return pointer: */
+       UNW_REG_RP,
+
+       /* preserved registers: */
+       UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7,
+       UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
+       UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5,
+       UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5,
+       UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19,
+       UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23,
+       UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27,
+       UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31,
+       UNW_NUM_REGS
+};
+
+struct unw_info_block {
+       u64 header;
+       u64 desc[0];            /* unwind descriptors */
+       /* personality routine and language-specific data follow behind descriptors */
+};
+
+struct unw_table {
+       struct unw_table *next;         /* must be first member! */
+       const char *name;
+       unsigned long gp;               /* global pointer for this load-module */
+       unsigned long segment_base;     /* base for offsets in the unwind table entries */
+       unsigned long start;
+       unsigned long end;
+       const struct unw_table_entry *array;
+       unsigned long length;
+};
+
+enum unw_where {
+       UNW_WHERE_NONE,                 /* register isn't saved at all */
+       UNW_WHERE_GR,                   /* register is saved in a general register */
+       UNW_WHERE_FR,                   /* register is saved in a floating-point register */
+       UNW_WHERE_BR,                   /* register is saved in a branch register */
+       UNW_WHERE_SPREL,                /* register is saved on memstack (sp-relative) */
+       UNW_WHERE_PSPREL,               /* register is saved on memstack (psp-relative) */
+       /*
+        * At the end of each prologue these locations get resolved to
+        * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively:
+        */
+       UNW_WHERE_SPILL_HOME,           /* register is saved in its spill home */
+       UNW_WHERE_GR_SAVE               /* register is saved in next general register */
+};
+
+#define UNW_WHEN_NEVER 0x7fffffff
+
+struct unw_reg_info {
+       unsigned long val;              /* save location: register number or offset */
+       enum unw_where where;           /* where the register gets saved */
+       int when;                       /* when the register gets saved */
+};
+
+struct unw_reg_state {
+       struct unw_reg_state *next;             /* next (outer) element on state stack */
+       struct unw_reg_info reg[UNW_NUM_REGS];  /* register save locations */
+};
+
+struct unw_labeled_state {
+       struct unw_labeled_state *next;         /* next labeled state (or NULL) */
+       unsigned long label;                    /* label for this state */
+       struct unw_reg_state saved_state;
+};
+
+struct unw_state_record {
+       unsigned int first_region : 1;  /* is this the first region? */
+       unsigned int done : 1;          /* are we done scanning descriptors? */
+       unsigned int any_spills : 1;    /* got any register spills? */
+       unsigned int in_body : 1;       /* are we inside a body (as opposed to a prologue)? */
+       unsigned long flags;            /* see UNW_FLAG_* in unwind.h */
+
+       u8 *imask;                      /* imask of spill_mask record or NULL */
+       unsigned long pr_val;           /* predicate values */
+       unsigned long pr_mask;          /* predicate mask */
+       long spill_offset;              /* psp-relative offset for spill base */
+       int region_start;
+       int region_len;
+       int epilogue_start;
+       int epilogue_count;
+       int when_target;
+
+       u8 gr_save_loc;                 /* next general register to use for saving a register */
+       u8 return_link_reg;             /* branch register in which the return link is passed */
+
+       struct unw_labeled_state *labeled_states;       /* list of all labeled states */
+       struct unw_reg_state curr;      /* current state */
+};
+
+enum unw_nat_type {
+       UNW_NAT_NONE,           /* NaT not represented */
+       UNW_NAT_VAL,            /* NaT represented by NaT value (fp reg) */
+       UNW_NAT_MEMSTK,         /* NaT value is in unat word at offset OFF  */
+       UNW_NAT_REGSTK          /* NaT is in rnat */
+};
+
+enum unw_insn_opcode {
+       UNW_INSN_ADD,                   /* s[dst] += val */
+       UNW_INSN_ADD_PSP,               /* s[dst] = (s.psp + val) */
+       UNW_INSN_ADD_SP,                /* s[dst] = (s.sp + val) */
+       UNW_INSN_MOVE,                  /* s[dst] = s[val] */
+       UNW_INSN_MOVE2,                 /* s[dst] = s[val]; s[dst+1] = s[val+1] */
+       UNW_INSN_MOVE_STACKED,          /* s[dst] = ia64_rse_skip(*s.bsp, val) */
+       UNW_INSN_SETNAT_MEMSTK,         /* s[dst+1].nat.type = MEMSTK;
+                                          s[dst+1].nat.off = *s.pri_unat - s[dst] */
+       UNW_INSN_SETNAT_TYPE,           /* s[dst+1].nat.type = val */
+       UNW_INSN_LOAD,                  /* s[dst] = *s[val] */
+       UNW_INSN_MOVE_SCRATCH,          /* s[dst] = scratch reg "val" */
+       UNW_INSN_MOVE_CONST,            /* s[dst] = constant reg "val" */
+};
+
+struct unw_insn {
+       unsigned int opc        :  4;
+       unsigned int dst        :  9;
+       signed int val          : 19;
+};
+
+/*
+ * Preserved general static registers (r4-r7) give rise to two script
+ * instructions; everything else yields at most one instruction; at
+ * the end of the script, the psp gets popped, accounting for one more
+ * instruction.
+ */
+#define UNW_MAX_SCRIPT_LEN     (UNW_NUM_REGS + 5)
+
+struct unw_script {
+       unsigned long ip;               /* ip this script is for */
+       unsigned long pr_mask;          /* mask of predicates script depends on */
+       unsigned long pr_val;           /* predicate values this script is for */
+       rwlock_t lock;
+       unsigned int flags;             /* see UNW_FLAG_* in unwind.h */
+       unsigned short lru_chain;       /* used for least-recently-used chain */
+       unsigned short coll_chain;      /* used for hash collisions */
+       unsigned short hint;            /* hint for next script to try (or -1) */
+       unsigned short count;           /* number of instructions in script */
+       struct unw_insn insn[UNW_MAX_SCRIPT_LEN];
+};
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_32/xen.lds.S
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/x86_32/xen.lds.S     Mon Jan  9 11:22:17 2006
@@ -0,0 +1,85 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
+ * Modified for i386 Xen by Keir Fraser
+ */
+
+#include <xen/config.h>
+#include <asm/page.h>
+#undef ENTRY
+#undef ALIGN
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(start)
+PHDRS
+{
+  text PT_LOAD ;
+}
+SECTIONS
+{
+  . = 0xFF000000 + 0x100000;
+  _text = .;                   /* Text and read-only data */
+  .text : {
+       *(.text)
+       *(.fixup)
+       *(.gnu.warning)
+       } :text =0x9090
+  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
+
+  _etext = .;                  /* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) } :text
+
+  . = ALIGN(32);               /* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) } :text
+  __stop___ex_table = .;
+
+  . = ALIGN(32);               /* Pre-exception table */
+  __start___pre_ex_table = .;
+  __pre_ex_table : { *(__pre_ex_table) } :text
+  __stop___pre_ex_table = .;
+
+  .data : {                    /* Data */
+       *(.data)
+       CONSTRUCTORS
+       } :text
+
+  . = ALIGN(4096);             /* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) } :text
+  .data.init : { *(.data.init) } :text
+  . = ALIGN(32);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) } :text
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) } :text
+  __initcall_end = .;
+  . = ALIGN(STACK_SIZE);
+  __init_end = .;
+
+  __bss_start = .;             /* BSS */
+  .bss : {
+       *(.bss.stack_aligned)
+       *(.bss.page_aligned)
+       *(.bss)
+       } :text
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.text.exit)
+       *(.data.exit)
+       *(.exitcall.exit)
+       }
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
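
As an aside on how the __initcall_start/__initcall_end markers laid out by this script are consumed: a sketch of the usual pattern (the initcall_t spelling and helper name are assumptions modelled on the Linux/Xen initcall convention, not part of this patch):

typedef int (*initcall_t)(void);
extern initcall_t __initcall_start[], __initcall_end[];

static void run_initcalls(void)
{
    initcall_t *call;

    /* The script places every .initcall.init entry between the two
     * marker symbols, so iterating the span runs each hook once. */
    for ( call = __initcall_start; call < __initcall_end; call++ )
        (*call)();
}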
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_64/xen.lds.S
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/arch/x86/x86_64/xen.lds.S     Mon Jan  9 11:22:17 2006
@@ -0,0 +1,83 @@
+/* Excerpts written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> */
+/* Modified for x86-64 Xen by Keir Fraser */
+
+#include <xen/config.h>
+#include <asm/page.h>
+#undef ENTRY
+#undef ALIGN
+
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(start)
+PHDRS
+{
+  text PT_LOAD ;
+}
+SECTIONS
+{
+  . = 0xFFFF830000100000;
+  _text = .;                   /* Text and read-only data */
+  .text : {
+       *(.text)
+       *(.fixup)
+       *(.gnu.warning)
+       } :text = 0x9090
+  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
+
+  _etext = .;                  /* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) } :text
+
+  . = ALIGN(32);               /* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) } :text
+  __stop___ex_table = .;
+
+  . = ALIGN(32);                /* Pre-exception table */
+  __start___pre_ex_table = .;
+  __pre_ex_table : { *(__pre_ex_table) } :text
+  __stop___pre_ex_table = .;
+
+  .data : {                    /* Data */
+       *(.data)
+       CONSTRUCTORS
+       } :text
+
+  . = ALIGN(4096);             /* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) } :text
+  .data.init : { *(.data.init) } :text
+  . = ALIGN(32);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) } :text
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) } :text
+  __initcall_end = .;
+  . = ALIGN(STACK_SIZE);
+  __init_end = .;
+
+  __bss_start = .;             /* BSS */
+  .bss : {
+       *(.bss.stack_aligned)
+       *(.bss.page_aligned)
+       *(.bss)
+       } :text
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.text.exit)
+       *(.data.exit)
+       *(.exitcall.exit)
+       }
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/rangeset.c
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/common/rangeset.c     Mon Jan  9 11:22:17 2006
@@ -0,0 +1,399 @@
+/******************************************************************************
+ * rangeset.c
+ * 
+ * Creation, maintenance and automatic destruction of per-domain sets of
+ * numeric ranges.
+ * 
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#include <xen/sched.h>
+#include <xen/rangeset.h>
+
+/* An inclusive range [s,e] and pointer to next range in ascending order. */
+struct range {
+    struct list_head list;
+    unsigned long s, e;
+};
+
+struct rangeset {
+    /* Owning domain and threaded list of rangesets. */
+    struct list_head rangeset_list;
+    struct domain   *domain;
+
+    /* Ordered list of ranges contained in this set, and protecting lock. */
+    struct list_head range_list;
+    spinlock_t       lock;
+
+    /* Pretty-printing name. */
+    char             name[32];
+
+    /* RANGESETF flags. */
+    unsigned int     flags;
+};
+
+/*****************************
+ * Private range functions hide the underlying linked-list implementation.
+ */
+
+/* Find highest range lower than or containing s. NULL if no such range. */
+static struct range *find_range(
+    struct rangeset *r, unsigned long s)
+{
+    struct range *x = NULL, *y;
+
+    list_for_each_entry ( y, &r->range_list, list )
+    {
+        if ( y->s > s )
+            break;
+        x = y;
+    }
+
+    return x;
+}
+
+/* Return the lowest range in the set r, or NULL if r is empty. */
+static struct range *first_range(
+    struct rangeset *r)
+{
+    if ( list_empty(&r->range_list) )
+        return NULL;
+    return list_entry(r->range_list.next, struct range, list);
+}
+
+/* Return range following x in ascending order, or NULL if x is the highest. */
+static struct range *next_range(
+    struct rangeset *r, struct range *x)
+{
+    if ( x->list.next == &r->range_list )
+        return NULL;
+    return list_entry(x->list.next, struct range, list);
+}
+
+/* Insert range y after range x in r. Insert as first range if x is NULL. */
+static void insert_range(
+    struct rangeset *r, struct range *x, struct range *y)
+{
+    list_add(&y->list, (x != NULL) ? &x->list : &r->range_list);
+}
+
+/* Remove a range from its list and free it. */
+static void destroy_range(
+    struct range *x)
+{
+    list_del(&x->list);
+    xfree(x);
+}
+
+/*****************************
+ * Core public functions
+ */
+
+int rangeset_add_range(
+    struct rangeset *r, unsigned long s, unsigned long e)
+{
+    struct range *x, *y;
+    int rc = 0;
+
+    spin_lock(&r->lock);
+
+    x = find_range(r, s);
+    y = find_range(r, e);
+
+    if ( x == y )
+    {
+        if ( (x == NULL) || ((x->e < s) && ((x->e + 1) != s)) )
+        {
+            x = xmalloc(struct range);
+            if ( x == NULL )
+            {
+                rc = -ENOMEM;
+                goto out;
+            }
+
+            x->s = s;
+            x->e = e;
+
+            insert_range(r, y, x);
+        }
+        else if ( x->e < e )
+            x->e = e;
+    }
+    else
+    {
+        if ( x == NULL )
+        {
+            x = first_range(r);
+            x->s = s;
+        }
+        else if ( (x->e < s) && ((x->e + 1) != s) )
+        {
+            x = next_range(r, x);
+            x->s = s;
+        }
+        
+        x->e = (y->e > e) ? y->e : e;
+
+        for ( ; ; )
+        {
+            y = next_range(r, x);
+            if ( (y == NULL) || (y->e > x->e) )
+                break;
+            destroy_range(y);
+        }
+    }
+
+    y = next_range(r, x);
+    if ( (y != NULL) && ((x->e + 1) == y->s) )
+    {
+        x->e = y->e;
+        destroy_range(y);
+    }
+
+ out:
+    spin_unlock(&r->lock);
+    return rc;
+}
+
+int rangeset_remove_range(
+    struct rangeset *r, unsigned long s, unsigned long e)
+{
+    struct range *x, *y, *t;
+    int rc = 0;
+
+    spin_lock(&r->lock);
+
+    x = find_range(r, s);
+    y = find_range(r, e);
+
+    if ( x == y )
+    {
+        if ( (x == NULL) || (x->e < s) )
+            goto out;
+
+        if ( (x->s < s) && (x->e > e) )
+        {
+            y = xmalloc(struct range);
+            if ( y == NULL )
+            {
+                rc = -ENOMEM;
+                goto out;
+            }
+
+            y->s = e + 1;
+            y->e = x->e;
+            x->e = s - 1;
+
+            insert_range(r, x, y);
+        }
+        else if ( (x->s == s) && (x->e <= e) )
+            destroy_range(x);
+        else if ( x->s == s )
+            x->s = e + 1;
+        else if ( x->e <= e )
+            x->e = s - 1;
+    }
+    else
+    {
+        if ( x == NULL )
+            x = first_range(r);
+
+        if ( x->s < s )
+        {
+            x->e = s - 1;
+            x = next_range(r, x);
+        }
+
+        while ( x != y )
+        {
+            t = x;
+            x = next_range(r, x);
+            destroy_range(t);
+        }
+
+        x->s = e + 1;
+        if ( x->s > x->e )
+            destroy_range(x);
+    }
+
+ out:
+    spin_unlock(&r->lock);
+    return rc;
+}
+
+int rangeset_contains_range(
+    struct rangeset *r, unsigned long s, unsigned long e)
+{
+    struct range *x;
+    int contains;
+
+    spin_lock(&r->lock);
+    x = find_range(r, s);
+    contains = (x && (x->e >= e));
+    spin_unlock(&r->lock);
+
+    return contains;
+}
+
+int rangeset_add_singleton(
+    struct rangeset *r, unsigned long s)
+{
+    return rangeset_add_range(r, s, s);
+}
+
+int rangeset_remove_singleton(
+    struct rangeset *r, unsigned long s)
+{
+    return rangeset_remove_range(r, s, s);
+}
+
+int rangeset_contains_singleton(
+    struct rangeset *r, unsigned long s)
+{
+    return rangeset_contains_range(r, s, s);
+}
+
+int rangeset_is_empty(
+    struct rangeset *r)
+{
+    return list_empty(&r->range_list);
+}
+
+struct rangeset *rangeset_new(
+    struct domain *d, char *name, unsigned int flags)
+{
+    struct rangeset *r;
+
+    r = xmalloc(struct rangeset);
+    if ( r == NULL )
+        return NULL;
+
+    spin_lock_init(&r->lock);
+    INIT_LIST_HEAD(&r->range_list);
+
+    BUG_ON(flags & ~RANGESETF_prettyprint_hex);
+    r->flags = flags;
+
+    if ( name != NULL )
+    {
+        strncpy(r->name, name, sizeof(r->name));
+        r->name[sizeof(r->name)-1] = '\0';
+    }
+    else
+    {
+        sprintf(r->name, "(no name)");
+    }
+
+    if ( (r->domain = d) != NULL )
+    {
+        spin_lock(&d->rangesets_lock);
+        list_add(&r->rangeset_list, &d->rangesets);
+        spin_unlock(&d->rangesets_lock);
+    }
+
+    return r;
+}
+
+void rangeset_destroy(
+    struct rangeset *r)
+{
+    struct range *x;
+
+    if ( r == NULL )
+        return;
+
+    if ( r->domain != NULL )
+    {
+        spin_lock(&r->domain->rangesets_lock);
+        list_del(&r->rangeset_list);
+        spin_unlock(&r->domain->rangesets_lock);
+    }
+
+    while ( (x = first_range(r)) != NULL )
+        destroy_range(x);
+
+    xfree(r);
+}
+
+void rangeset_domain_initialise(
+    struct domain *d)
+{
+    INIT_LIST_HEAD(&d->rangesets);
+    spin_lock_init(&d->rangesets_lock);
+}
+
+void rangeset_domain_destroy(
+    struct domain *d)
+{
+    struct rangeset *r;
+
+    while ( !list_empty(&d->rangesets) )
+    {
+        r = list_entry(d->rangesets.next, struct rangeset, rangeset_list);
+
+        BUG_ON(r->domain != d);
+        r->domain = NULL;
+        list_del(&r->rangeset_list);
+
+        rangeset_destroy(r);
+    }
+}
+
+/*****************************
+ * Pretty-printing functions
+ */
+
+static void print_limit(struct rangeset *r, unsigned long s)
+{
+    printk((r->flags & RANGESETF_prettyprint_hex) ? "%lx" : "%lu", s);
+}
+
+void rangeset_printk(
+    struct rangeset *r)
+{
+    int nr_printed = 0;
+    struct range *x;
+
+    spin_lock(&r->lock);
+
+    printk("%-10s {", r->name);
+
+    for ( x = first_range(r); x != NULL; x = next_range(r, x) )
+    {
+        if ( nr_printed++ )
+            printk(",");
+        printk(" ");
+        print_limit(r, x->s);
+        if ( x->s != x->e )
+        {
+            printk("-");
+            print_limit(r, x->e);
+        }
+    }
+
+    printk(" }");
+
+    spin_unlock(&r->lock);
+}
+
+void rangeset_domain_printk(
+    struct domain *d)
+{
+    struct rangeset *r;
+
+    printk("Rangesets belonging to domain %u:\n", d->domain_id);
+
+    spin_lock(&d->rangesets_lock);
+
+    if ( list_empty(&d->rangesets) )
+        printk("    None\n");
+
+    list_for_each_entry ( r, &d->rangesets, rangeset_list )
+    {
+        printk("    ");
+        rangeset_printk(r);
+        printk("\n");
+    }
+
+    spin_unlock(&d->rangesets_lock);
+}
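
To make the new interface concrete, a minimal usage sketch (illustrative only, not part of the patch; a NULL domain means the set is not auto-destroyed, and error handling is abbreviated):

void rangeset_example(void)
{
    struct rangeset *r;

    r = rangeset_new(NULL, "example", RANGESETF_prettyprint_hex);
    if ( r == NULL )
        return;

    /* [0x100,0x1ff] is stored as a single inclusive range; adjacent or
     * overlapping additions are merged by rangeset_add_range(). */
    if ( (rangeset_add_range(r, 0x100, 0x1ff) == 0) &&
         rangeset_contains_singleton(r, 0x180) )
        rangeset_printk(r);    /* prints: example    { 100-1ff } */

    rangeset_destroy(r);
}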
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/iocap.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-ia64/iocap.h      Mon Jan  9 11:22:17 2006
@@ -0,0 +1,10 @@
+/******************************************************************************
+ * iocap.h
+ * 
+ * Architecture-specific per-domain I/O capabilities.
+ */
+
+#ifndef __IA64_IOCAP_H__
+#define __IA64_IOCAP_H__
+
+#endif /* __IA64_IOCAP_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/iocap.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/asm-x86/iocap.h       Mon Jan  9 11:22:17 2006
@@ -0,0 +1,20 @@
+/******************************************************************************
+ * iocap.h
+ * 
+ * Architecture-specific per-domain I/O capabilities.
+ */
+
+#ifndef __X86_IOCAP_H__
+#define __X86_IOCAP_H__
+
+#define ioports_permit_access(d, s, e)                  \
+    rangeset_add_range((d)->arch.ioport_caps, s, e)
+#define ioports_deny_access(d, s, e)                    \
+    rangeset_remove_range((d)->arch.ioport_caps, s, e)
+#define ioports_access_permitted(d, s, e)               \
+    rangeset_contains_range((d)->arch.ioport_caps, s, e)
+
+#define cache_flush_permitted(d)                       \
+    (!rangeset_is_empty((d)->iomem_caps))
+
+#endif /* __X86_IOCAP_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/hvm_info_table.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/hvm/hvm_info_table.h   Mon Jan  9 11:22:17 2006
@@ -0,0 +1,24 @@
+/******************************************************************************
+ * hvm/hvm_info_table.h
+ * 
+ * HVM parameter and information table, written into guest memory map.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+
+#define HVM_INFO_PFN         0x09F
+#define HVM_INFO_OFFSET      0x800
+#define HVM_INFO_PADDR       ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
+
+struct hvm_info_table {
+    char        signature[8]; /* "HVM INFO" */
+    uint32_t    length;
+    uint8_t     checksum;
+    uint8_t     acpi_enabled;
+    uint8_t     apic_enabled;
+    uint8_t     pad[1];
+    uint32_t    nr_vcpus;
+};
+
+#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
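
A sketch of how a consumer might locate and sanity-check this table, assuming memcmp() and the fixed-width types are available (the zero-sum byte checksum over 'length' bytes is an assumption modelled on ACPI-style tables; it is not specified by this header):

static struct hvm_info_table *find_hvm_info(void)
{
    struct hvm_info_table *t = (struct hvm_info_table *)HVM_INFO_PADDR;
    uint8_t sum = 0;
    uint32_t i;

    if ( memcmp(t->signature, "HVM INFO", sizeof(t->signature)) != 0 )
        return NULL;

    /* Assumed convention: all 'length' bytes sum to zero (mod 256). */
    for ( i = 0; i < t->length; i++ )
        sum += ((uint8_t *)t)[i];

    return (sum == 0) ? t : NULL;
}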
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/ioreq.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/hvm/ioreq.h    Mon Jan  9 11:22:17 2006
@@ -0,0 +1,90 @@
+/*
+ * ioreq.h: I/O request definitions for device models
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef _IOREQ_H_
+#define _IOREQ_H_
+
+#define IOREQ_READ      1
+#define IOREQ_WRITE     0
+
+#define STATE_INVALID           0
+#define STATE_IOREQ_READY       1
+#define STATE_IOREQ_INPROCESS   2
+#define STATE_IORESP_READY      3
+#define STATE_IORESP_HOOK       4
+
+#define IOREQ_TYPE_PIO          0 /* pio */
+#define IOREQ_TYPE_COPY         1 /* mmio ops */
+#define IOREQ_TYPE_AND          2
+#define IOREQ_TYPE_OR           3
+#define IOREQ_TYPE_XOR          4
+
+/*
+ * VMExit dispatcher should cooperate with instruction decoder to
+ * prepare this structure and notify service OS and DM by sending
+ * virq
+ */
+typedef struct {
+    uint64_t addr;          /*  physical address            */
+    uint64_t size;          /*  size in bytes               */
+    uint64_t count;         /*  for rep prefixes            */
+    union {
+        uint64_t data;      /*  data                        */
+        void    *pdata;     /*  pointer to data             */
+    } u;
+    uint8_t state:4;
+    uint8_t pdata_valid:1;  /* if 1, use pdata above        */
+    uint8_t dir:1;          /*  1=read, 0=write             */
+    uint8_t df:1;
+    uint8_t type;           /* I/O type                     */
+} ioreq_t;
+
+#define MAX_VECTOR      256
+#define BITS_PER_BYTE   8
+#define INTR_LEN        (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
+#define INTR_LEN_32     (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
+
+typedef struct {
+    uint16_t    pic_elcr;
+    uint16_t    pic_irr;
+    uint16_t    pic_last_irr;
+    uint16_t    pic_clear_irr;
+    int         eport; /* Event channel port */
+} global_iodata_t;
+
+typedef struct {
+    ioreq_t     vp_ioreq;
+} vcpu_iodata_t;
+
+typedef struct {
+    global_iodata_t sp_global;
+    vcpu_iodata_t   vcpu_iodata[1];
+} shared_iopage_t;
+
+#endif /* _IOREQ_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
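
For orientation, an illustrative device-model fragment showing the intended state transitions (the port-emulation helper is hypothetical, not a real Xen interface; a complete DM would also handle the COPY/AND/OR/XOR types and rep counts):

/* Hypothetical back-end for port reads. */
extern uint64_t emulate_port_read(uint64_t addr, uint64_t size);

static void service_ioreq(ioreq_t *req)
{
    if ( req->state != STATE_IOREQ_READY )
        return;

    req->state = STATE_IOREQ_INPROCESS;

    if ( (req->type == IOREQ_TYPE_PIO) && (req->dir == IOREQ_READ) )
        req->u.data = emulate_port_read(req->addr, req->size);

    /* Completion is signalled back through the event channel port
     * advertised in global_iodata_t.eport. */
    req->state = STATE_IORESP_READY;
}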
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/vmx_assist.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/public/hvm/vmx_assist.h       Mon Jan  9 11:22:17 2006
@@ -0,0 +1,97 @@
+/*
+ * vmx_assist.h: Context definitions for the VMXASSIST world switch.
+ *
+ * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
+ * Copyright (c) 2005, International Business Machines Corporation.
+ */
+
+#ifndef _VMX_ASSIST_H_
+#define _VMX_ASSIST_H_
+
+#define VMXASSIST_BASE         0xD0000
+#define VMXASSIST_MAGIC        0x17101966
+#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
+
+#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
+#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
+
+#ifndef __ASSEMBLY__
+
+union vmcs_arbytes {
+    struct arbyte_fields {
+        unsigned int seg_type : 4,
+            s         : 1,
+            dpl       : 2,
+            p         : 1, 
+            reserved0 : 4,
+            avl       : 1,
+            reserved1 : 1,     
+            default_ops_size: 1,
+            g         : 1,
+            null_bit  : 1, 
+            reserved2 : 15;
+    } fields;
+    unsigned int bytes;
+};
+
+/*
+ * World switch state
+ */
+typedef struct vmx_assist_context {
+    uint32_t  eip;        /* execution pointer */
+    uint32_t  esp;        /* stack pointer */
+    uint32_t  eflags;     /* flags register */
+    uint32_t  cr0;
+    uint32_t  cr3;        /* page table directory */
+    uint32_t  cr4;
+    uint32_t  idtr_limit; /* idt */
+    uint32_t  idtr_base;
+    uint32_t  gdtr_limit; /* gdt */
+    uint32_t  gdtr_base;
+    uint32_t  cs_sel;     /* cs selector */
+    uint32_t  cs_limit;
+    uint32_t  cs_base;
+    union vmcs_arbytes cs_arbytes;
+    uint32_t  ds_sel;     /* ds selector */
+    uint32_t  ds_limit;
+    uint32_t  ds_base;
+    union vmcs_arbytes ds_arbytes;
+    uint32_t  es_sel;     /* es selector */
+    uint32_t  es_limit;
+    uint32_t  es_base;
+    union vmcs_arbytes es_arbytes;
+    uint32_t  ss_sel;     /* ss selector */
+    uint32_t  ss_limit;
+    uint32_t  ss_base;
+    union vmcs_arbytes ss_arbytes;
+    uint32_t  fs_sel;     /* fs selector */
+    uint32_t  fs_limit;
+    uint32_t  fs_base;
+    union vmcs_arbytes fs_arbytes;
+    uint32_t  gs_sel;     /* gs selector */
+    uint32_t  gs_limit;
+    uint32_t  gs_base;
+    union vmcs_arbytes gs_arbytes;
+    uint32_t  tr_sel;     /* task selector */
+    uint32_t  tr_limit;
+    uint32_t  tr_base;
+    union vmcs_arbytes tr_arbytes;
+    uint32_t  ldtr_sel;   /* ldtr selector */
+    uint32_t  ldtr_limit;
+    uint32_t  ldtr_base;
+    union vmcs_arbytes ldtr_arbytes;
+} vmx_assist_context_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _VMX_ASSIST_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/iocap.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/iocap.h   Mon Jan  9 11:22:17 2006
@@ -0,0 +1,34 @@
+/******************************************************************************
+ * iocap.h
+ * 
+ * Per-domain I/O capabilities.
+ */
+
+#ifndef __XEN_IOCAP_H__
+#define __XEN_IOCAP_H__
+
+#include <xen/rangeset.h>
+#include <asm/iocap.h>
+
+#define iomem_permit_access(d, s, e)                    \
+    rangeset_add_range((d)->iomem_caps, s, e)
+#define iomem_deny_access(d, s, e)                      \
+    rangeset_remove_range((d)->iomem_caps, s, e)
+#define iomem_access_permitted(d, s, e)                 \
+    rangeset_contains_range((d)->iomem_caps, s, e)
+
+#define irq_permit_access(d, i)                         \
+    rangeset_add_singleton((d)->irq_caps, i)
+#define irq_deny_access(d, i)                           \
+    rangeset_remove_singleton((d)->irq_caps, i)
+#define irqs_permit_access(d, s, e)                     \
+    rangeset_add_range((d)->irq_caps, s, e)
+#define irqs_deny_access(d, s, e)                       \
+    rangeset_remove_range((d)->irq_caps, s, e)
+#define irq_access_permitted(d, i)                      \
+    rangeset_contains_singleton((d)->irq_caps, i)
+
+#define multipage_allocation_permitted(d)               \
+    (!rangeset_is_empty((d)->iomem_caps))
+
+#endif /* __XEN_IOCAP_H__ */
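
By way of example, granting a domain a slice of legacy I/O resources with the new wrappers might look like this (illustrative only; the specific ranges are placeholders, and iomem_caps ranges are page-frame numbers, not byte addresses):

static int example_grant_io(struct domain *d)
{
    int rc;

    rc = irqs_permit_access(d, 0, 15);               /* legacy PIC lines */
    if ( rc == 0 )
        rc = iomem_permit_access(d, 0xa0, 0xbf);     /* VGA region, in pfns */
    if ( rc == 0 )
        rc = ioports_permit_access(d, 0x3c0, 0x3df); /* VGA I/O ports */
    return rc;
}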
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/rangeset.h
--- /dev/null   Mon Jan  9 11:19:55 2006
+++ b/xen/include/xen/rangeset.h        Mon Jan  9 11:22:17 2006
@@ -0,0 +1,71 @@
+/******************************************************************************
+ * rangeset.h
+ * 
+ * Creation, maintenance and automatic destruction of per-domain sets of
+ * numeric ranges.
+ * 
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#ifndef __XEN_RANGESET_H__
+#define __XEN_RANGESET_H__
+
+struct domain;
+struct rangeset;
+
+/*
+ * Initialise/destroy per-domain rangeset information.
+ * 
+ * It is invalid to create or destroy a rangeset belonging to a domain @d
+ * before rangeset_domain_initialise(d) returns or after calling
+ * rangeset_domain_destroy(d).
+ */
+void rangeset_domain_initialise(
+    struct domain *d);
+void rangeset_domain_destroy(
+    struct domain *d);
+
+/*
+ * Create/destroy a rangeset. Optionally attach to specified domain @d for
+ * auto-destruction when the domain dies. A name may be specified, for use
+ * in debug pretty-printing, and various RANGESETF flags (defined below).
+ * 
+ * It is invalid to perform any operation on a rangeset @r after calling
+ * rangeset_destroy(r).
+ */
+struct rangeset *rangeset_new(
+    struct domain *d, char *name, unsigned int flags);
+void rangeset_destroy(
+    struct rangeset *r);
+
+/* Flags for passing to rangeset_new(). */
+ /* Pretty-print range limits in hexadecimal. */
+#define _RANGESETF_prettyprint_hex 0
+#define RANGESETF_prettyprint_hex  (1U << _RANGESETF_prettyprint_hex)
+
+int __must_check rangeset_is_empty(
+    struct rangeset *r);
+
+/* Add/remove/query a numeric range. */
+int __must_check rangeset_add_range(
+    struct rangeset *r, unsigned long s, unsigned long e);
+int __must_check rangeset_remove_range(
+    struct rangeset *r, unsigned long s, unsigned long e);
+int __must_check rangeset_contains_range(
+    struct rangeset *r, unsigned long s, unsigned long e);
+
+/* Add/remove/query a single number. */
+int __must_check rangeset_add_singleton(
+    struct rangeset *r, unsigned long s);
+int __must_check rangeset_remove_singleton(
+    struct rangeset *r, unsigned long s);
+int __must_check rangeset_contains_singleton(
+    struct rangeset *r, unsigned long s);
+
+/* Rangeset pretty printing. */
+void rangeset_printk(
+    struct rangeset *r);
+void rangeset_domain_printk(
+    struct domain *d);
+
+#endif /* __XEN_RANGESET_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_32/xen.lds
--- a/xen/arch/x86/x86_32/xen.lds       Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,79 +0,0 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
- * Modified for i386 Xen by Keir Fraser
- */
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(start)
-PHDRS
-{
-  text PT_LOAD ;
-}
-SECTIONS
-{
-  . = 0xFF000000 + 0x100000;
-  _text = .;                   /* Text and read-only data */
-  .text : {
-       *(.text)
-       *(.fixup)
-       *(.gnu.warning)
-       } :text =0x9090
-  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
-
-  _etext = .;                  /* End of text section */
-
-  .rodata : { *(.rodata) *(.rodata.*) } :text
-
-  . = ALIGN(32);               /* Exception table */
-  __start___ex_table = .;
-  __ex_table : { *(__ex_table) } :text
-  __stop___ex_table = .;
-
-  . = ALIGN(32);               /* Pre-exception table */
-  __start___pre_ex_table = .;
-  __pre_ex_table : { *(__pre_ex_table) } :text
-  __stop___pre_ex_table = .;
-
-  .data : {                    /* Data */
-       *(.data)
-       CONSTRUCTORS
-       } :text
-
-  . = ALIGN(4096);             /* Init code and data */
-  __init_begin = .;
-  .text.init : { *(.text.init) } :text
-  .data.init : { *(.data.init) } :text
-  . = ALIGN(32);
-  __setup_start = .;
-  .setup.init : { *(.setup.init) } :text
-  __setup_end = .;
-  __initcall_start = .;
-  .initcall.init : { *(.initcall.init) } :text
-  __initcall_end = .;
-  . = ALIGN(8192);
-  __init_end = .;
-
-  __bss_start = .;             /* BSS */
-  .bss : {
-       *(.bss.twopage_aligned)
-       *(.bss.page_aligned)
-       *(.bss)
-       } :text
-  _end = . ;
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-       *(.text.exit)
-       *(.data.exit)
-       *(.exitcall.exit)
-       }
-
-  /* Stabs debugging sections.  */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  .comment 0 : { *(.comment) }
-}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_64/xen.lds
--- a/xen/arch/x86/x86_64/xen.lds       Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,77 +0,0 @@
-/* Excerpts written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> */
-/* Modified for x86-64 Xen by Keir Fraser */
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(start)
-PHDRS
-{
-  text PT_LOAD ;
-}
-SECTIONS
-{
-  . = 0xFFFF830000100000;
-  _text = .;                   /* Text and read-only data */
-  .text : {
-       *(.text)
-       *(.fixup)
-       *(.gnu.warning)
-       } :text = 0x9090
-  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
-
-  _etext = .;                  /* End of text section */
-
-  .rodata : { *(.rodata) *(.rodata.*) } :text
-
-  . = ALIGN(32);               /* Exception table */
-  __start___ex_table = .;
-  __ex_table : { *(__ex_table) } :text
-  __stop___ex_table = .;
-
-  . = ALIGN(32);                /* Pre-exception table */
-  __start___pre_ex_table = .;
-  __pre_ex_table : { *(__pre_ex_table) } :text
-  __stop___pre_ex_table = .;
-
-  .data : {                    /* Data */
-       *(.data)
-       CONSTRUCTORS
-       } :text
-
-  . = ALIGN(4096);             /* Init code and data */
-  __init_begin = .;
-  .text.init : { *(.text.init) } :text
-  .data.init : { *(.data.init) } :text
-  . = ALIGN(32);
-  __setup_start = .;
-  .setup.init : { *(.setup.init) } :text
-  __setup_end = .;
-  __initcall_start = .;
-  .initcall.init : { *(.initcall.init) } :text
-  __initcall_end = .;
-  . = ALIGN(8192);
-  __init_end = .;
-
-  __bss_start = .;             /* BSS */
-  .bss : {
-       *(.bss.twopage_aligned)
-       *(.bss.page_aligned)
-       *(.bss)
-       } :text
-  _end = . ;
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-       *(.text.exit)
-       *(.data.exit)
-       *(.exitcall.exit)
-       }
-
-  /* Stabs debugging sections.  */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  .comment 0 : { *(.comment) }
-}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/physdev.h
--- a/xen/include/asm-x86/physdev.h     Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,17 +0,0 @@
-/******************************************************************************
- * physdev.h
- */
-
-#ifndef __XEN_PHYSDEV_H__
-#define __XEN_PHYSDEV_H__
-
-#include <public/physdev.h>
-
-void physdev_modify_ioport_access_range(
-    struct domain *d, int enable, int port, int num );
-void physdev_destroy_state(struct domain *d);
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn);
-long do_physdev_op(physdev_op_t *uop);
-void physdev_init_dom0(struct domain *d);
-
-#endif /* __XEN_PHYSDEV_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/io/ioreq.h
--- a/xen/include/public/io/ioreq.h     Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,91 +0,0 @@
-/*
- * ioreq.h: I/O request definitions for device models
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef _IOREQ_H_
-#define _IOREQ_H_
-
-#define IOREQ_READ      1
-#define IOREQ_WRITE     0
-
-#define STATE_INVALID           0
-#define STATE_IOREQ_READY       1
-#define STATE_IOREQ_INPROCESS   2
-#define STATE_IORESP_READY      3
-#define STATE_IORESP_HOOK       4
-
-#define IOREQ_TYPE_PIO          0 /* pio */
-#define IOREQ_TYPE_COPY         1 /* mmio ops */
-#define IOREQ_TYPE_AND          2
-#define IOREQ_TYPE_OR           3
-#define IOREQ_TYPE_XOR          4
-
-/*
- * VMExit dispatcher should cooperate with instruction decoder to
- * prepare this structure and notify service OS and DM by sending
- * virq 
- */
-typedef struct {
-    uint64_t addr;   /*  physical address            */
-    uint64_t size;   /*  size in bytes               */
-    uint64_t count;  /*  for rep prefixes            */
-    union {
-        uint64_t data;           /*  data                        */
-        void    *pdata;          /*  pointer to data             */
-    } u;
-    uint8_t state:4;
-    uint8_t pdata_valid:1; /* if 1, use pdata above  */
-    uint8_t dir:1;   /*  1=read, 0=write             */
-    uint8_t df:1;
-    uint8_t type;    /* I/O type                     */
-} ioreq_t;
-
-#define MAX_VECTOR      256
-#define BITS_PER_BYTE   8
-#define INTR_LEN        (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
-#define INTR_LEN_32     (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
-
-typedef struct {
-    uint16_t  pic_elcr;
-    uint16_t   pic_irr;
-    uint16_t   pic_last_irr;
-    uint16_t   pic_clear_irr;
-    int      eport; /* Event channel port */
-} global_iodata_t;
-
-typedef struct {
-    ioreq_t       vp_ioreq;
-    unsigned long vp_intr[INTR_LEN];
-} vcpu_iodata_t;
-
-typedef struct {
-    global_iodata_t sp_global;
-    vcpu_iodata_t   vcpu_iodata[1];
-} shared_iopage_t;
-
-#endif /* _IOREQ_H_ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/vmx_assist.h
--- a/xen/include/public/vmx_assist.h   Mon Jan  9 11:19:55 2006
+++ /dev/null   Mon Jan  9 11:22:17 2006
@@ -1,97 +0,0 @@
-/*
- * vmx_assist.h: Context definitions for the VMXASSIST world switch.
- *
- * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
- * Copyright (c) 2005, International Business Machines Corporation.
- */
-
-#ifndef _VMX_ASSIST_H_
-#define _VMX_ASSIST_H_
-
-#define VMXASSIST_BASE         0xD0000
-#define VMXASSIST_MAGIC        0x17101966
-#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
-
-#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
-#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
-
-#ifndef __ASSEMBLY__
-
-union vmcs_arbytes {
-    struct arbyte_fields {
-        unsigned int seg_type : 4,
-            s         : 1,
-            dpl       : 2,
-            p         : 1, 
-            reserved0 : 4,
-            avl       : 1,
-            reserved1 : 1,     
-            default_ops_size: 1,
-            g         : 1,
-            null_bit  : 1, 
-            reserved2 : 15;
-    } fields;
-    unsigned int bytes;
-};
-
-/*
- * World switch state
- */
-typedef struct vmx_assist_context {
-    uint32_t  eip;        /* execution pointer */
-    uint32_t  esp;        /* stack pointer */
-    uint32_t  eflags;     /* flags register */
-    uint32_t  cr0;
-    uint32_t  cr3;        /* page table directory */
-    uint32_t  cr4;
-    uint32_t  idtr_limit; /* idt */
-    uint32_t  idtr_base;
-    uint32_t  gdtr_limit; /* gdt */
-    uint32_t  gdtr_base;
-    uint32_t  cs_sel;     /* cs selector */
-    uint32_t  cs_limit;
-    uint32_t  cs_base;
-    union vmcs_arbytes cs_arbytes;
-    uint32_t  ds_sel;     /* ds selector */
-    uint32_t  ds_limit;
-    uint32_t  ds_base;
-    union vmcs_arbytes ds_arbytes;
-    uint32_t  es_sel;     /* es selector */
-    uint32_t  es_limit;
-    uint32_t  es_base;
-    union vmcs_arbytes es_arbytes;
-    uint32_t  ss_sel;     /* ss selector */
-    uint32_t  ss_limit;
-    uint32_t  ss_base;
-    union vmcs_arbytes ss_arbytes;
-    uint32_t  fs_sel;     /* fs selector */
-    uint32_t  fs_limit;
-    uint32_t  fs_base;
-    union vmcs_arbytes fs_arbytes;
-    uint32_t  gs_sel;     /* gs selector */
-    uint32_t  gs_limit;
-    uint32_t  gs_base;
-    union vmcs_arbytes gs_arbytes;
-    uint32_t  tr_sel;     /* task selector */
-    uint32_t  tr_limit;
-    uint32_t  tr_base;
-    union vmcs_arbytes tr_arbytes;
-    uint32_t  ldtr_sel;   /* ldtr selector */
-    uint32_t  ldtr_limit;
-    uint32_t  ldtr_base;
-    union vmcs_arbytes ldtr_arbytes;
-} vmx_assist_context_t;
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _VMX_ASSIST_H_ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
