| # HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID c073ebdbde8c0f5c9437706b46c4a34f35033c0c
# Parent  9d52a66c74996a66adf5ee71a0d7f91bb880f7fb
# Parent  954f4dea9da6336aaa35d0706aed55fde7909644
merge with xen-unstable.hg
---
 linux-2.6-xen-sparse/drivers/xen/net_driver_util.c          |   58 
 linux-2.6-xen-sparse/include/asm-x86_64/e820.h              |   63 
 linux-2.6-xen-sparse/include/xen/net_driver_util.h          |   48 
 tools/xenstore/xenstored_proc.h                             |   27 
 .hgignore                                                   |    2 
 extras/mini-os/Makefile                                     |    9 
 extras/mini-os/lib/printf.c                                 |    4 
 extras/mini-os/lib/string.c                                 |    4 
 linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile         |    1 
 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c             |    2 
 linux-2.6-xen-sparse/drivers/xen/Makefile                   |    1 
 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c          |    8 
 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c          |    2 
 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c           |    2 
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c        |    4 
 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c             |    2 
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c            |    2 
 linux-2.6-xen-sparse/drivers/xen/console/console.c          |   28 
 linux-2.6-xen-sparse/drivers/xen/core/Makefile              |   11 
 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c         |  185 +
 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c              |   31 
 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c              |    3 
 linux-2.6-xen-sparse/drivers/xen/core/reboot.c              |    9 
 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c             |  215 --
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c          |    4 
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c           |   31 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c        |   56 
 linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c           |    2 
 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c          |    4 
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c          |   23 
 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c           |    6 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c     |    8 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c      |    4 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h |   63 
 linux-2.6-xen-sparse/include/xen/cpu_hotplug.h              |   42 
 linux-2.6-xen-sparse/include/xen/xenbus.h                   |    8 
 patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch           |   13 
 tools/libxc/Makefile                                        |    1 
 tools/libxc/xc_csched.c                                     |   50 
 tools/libxc/xc_linux_build.c                                |   13 
 tools/libxc/xc_linux_restore.c                              |  122 -
 tools/libxc/xc_private.c                                    |   22 
 tools/libxc/xc_ptrace.c                                     |   77 
 tools/libxc/xc_ptrace.h                                     |    3 
 tools/libxc/xc_ptrace_core.c                                |    7 
 tools/libxc/xc_tbuf.c                                       |   56 
 tools/libxc/xenctrl.h                                       |   11 
 tools/libxc/xg_private.h                                    |   10 
 tools/python/xen/lowlevel/xc/xc.c                           |   61 
 tools/python/xen/lowlevel/xs/xs.c                           |   11 
 tools/python/xen/xend/XendDomain.py                         |   22 
 tools/python/xen/xend/XendDomainInfo.py                     |   14 
 tools/python/xen/xend/balloon.py                            |   11 
 tools/python/xen/xend/server/SrvDomain.py                   |   14 
 tools/python/xen/xend/xenstore/xstransact.py                |   28 
 tools/python/xen/xm/main.py                                 |   45 
 tools/tests/test_x86_emulator.c                             |   67 
 tools/xenstore/Makefile                                     |    8 
 tools/xenstore/xenstored_core.c                             |    7 
 tools/xenstore/xenstored_core.h                             |    8 
 tools/xenstore/xenstored_domain.c                           |   37 
 tools/xenstore/xenstored_linux.c                            |   69 
 xen/arch/x86/domain_build.c                                 |    5 
 xen/arch/x86/hvm/hvm.c                                      |   16 
 xen/arch/x86/hvm/i8254.c                                    |  405 +--
 xen/arch/x86/hvm/intercept.c                                |   82 
 xen/arch/x86/hvm/svm/intr.c                                 |   47 
 xen/arch/x86/hvm/svm/svm.c                                  |   44 
 xen/arch/x86/hvm/svm/vmcb.c                                 |   14 
 xen/arch/x86/hvm/vmx/io.c                                   |   62 
 xen/arch/x86/hvm/vmx/vmx.c                                  |   37 
 xen/arch/x86/mm.c                                           |  129 +
 xen/arch/x86/traps.c                                        |    4 
 xen/arch/x86/x86_emulate.c                                  |   81 
 xen/common/Makefile                                         |    1 
 xen/common/grant_table.c                                    |   15 
 xen/common/kernel.c                                         |    5 
 xen/common/sched_credit.c                                   | 1233 ++++++++++++
 xen/common/schedule.c                                       |    5 
 xen/common/trace.c                                          |    6 
 xen/include/asm-x86/domain.h                                |   12 
 xen/include/asm-x86/hvm/domain.h                            |    6 
 xen/include/asm-x86/hvm/svm/intr.h                          |    1 
 xen/include/asm-x86/hvm/svm/svm.h                           |    1 
 xen/include/asm-x86/hvm/vcpu.h                              |    3 
 xen/include/asm-x86/hvm/vmx/vmx.h                           |    1 
 xen/include/asm-x86/hvm/vpit.h                              |   67 
 xen/include/asm-x86/string.h                                |  162 -
 xen/include/asm-x86/x86_emulate.h                           |   66 
 xen/include/public/io/xenbus.h                              |   59 
 xen/include/public/sched_ctl.h                              |    5 
 xen/include/xen/sched-if.h                                  |    2 
 xen/include/xen/softirq.h                                   |   13 
 93 files changed, 2802 insertions(+), 1546 deletions(-)
diff -r 9d52a66c7499 -r c073ebdbde8c .hgignore
--- a/.hgignore Thu May 25 15:59:18 2006 -0600
+++ b/.hgignore Fri May 26 13:41:49 2006 -0600
@@ -14,7 +14,7 @@
 .*\.orig$
 .*\.rej$
 .*/a\.out$
-.*/cscope\.*$
+.*/cscope\..*$
 ^[^/]*\.bz2$
 ^TAGS$
 ^dist/.*$
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/Makefile   Fri May 26 13:41:49 2006 -0600
@@ -13,6 +13,7 @@ override CPPFLAGS := -Iinclude $(CPPFLAG
 override CPPFLAGS := -Iinclude $(CPPFLAGS)
 ASFLAGS = -D__ASSEMBLY__
 
+LDLIBS =  -L. -lminios
 LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
 
 ifeq ($(TARGET_ARCH),x86_32)
@@ -55,11 +56,11 @@ links:
 links:
        [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
 
-libminios.a: $(OBJS) $(HEAD)
-       ar r libminios.a $(HEAD) $(OBJS)
+libminios.a: links $(OBJS) $(HEAD)
+       $(AR) r libminios.a $(HEAD) $(OBJS)
 
-$(TARGET): links libminios.a $(HEAD)
-       $(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf
+$(TARGET): libminios.a $(HEAD)
+       $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
        gzip -f -9 -c $@.elf >$@.gz
 
 .PHONY: clean
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/lib/printf.c
--- a/extras/mini-os/lib/printf.c       Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/lib/printf.c       Fri May 26 13:41:49 2006 -0600
@@ -53,6 +53,8 @@
  *
  * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -789,4 +791,4 @@ int sscanf(const char * buf, const char 
        return i;
 }
 
-
+#endif
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/lib/string.c
--- a/extras/mini-os/lib/string.c       Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/lib/string.c       Fri May 26 13:41:49 2006 -0600
@@ -17,6 +17,8 @@
  * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
  ****************************************************************************
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -153,3 +155,5 @@ char * strstr(const char * s1,const char
         }
         return NULL;
 }
+
+#endif
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Fri May 26 
13:41:49 2006 -0600
@@ -2,7 +2,6 @@ ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 obj-y   += util.o
 endif
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += net_driver_util.o
 
 obj-y  += core/
 #obj-y += char/
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Fri May 26 13:41:49 
2006 -0600
@@ -329,7 +329,7 @@ out:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        struct tpm_private *tp = dev->data;
        DPRINTK("\n");
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Fri May 26 13:41:49 2006 -0600
@@ -1,5 +1,4 @@
 
-obj-y  += net_driver_util.o
 obj-y  += util.o
 
 obj-y  += core/
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Fri May 26 
13:41:49 2006 -0600
@@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex);
  * Also protects non-atomic updates of current_pages and driver_pages, and
  * balloon lists.
  */
-spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(balloon_lock);
 
 /* We aim for 'current allocation' == 'target allocation'. */
 static unsigned long current_pages;
@@ -360,6 +360,12 @@ static void balloon_process(void *unused
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 static void set_new_target(unsigned long target)
 {
+       unsigned long min_target;
+
+       /* Do not allow target to reduce below 2% of maximum memory size. */
+       min_target = max_pfn / 50;
+       target = max(target, min_target);
+
        /* No need for lock. Not read-modify-write updates. */
        hard_limit   = ~0UL;
        target_pages = target;
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Fri May 26 
13:41:49 2006 -0600
@@ -82,7 +82,7 @@ typedef struct {
 
 static pending_req_t *pending_reqs;
 static struct list_head pending_free;
-static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pending_free_lock);
 static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
 
 #define BLKBACK_INVALID_HANDLE (~0)
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri May 26 13:41:49 
2006 -0600
@@ -247,7 +247,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
  */
 static void frontend_changed(struct xenbus_device *dev,
-                            XenbusState frontend_state)
+                            enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
        int err;
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Fri May 26 
13:41:49 2006 -0600
@@ -247,7 +247,7 @@ fail:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        struct blkfront_info *info = dev->data;
        struct block_device *bd;
@@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s
                   have ignored this request initially, as the device was
                   still mounted. */
                struct xenbus_device * dev = info->xbdev;
-               XenbusState state = xenbus_read_driver_state(dev->otherend);
+               enum xenbus_state state = 
xenbus_read_driver_state(dev->otherend);
 
                if (state == XenbusStateClosing)
                        blkfront_closing(dev);
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Fri May 26 13:41:49 
2006 -0600
@@ -93,7 +93,7 @@ static struct block_device_operations xl
        .ioctl  = blkif_ioctl,
 };
 
-spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(blkif_io_lock);
 
 static struct xlbd_major_info *
 xlbd_alloc_major_info(int major, int minor, int index)
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri May 26 13:41:49 
2006 -0600
@@ -138,7 +138,7 @@ typedef struct {
  */
 static pending_req_t pending_reqs[MAX_PENDING_REQS];
 static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pend_prod_lock);
 /* NB. We use a different index type to differentiate from shared blk rings. */
 typedef unsigned int PEND_RING_IDX;
 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Fri May 26 
13:41:49 2006 -0600
@@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch
 {
        unsigned int goal;
        goal = simple_strtoul(str, NULL, 0);
-       while (wbuf_size < goal)
-               wbuf_size <<= 1;
+       if (goal) {
+               goal = roundup_pow_of_two(goal);
+               if (wbuf_size < goal)
+                       wbuf_size = goal;
+       }
        return 1;
 }
 __setup("xencons_bufsz=", xencons_bufsz_setup);
 
 /* This lock protects accesses to the common transmit buffer. */
-static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xencons_lock);
 
 /* Common transmit-kick routine. */
 static void __xencons_tx_flush(void);
@@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver
 
 /******************** Kernel console driver ********************************/
 
-static void kcons_write(
-       struct console *c, const char *s, unsigned int count)
+static void kcons_write(struct console *c, const char *s, unsigned int count)
 {
        int           i = 0;
        unsigned long flags;
@@ -155,14 +157,14 @@ static void kcons_write(
        spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
-static void kcons_write_dom0(
-       struct console *c, const char *s, unsigned int count)
-{
-       int rc;
-
-       while ((count > 0) &&
-              ((rc = HYPERVISOR_console_io(
-                       CONSOLEIO_write, count, (char *)s)) > 0)) {
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int 
count)
+{
+
+       while (count > 0) {
+               int rc;
+               rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
+               if (rc <= 0)
+                       break;
                count -= rc;
                s += rc;
        }
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Fri May 26 13:41:49 
2006 -0600
@@ -4,8 +4,9 @@
 
 obj-y   := evtchn.o reboot.o gnttab.o features.o
 
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-obj-$(CONFIG_NET)     += skbuff.o
-obj-$(CONFIG_SMP)     += smpboot.o
-obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
+obj-$(CONFIG_PROC_FS)     += xen_proc.o
+obj-$(CONFIG_NET)         += skbuff.o
+obj-$(CONFIG_SMP)         += smpboot.o
+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+obj-$(CONFIG_SYSFS)       += hypervisor_sysfs.o
+obj-$(CONFIG_XEN_SYSFS)   += xen_sysfs.o
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Fri May 26 13:41:49 
2006 -0600
@@ -51,10 +51,10 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
  */
-static spinlock_t irq_mapping_update_lock;
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
 
 /* IRQ <-> event-channel mappings. */
-static int evtchn_to_irq[NR_EVENT_CHANNELS];
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ...  NR_EVENT_CHANNELS-1] = 
-1};
 
 /* Packed IRQ information: binding type, sub-type index, and event channel. */
 static u32 irq_info[NR_IRQS];
@@ -91,13 +91,13 @@ static inline unsigned int type_from_irq
 }
 
 /* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping. */
 #ifndef NR_IPIS
 #define NR_IPIS 1
 #endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
 
 /* Reference counts for bindings to IRQs. */
 static int irq_bindcount[NR_IRQS];
@@ -751,7 +751,9 @@ void irq_resume(void)
                BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
 
        /* Secondary CPUs must have no VIRQ or IPI bindings. */
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+       for_each_possible_cpu(cpu) {
+               if (cpu == 0)
+                       continue;
                for (virq = 0; virq < NR_VIRQS; virq++)
                        BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1);
                for (ipi = 0; ipi < NR_IPIS; ipi++)
@@ -813,25 +815,12 @@ void __init xen_init_IRQ(void)
 void __init xen_init_IRQ(void)
 {
        int i;
-       int cpu;
-
-       spin_lock_init(&irq_mapping_update_lock);
 
        init_evtchn_cpu_bindings();
 
-       /* No VIRQ or IPI bindings. */
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               for (i = 0; i < NR_VIRQS; i++)
-                       per_cpu(virq_to_irq, cpu)[i] = -1;
-               for (i = 0; i < NR_IPIS; i++)
-                       per_cpu(ipi_to_irq, cpu)[i] = -1;
-       }
-
-       /* No event-channel -> IRQ mappings. */
-       for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-               evtchn_to_irq[i] = -1;
-               mask_evtchn(i); /* No event channels are 'live' right now. */
-       }
+       /* No event channels are 'live' right now. */
+       for (i = 0; i < NR_EVENT_CHANNELS; i++)
+               mask_evtchn(i);
 
        /* No IRQ -> event-channel mappings. */
        for (i = 0; i < NR_IRQS; i++)
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Fri May 26 13:41:49 
2006 -0600
@@ -38,7 +38,6 @@
 #include <linux/vmalloc.h>
 #include <asm/pgtable.h>
 #include <xen/interface/xen.h>
-#include <asm/fixmap.h>
 #include <asm/uaccess.h>
 #include <xen/gnttab.h>
 #include <asm/synch_bitops.h>
@@ -81,7 +80,7 @@ static grant_ref_t gnttab_list[NR_GRANT_
 static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
 static int gnttab_free_count;
 static grant_ref_t gnttab_free_head;
-static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(gnttab_list_lock);
 
 static grant_entry_t *shared = NULL;
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Fri May 26 13:41:49 
2006 -0600
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <xen/gnttab.h>
 #include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 /*
@@ -80,14 +81,6 @@ static int shutting_down = SHUTDOWN_INVA
 static int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-
-#ifdef CONFIG_SMP
-int  smp_suspend(void);
-void smp_resume(void);
-#else
-#define smp_suspend()  (0)
-#define smp_resume()   ((void)0)
-#endif
 
 /* Ensure we run on the idle task page tables so that we will
    switch page tables before running user space. This is needed
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Fri May 26 13:41:49 
2006 -0600
@@ -23,6 +23,7 @@
 #include <asm/pgalloc.h>
 #include <xen/evtchn.h>
 #include <xen/interface/vcpu.h>
+#include <xen/cpu_hotplug.h>
 #include <xen/xenbus.h>
 
 #ifdef CONFIG_SMP_ALTERNATIVES
@@ -78,15 +79,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
 #elif !defined(CONFIG_X86_IO_APIC)
 unsigned int maxcpus = NR_CPUS;
 #endif
-
-/*
- * Set of CPUs that remote admin software will allow us to bring online.
- * Notified to us via xenbus.
- */
-static cpumask_t xenbus_allowed_cpumask;
-
-/* Set of CPUs that local admin will allow us to bring online. */
-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
 
 void __init prefill_possible_map(void)
 {
@@ -167,17 +159,17 @@ static void cpu_bringup(void)
        cpu_idle();
 }
 
-static void vcpu_prepare(int vcpu)
+void cpu_initialize_context(unsigned int cpu)
 {
        vcpu_guest_context_t ctxt;
-       struct task_struct *idle = idle_task(vcpu);
+       struct task_struct *idle = idle_task(cpu);
 #ifdef __x86_64__
-       struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu];
+       struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
 #else
-       struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu);
-#endif
-
-       if (vcpu == 0)
+       struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+
+       if (cpu == 0)
                return;
 
        memset(&ctxt, 0, sizeof(ctxt));
@@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu)
 
        ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
 
-       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
-#endif
-
-       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
+       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in
                cpu_set(cpu, cpu_present_map);
 #endif
 
-               vcpu_prepare(cpu);
-       }
-
-       xenbus_allowed_cpumask = cpu_present_map;
+               cpu_initialize_context(cpu);
+       }
+
+       init_xenbus_allowed_cpumask();
 
        /* Currently, Xen gives no dynamic NUMA/HT info. */
        for (cpu = 1; cpu < NR_CPUS; cpu++) {
@@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void
        cpu_online_map   = cpumask_of_cpu(0);
 }
 
-static int local_cpu_hotplug_request(void)
-{
-       /*
-        * We assume a CPU hotplug request comes from local admin if it is made
-        * via a userspace process (i.e., one with a real mm_struct).
-        */
-       return (current->mm != NULL);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
@@ -355,141 +338,6 @@ static int __init initialize_cpu_present
 }
 core_initcall(initialize_cpu_present_map);
 
-static void vcpu_hotplug(unsigned int cpu)
-{
-       int err;
-       char dir[32], state[32];
-
-       if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
-               return;
-
-       sprintf(dir, "cpu/%d", cpu);
-       err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
-       if (err != 1) {
-               printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
-               return;
-       }
-
-       if (strcmp(state, "online") == 0) {
-               cpu_set(cpu, xenbus_allowed_cpumask);
-               (void)cpu_up(cpu);
-       } else if (strcmp(state, "offline") == 0) {
-               cpu_clear(cpu, xenbus_allowed_cpumask);
-               (void)cpu_down(cpu);
-       } else {
-               printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
-                      state, cpu);
-       }
-}
-
-static void handle_vcpu_hotplug_event(
-       struct xenbus_watch *watch, const char **vec, unsigned int len)
-{
-       int cpu;
-       char *cpustr;
-       const char *node = vec[XS_WATCH_PATH];
-
-       if ((cpustr = strstr(node, "cpu/")) != NULL) {
-               sscanf(cpustr, "cpu/%d", &cpu);
-               vcpu_hotplug(cpu);
-       }
-}
-
-static int smpboot_cpu_notify(struct notifier_block *notifier,
-                             unsigned long action, void *hcpu)
-{
-       int cpu = (long)hcpu;
-
-       /*
-        * We do this in a callback notifier rather than __cpu_disable()
-        * because local_cpu_hotplug_request() does not work in the latter
-        * as it's always executed from within a stopmachine kthread.
-        */
-       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
-               cpu_clear(cpu, local_allowed_cpumask);
-
-       return NOTIFY_OK;
-}
-
-static int setup_cpu_watcher(struct notifier_block *notifier,
-                             unsigned long event, void *data)
-{
-       int i;
-
-       static struct xenbus_watch cpu_watch = {
-               .node = "cpu",
-               .callback = handle_vcpu_hotplug_event,
-               .flags = XBWF_new_thread };
-       (void)register_xenbus_watch(&cpu_watch);
-
-       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
-               for_each_cpu(i)
-                       vcpu_hotplug(i);
-               printk(KERN_INFO "Brought up %ld CPUs\n",
-                      (long)num_online_cpus());
-       }
-
-       return NOTIFY_DONE;
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
-       static struct notifier_block hotplug_cpu = {
-               .notifier_call = smpboot_cpu_notify };
-       static struct notifier_block xsn_cpu = {
-               .notifier_call = setup_cpu_watcher };
-
-       register_cpu_notifier(&hotplug_cpu);
-       register_xenstore_notifier(&xsn_cpu);
-
-       return 0;
-}
-
-arch_initcall(setup_vcpu_hotplug_event);
-
-int smp_suspend(void)
-{
-       int i, err;
-
-       lock_cpu_hotplug();
-
-       /*
-        * Take all other CPUs offline. We hold the hotplug mutex to
-        * avoid other processes bringing up CPUs under our feet.
-        */
-       while (num_online_cpus() > 1) {
-               unlock_cpu_hotplug();
-               for_each_online_cpu(i) {
-                       if (i == 0)
-                               continue;
-                       err = cpu_down(i);
-                       if (err) {
-                               printk(KERN_CRIT "Failed to take all CPUs "
-                                      "down: %d.\n", err);
-                               for_each_cpu(i)
-                                       vcpu_hotplug(i);
-                               return err;
-                       }
-               }
-               lock_cpu_hotplug();
-       }
-
-       return 0;
-}
-
-void smp_resume(void)
-{
-       int i;
-
-       for_each_cpu(i)
-               vcpu_prepare(i);
-
-       unlock_cpu_hotplug();
-
-       for_each_cpu(i)
-               vcpu_hotplug(i);
-}
-
 static void
 remove_siblinginfo(int cpu)
 {
@@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu)
 
 #else /* !CONFIG_HOTPLUG_CPU */
 
-int smp_suspend(void)
-{
-       if (num_online_cpus() > 1) {
-               printk(KERN_WARNING "Can't suspend SMP guests "
-                      "without CONFIG_HOTPLUG_CPU\n");
-               return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-void smp_resume(void)
-{
-}
-
 int __cpu_disable(void)
 {
        return -ENOSYS;
@@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu)
 {
        int rc;
 
-       if (local_cpu_hotplug_request()) {
-               cpu_set(cpu, local_allowed_cpumask);
-               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
-                       printk("%s: attempt to bring up CPU %u disallowed by "
-                              "remote admin.\n", __FUNCTION__, cpu);
-                       return -EBUSY;
-               }
-       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
-                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
-               return -EBUSY;
-       }
+       rc = cpu_up_is_allowed(cpu);
+       if (rc)
+               return rc;
 
 #ifdef CONFIG_SMP_ALTERNATIVES
        if (num_online_cpus() == 1)
@@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
        cpu_set(cpu, cpu_online_map);
 
        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
-       if (rc != 0)
-               BUG();
+       BUG_ON(rc);
 
        return 0;
 }
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Fri May 26 
13:41:49 2006 -0600
@@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock
 #define MAX_MFN_ALLOC 64
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
-static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mfn_lock);
 
 static unsigned long alloc_mfn(void)
 {
@@ -691,7 +691,7 @@ static void net_tx_action(unsigned long 
 
 static void netif_idx_release(u16 pending_idx)
 {
-       static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+       static DEFINE_SPINLOCK(_lock);
        unsigned long flags;
 
        spin_lock_irqsave(&_lock, flags);
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri May 26 13:41:49 
2006 -0600
@@ -17,13 +17,10 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
-
 #include <stdarg.h>
 #include <linux/module.h>
 #include <xen/xenbus.h>
-#include <xen/net_driver_util.h>
 #include "common.h"
-
 
 #if 0
 #undef DPRINTK
@@ -31,22 +28,19 @@
     printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 #endif
 
-
 struct backend_info
 {
        struct xenbus_device *dev;
        netif_t *netif;
        struct xenbus_watch backend_watch;
-       XenbusState frontend_state;
+       enum xenbus_state frontend_state;
 };
-
 
 static int connect_rings(struct backend_info *);
 static void connect(struct backend_info *);
 static void maybe_connect(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                            unsigned int);
-
 
 static int netback_remove(struct xenbus_device *dev)
 {
@@ -191,7 +185,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
  */
 static void frontend_changed(struct xenbus_device *dev,
-                            XenbusState frontend_state)
+                            enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
 
@@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen
        kfree(ratestr);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
 
 static void connect(struct backend_info *be)
 {
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Fri May 26 
13:41:49 2006 -0600
@@ -60,7 +60,6 @@
 #include <asm/uaccess.h>
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
-#include <xen/net_driver_util.h>
 
 #define GRANT_INVALID_REF      0
 
@@ -88,12 +87,6 @@ struct netfront_info {
 
        unsigned int handle;
        unsigned int evtchn, irq;
-
-       /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED       0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED    2
-       unsigned int backend_state;
 
        /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
@@ -143,14 +136,6 @@ static inline unsigned short get_id_from
        list[0] = list[id];
        return id;
 }
-
-#ifdef DEBUG
-static const char *be_state_name[] = {
-       [BEST_CLOSED]       = "closed",
-       [BEST_DISCONNECTED] = "disconnected",
-       [BEST_CONNECTED]    = "connected",
-};
-#endif
 
 #define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \
                                        __FUNCTION__, __LINE__, ##args)
@@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus
        return talk_to_backend(dev, info);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
 
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_backend(struct xenbus_device *dev,
@@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de
        }
        memset(txs, 0, PAGE_SIZE);
        memset(rxs, 0, PAGE_SIZE);
-       info->backend_state = BEST_DISCONNECTED;
 
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
@@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        DPRINTK("\n");
 
@@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
 
-       if (np->backend_state != BEST_CONNECTED)
+       if (unlikely(!netif_carrier_ok(dev)))
                return;
 
        do {
@@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str
        struct xen_memory_reservation reservation;
        grant_ref_t ref;
 
-       if (unlikely(np->backend_state != BEST_CONNECTED))
+       if (unlikely(!netif_carrier_ok(dev)))
                return;
 
        /*
@@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_
 
        spin_lock_irq(&np->tx_lock);
 
-       if (np->backend_state != BEST_CONNECTED) {
+       if (unlikely(!netif_carrier_ok(dev))) {
                spin_unlock_irq(&np->tx_lock);
                goto drop;
        }
@@ -748,7 +753,7 @@ static int netif_poll(struct net_device 
 
        spin_lock(&np->rx_lock);
 
-       if (np->backend_state != BEST_CONNECTED) {
+       if (unlikely(!netif_carrier_ok(dev))) {
                spin_unlock(&np->rx_lock);
                return 0;
        }
@@ -1041,7 +1046,7 @@ static void network_connect(struct net_d
         * domain a kick because we've probably just requeued some
         * packets.
         */
-       np->backend_state = BEST_CONNECTED;
+       netif_carrier_on(dev);
        notify_remote_via_irq(np->irq);
        network_tx_buf_gc(dev);
 
@@ -1055,7 +1060,7 @@ static void show_device(struct netfront_
        if (np) {
                IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
                        np->handle,
-                       be_state_name[np->backend_state],
+                       netif_carrier_ok(np->netdev) ? "on" : "off",
                        netif_running(np->netdev) ? "open" : "closed",
                        np->evtchn,
                        np->tx,
@@ -1241,9 +1246,10 @@ static struct net_device * __devinit cre
        }
 
        np                = netdev_priv(netdev);
-       np->backend_state = BEST_CLOSED;
        np->handle        = handle;
        np->xbdev         = dev;
+
+       netif_carrier_off(netdev);
 
        spin_lock_init(&np->tx_lock);
        spin_lock_init(&np->rx_lock);
@@ -1392,7 +1398,7 @@ static void netif_disconnect_backend(str
        /* Stop old i/f to prevent errors whilst we rebuild the state. */
        spin_lock_irq(&info->tx_lock);
        spin_lock(&info->rx_lock);
-       info->backend_state = BEST_DISCONNECTED;
+       netif_carrier_off(info->netdev);
        spin_unlock(&info->rx_lock);
        spin_unlock_irq(&info->tx_lock);
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Fri May 26 13:41:49 
2006 -0600
@@ -166,7 +166,7 @@ static int pciback_attach(struct pciback
 }
 
 static void pciback_frontend_changed(struct xenbus_device *xdev,
-                                    XenbusState fe_state)
+                                    enum xenbus_state fe_state)
 {
        struct pciback_device *pdev = xdev->data;
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Fri May 26 
13:41:49 2006 -0600
@@ -196,7 +196,7 @@ static int pcifront_try_disconnect(struc
 static int pcifront_try_disconnect(struct pcifront_device *pdev)
 {
        int err = 0;
-       XenbusState prev_state;
+       enum xenbus_state prev_state;
 
        spin_lock(&pdev->dev_lock);
 
@@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc
 }
 
 static void pcifront_backend_changed(struct xenbus_device *xdev,
-                                    XenbusState be_state)
+                                    enum xenbus_state be_state)
 {
        struct pcifront_device *pdev = xdev->data;
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Fri May 26 
13:41:49 2006 -0600
@@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i
        break;
 
        case IOCTL_PRIVCMD_MMAPBATCH: {
-#ifndef __ia64__
-               mmu_update_t u;
-               uint64_t ptep;
-#endif
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
                unsigned long __user *p;
@@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i
                for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
                        if (get_user(mfn, p))
                                return -EFAULT;
-#ifdef __ia64__
+
                        ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
-                                                    mfn, 1 << PAGE_SHIFT,
+                                                    mfn, PAGE_SIZE,
                                                     vma->vm_page_prot, m.dom);
                        if (ret < 0)
-                           goto batch_err;
-#else
-
-                       ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
-                       if (ret)
-                               goto batch_err;
-
-                       u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
-                       u.ptr = ptep;
-
-                       if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
                                put_user(0xF0000000 | mfn, p);
-#endif
                }
 
                ret = 0;
@@ -283,6 +267,9 @@ static int __init privcmd_init(void)
        set_bit(__HYPERVISOR_mmuext_op,        hypercall_permission_map);
        set_bit(__HYPERVISOR_xen_version,      hypercall_permission_map);
        set_bit(__HYPERVISOR_sched_op,         hypercall_permission_map);
+       set_bit(__HYPERVISOR_sched_op_compat,  hypercall_permission_map);
+       set_bit(__HYPERVISOR_event_channel_op_compat,
+               hypercall_permission_map);
 
        privcmd_intf = create_xen_proc_entry("privcmd", 0400);
        if (privcmd_intf != NULL)
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Fri May 26 13:41:49 
2006 -0600
@@ -34,7 +34,7 @@ struct backend_info
 
        /* watch front end for changes */
        struct xenbus_watch backend_watch;
-       XenbusState frontend_state;
+       enum xenbus_state frontend_state;
 };
 
 static void maybe_connect(struct backend_info *be);
@@ -43,7 +43,7 @@ static void backend_changed(struct xenbu
 static void backend_changed(struct xenbus_watch *watch,
                             const char **vec, unsigned int len);
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state);
+                             enum xenbus_state frontend_state);
 
 static int tpmback_remove(struct xenbus_device *dev)
 {
@@ -129,7 +129,7 @@ static void backend_changed(struct xenbu
 
 
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state)
+                             enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
        int err;
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Fri May 26 
13:41:49 2006 -0600
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 
 
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState state)
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
 {
        /* We check whether the state is currently set to the given value, and
           if not, then the state is set.  We don't want to unconditionally
@@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev
 }
 
 
-XenbusState xenbus_read_driver_state(const char *path)
-{
-       XenbusState result;
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+       enum xenbus_state result;
        int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
        if (err)
                result = XenbusStateClosed;
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri May 26 
13:41:49 2006 -0600
@@ -284,7 +284,7 @@ static void otherend_changed(struct xenb
        struct xenbus_device *dev =
                container_of(watch, struct xenbus_device, otherend_watch);
        struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
-       XenbusState state;
+       enum xenbus_state state;
 
        /* Protect us against watches firing on old details when the otherend
           details change, say immediately after a resume. */
@@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_
        size_t stringlen;
        char *tmpstring;
 
-       XenbusState state = xenbus_read_driver_state(nodename);
+       enum xenbus_state state = xenbus_read_driver_state(nodename);
 
        if (state != XenbusStateInitialising) {
                /* Device is not new, so ignore it.  This can happen if a
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Fri May 26 13:41:49 2006 -0600
@@ -75,7 +75,7 @@ struct xenbus_device {
        int otherend_id;
        struct xenbus_watch otherend_watch;
        struct device dev;
-       XenbusState state;
+       enum xenbus_state state;
        void *data;
 };
 
@@ -98,7 +98,7 @@ struct xenbus_driver {
        int (*probe)(struct xenbus_device *dev,
                     const struct xenbus_device_id *id);
        void (*otherend_changed)(struct xenbus_device *dev,
-                                XenbusState backend_state);
+                                enum xenbus_state backend_state);
        int (*remove)(struct xenbus_device *dev);
        int (*suspend)(struct xenbus_device *dev);
        int (*resume)(struct xenbus_device *dev);
@@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev
  * Return 0 on success, or -errno on error.  On error, the device will switch
  * to XenbusStateClosing, and the error will be saved in the store.
  */
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state);
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state 
new_state);
 
 
 /**
@@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev
  * Return the state of the driver rooted at the given store path, or
  * XenbusStateClosed if no state can be read.
  */
-XenbusState xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const char *path);
 
 
 /***
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/Makefile      Fri May 26 13:41:49 2006 -0600
@@ -20,6 +20,7 @@ SRCS       += xc_physdev.c
 SRCS       += xc_physdev.c
 SRCS       += xc_private.c
 SRCS       += xc_sedf.c
+SRCS       += xc_csched.c
 SRCS       += xc_tbuf.c
 
 ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86)
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_linux_build.c      Fri May 26 13:41:49 2006 -0600
@@ -268,21 +268,10 @@ static int setup_pg_tables_pae(int xc_ha
     l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
     l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
     uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
-    unsigned long ppt_alloc, count, nmfn;
+    unsigned long ppt_alloc, count;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
-
-    if ( page_array[ppt_alloc] > 0xfffff )
-    {
-        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
-        if ( nmfn == 0 )
-        {
-            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
-            goto error_out;
-        }
-        page_array[ppt_alloc] = nmfn;
-    }
 
     alloc_pt(l3tab, vl3tab, pl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c    Fri May 26 13:41:49 2006 -0600
@@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int 
                 ** A page table page - need to 'uncanonicalize' it, i.e.
                 ** replace all the references to pfns with the corresponding
                 ** mfns for the new domain.
-                **
-                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
-                ** so we may need to update the p2m after the main loop.
-                ** Hence we defer canonicalization of L1s until then.
                 */
-                if(pt_levels != 3 || pagetype != L1TAB) {
-
-                    if(!uncanonicalize_pagetable(pagetype, page)) {
-                        /*
-                        ** Failing to uncanonicalize a page table can be ok
-                        ** under live migration since the pages type may have
-                        ** changed by now (and we'll get an update later).
-                        */
-                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                                pagetype >> 28, pfn, mfn);
-                        nraces++;
-                        continue;
-                    }
-
+                if(!uncanonicalize_pagetable(pagetype, page)) {
+                    /*
+                    ** Failing to uncanonicalize a page table can be ok
+                    ** under live migration since the pages type may have
+                    ** changed by now (and we'll get an update later).
+                    */
+                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+                            pagetype >> 28, pfn, mfn);
+                    nraces++;
+                    continue;
                 }
 
             } else if(pagetype != NOTAB) {
@@ -397,100 +389,6 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     DPRINTF("Received all pages (%d races)\n", nraces);
-
-    if(pt_levels == 3) {
-
-        /*
-        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
-        ** is a little awkward and involves (a) finding all such PGDs and
-        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
-        ** with the new info; and (c) canonicalizing all the L1s using the
-        ** (potentially updated) p2m[].
-        **
-        ** This is relatively slow (and currently involves two passes through
-        ** the pfn_type[] array), but at least seems to be correct. May wish
-        ** to consider more complex approaches to optimize this later.
-        */
-
-        int j, k;
-
-        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for (i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
-
-                unsigned long new_mfn;
-                uint64_t l3ptes[4];
-                uint64_t *l3tab;
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3ptes[j] = l3tab[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
-                    ERR("Couldn't get a page below 4GB :-(");
-                    goto out;
-                }
-
-                p2m[i] = new_mfn;
-                if (xc_add_mmu_update(xc_handle, mmu,
-                                      (((unsigned long long)new_mfn)
-                                       << PAGE_SHIFT) |
-                                      MMU_MACHPHYS_UPDATE, i)) {
-                    ERR("Couldn't m2p on PAE root pgdir");
-                    goto out;
-                }
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ | PROT_WRITE, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3tab[j] = l3ptes[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-            }
-        }
-
-        /* Second pass: find all L1TABs and uncanonicalize them */
-        j = 0;
-
-        for(i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
-                region_mfn[j] = p2m[i];
-                j++;
-            }
-
-            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
-
-                if (!(region_base = xc_map_foreign_batch(
-                          xc_handle, dom, PROT_READ | PROT_WRITE,
-                          region_mfn, j))) {
-                    ERR("map batch failed");
-                    goto out;
-                }
-
-                for(k = 0; k < j; k++) {
-                    if(!uncanonicalize_pagetable(L1TAB,
-                                                 region_base + k*PAGE_SIZE)) {
-                        ERR("failed uncanonicalize pt!");
-                        goto out;
-                    }
-                }
-
-                munmap(region_base, j*PAGE_SIZE);
-                j = 0;
-            }
-        }
-
-    }
 
 
     if (xc_finish_mmu_updates(xc_handle, mmu)) {
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_private.c  Fri May 26 13:41:49 2006 -0600
@@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v
     return rc;
 }
 
-unsigned long xc_make_page_below_4G(
-    int xc_handle, uint32_t domid, unsigned long mfn)
-{
-    unsigned long new_mfn;
-
-    if ( xc_domain_memory_decrease_reservation(
-        xc_handle, domid, 1, 0, &mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    if ( xc_domain_memory_increase_reservation(
-        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    return new_mfn;
-}
-
 /*
  * Local variables:
  * mode: C
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace.c   Fri May 26 13:41:49 2006 -0600
@@ -185,7 +185,7 @@ map_domain_va_32(
     void *guest_va,
     int perm)
 {
-    unsigned long l1p, p, va = (unsigned long)guest_va;
+    unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
     uint32_t *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
 
@@ -194,18 +194,20 @@ map_domain_va_32(
     if ( l2 == NULL )
         return NULL;
 
-    l1p = to_ma(cpu, l2[l2_table_offset(va)]);
+    l2e = l2[l2_table_offset_i386(va)];
     munmap(l2, PAGE_SIZE);
-    if ( !(l1p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
     l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l1p >> PAGE_SHIFT);
     if ( l1 == NULL )
         return NULL;
 
-    p = to_ma(cpu, l1[l1_table_offset(va)]);
+    l1e = l1[l1_table_offset_i386(va)];
     munmap(l1, PAGE_SIZE);
-    if ( !(p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
     v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
@@ -223,7 +225,7 @@ map_domain_va_pae(
     void *guest_va,
     int perm)
 {
-    unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+    unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
     uint64_t *l3, *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
 
@@ -232,26 +234,29 @@ map_domain_va_pae(
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]);
+    l3e = l3[l3_table_offset_pae(va)];
     munmap(l3, PAGE_SIZE);
-    if ( !(l2p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l2p >> PAGE_SHIFT);
     if ( l2 == NULL )
         return NULL;
 
-    l1p = to_ma(cpu, l2[l2_table_offset_pae(va)]);
+    l2e = l2[l2_table_offset_pae(va)];
     munmap(l2, PAGE_SIZE);
-    if ( !(l1p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
     l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p 
>> PAGE_SHIFT);
     if ( l1 == NULL )
         return NULL;
 
-    p = to_ma(cpu, l1[l1_table_offset_pae(va)]);
+    l1e = l1[l1_table_offset_pae(va)];
     munmap(l1, PAGE_SIZE);
-    if ( !(p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
     v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
@@ -269,9 +274,10 @@ map_domain_va_64(
     void *guest_va,
     int perm)
 {
-    unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va;
+    unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned 
long)guest_va;
     uint64_t *l4, *l3, *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
+
 
     if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
         return map_domain_va_32(xc_handle, cpu, guest_va, perm);
@@ -281,40 +287,41 @@ map_domain_va_64(
     if ( l4 == NULL )
         return NULL;
 
-    l3p = to_ma(cpu, l4[l4_table_offset(va)]);
+    l4e = l4[l4_table_offset(va)];
     munmap(l4, PAGE_SIZE);
-    if ( !(l3p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l4e & _PAGE_PRESENT) )
+        return NULL;
+    l3p = to_ma(cpu, l4e);
     l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l3p >> PAGE_SHIFT);
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset(va)]);
+    l3e = l3[l3_table_offset(va)];
     munmap(l3, PAGE_SIZE);
-    if ( !(l2p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l2p >> PAGE_SHIFT);
     if ( l2 == NULL )
         return NULL;
 
     l1 = NULL;
-    l1e = to_ma(cpu, l2[l2_table_offset(va)]);
-    if ( !(l1e & _PAGE_PRESENT) )
-    {
-        munmap(l2, PAGE_SIZE);
-        return NULL;
-    }
-    l1p = l1e >> PAGE_SHIFT;
-    if (l1e & 0x80)  { /* 2M pages */
+    l2e = l2[l2_table_offset(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
+    if (l2e & 0x80)  { /* 2M pages */
         p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT);
     } else { /* 4K pages */
-        l1p = to_ma(cpu, l1p);
         l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, 
l1p >> PAGE_SHIFT);
-        munmap(l2, PAGE_SIZE);
         if ( l1 == NULL )
             return NULL;
 
-        p = to_ma(cpu, l1[l1_table_offset(va)]);
+        l1e = l1[l1_table_offset(va)];
+        if ( !(l1e & _PAGE_PRESENT) )
+            return NULL;
+        p = to_ma(cpu, l1e);
     }
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace.h
--- a/tools/libxc/xc_ptrace.h   Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace.h   Fri May 26 13:41:49 2006 -0600
@@ -7,7 +7,6 @@
 #define X86_CR0_PE              0x00000001 /* Enable Protected Mode    (RW) */
 #define X86_CR0_PG              0x80000000 /* Paging                   (RW) */
 #define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PDRSHIFT        22
 #define PSL_T  0x00000100 /* trace enable bit */
 
 #ifdef __x86_64__
@@ -162,8 +161,6 @@ struct gdb_regs {
 #endif
 
 #define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define vtopdi(va) ((va) >> PDRSHIFT)
-#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
 #endif
 
 typedef void (*thr_ev_handler_t)(long);
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace_core.c      Fri May 26 13:41:49 2006 -0600
@@ -3,6 +3,7 @@
 #include <sys/ptrace.h>
 #include <sys/wait.h>
 #include "xc_private.h"
+#include "xg_private.h"
 #include "xc_ptrace.h"
 #include <time.h>
 
@@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd, 
         }
         cr3_virt[cpu] = v;
     }
-    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+    if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical 
address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd, 
             return NULL;
         pde_virt[cpu] = v;
     }
-    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+    if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical 
address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd, 
             map_mtop_offset(page_phys[cpu]));
         if (v == MAP_FAILED)
         {
-            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, 
vtopti(va));
+            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, 
l1_table_offset_i386(va));
             page_phys[cpu] = 0;
             return NULL;
         }
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_tbuf.c
--- a/tools/libxc/xc_tbuf.c     Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_tbuf.c     Fri May 26 13:41:49 2006 -0600
@@ -18,53 +18,57 @@
 
 static int tbuf_enable(int xc_handle, int enable)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  if (enable)
-    op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
-  else
-    op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    if (enable)
+        op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
+    else
+        op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_set_size(int xc_handle, unsigned long size)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
-  op.u.tbufcontrol.size = size;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
+    op.u.tbufcontrol.size = size;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_get_size(int xc_handle, unsigned long *size)
 {
-  int rc;
-  DECLARE_DOM0_OP;
+    int rc;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
 
-  rc = xc_dom0_op(xc_handle, &op);
-  if (rc == 0)
-    *size = op.u.tbufcontrol.size;
-  return rc;
+    rc = xc_dom0_op(xc_handle, &op);
+    if (rc == 0)
+        *size = op.u.tbufcontrol.size;
+    return rc;
 }
 
 int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn,
-    unsigned long *size)
+                   unsigned long *size)
 {
     DECLARE_DOM0_OP;
     int rc;
 
-    if ( xc_tbuf_set_size(xc_handle, cnt) != 0 )
-        return -1;
+    /*
+     * Ignore errors (at least for now) as we get an error if size is already
+     * set (since trace buffers cannot be reallocated). If we really have no
+     * buffers at all then tbuf_enable() will fail, so this is safe.
+     */
+    (void)xc_tbuf_set_size(xc_handle, cnt);
 
     if ( tbuf_enable(xc_handle, 1) != 0 )
         return -1;
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xenctrl.h     Fri May 26 13:41:49 2006 -0600
@@ -354,6 +354,14 @@ int xc_sedf_domain_get(int xc_handle,
                        uint64_t *latency, uint16_t *extratime,
                        uint16_t *weight);
 
+int xc_csched_domain_set(int xc_handle,
+                         uint32_t domid,
+                         struct csched_domain *sdom);
+
+int xc_csched_domain_get(int xc_handle,
+                         uint32_t domid,
+                         struct csched_domain *sdom);
+
 typedef evtchn_status_t xc_evtchn_status_t;
 
 /*
@@ -444,9 +452,6 @@ int xc_domain_iomem_permission(int xc_ha
                                unsigned long first_mfn,
                                unsigned long nr_mfns,
                                uint8_t allow_access);
-
-unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
-                                    unsigned long mfn);
 
 typedef dom0_perfc_desc_t xc_perfc_desc_t;
 /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xg_private.h  Fri May 26 13:41:49 2006 -0600
@@ -48,6 +48,8 @@ unsigned long csum_page (void * page);
 #define L2_PAGETABLE_SHIFT_PAE   21
 #define L3_PAGETABLE_SHIFT_PAE   30
 
+#define L2_PAGETABLE_SHIFT_I386  22
+
 #if defined(__i386__)
 #define L1_PAGETABLE_SHIFT       12
 #define L2_PAGETABLE_SHIFT       22
@@ -61,6 +63,9 @@ unsigned long csum_page (void * page);
 #define L1_PAGETABLE_ENTRIES_PAE  512
 #define L2_PAGETABLE_ENTRIES_PAE  512
 #define L3_PAGETABLE_ENTRIES_PAE    4
+
+#define L1_PAGETABLE_ENTRIES_I386 1024
+#define L2_PAGETABLE_ENTRIES_I386 1024
 
 #if defined(__i386__)
 #define L1_PAGETABLE_ENTRIES   1024
@@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t;
 #define l3_table_offset_pae(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
 
+#define l1_table_offset_i386(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
+#define l2_table_offset_i386(_a) \
+  (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))
+
 #if defined(__i386__)
 #define l1_table_offset(_a) \
           (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri May 26 13:41:49 2006 -0600
@@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc
                          "weight",    weight);
 }
 
+static PyObject *pyxc_csched_domain_set(XcObject *self,
+                                        PyObject *args,
+                                        PyObject *kwds)
+{
+    uint32_t domid;
+    uint16_t weight;
+    uint16_t cap;
+    static char *kwd_list[] = { "dom", "weight", "cap", NULL };
+    static char kwd_type[] = "I|HH";
+    struct csched_domain sdom;
+    
+    weight = 0;
+    cap = (uint16_t)~0U;
+    if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, 
+                                     &domid, &weight, &cap) )
+        return NULL;
+
+    sdom.weight = weight;
+    sdom.cap = cap;
+
+    if ( xc_csched_domain_set(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_csched_domain_get(XcObject *self, PyObject *args)
+{
+    uint32_t domid;
+    struct csched_domain sdom;
+    
+    if( !PyArg_ParseTuple(args, "I", &domid) )
+        return NULL;
+    
+    if ( xc_csched_domain_get(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    return Py_BuildValue("{s:H,s:H}",
+                         "weight",  sdom.weight,
+                         "cap",     sdom.cap);
+}
+
 static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
 {
     uint32_t dom;
@@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = {
       " slice     [long]: CPU reservation per period\n"
       " latency   [long]: domain's wakeup latency hint\n"
       " extratime [int]:  domain aware of extratime?\n"},
+    
+    { "csched_domain_set",
+      (PyCFunction)pyxc_csched_domain_set,
+      METH_KEYWORDS, "\n"
+      "Set the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to set\n"
+      " weight    [short]: domain's scheduling weight\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "csched_domain_get",
+      (PyCFunction)pyxc_csched_domain_get,
+      METH_VARARGS, "\n"
+      "Get the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to get\n"
+      "Returns:   [dict]\n"
+      " weight    [short]: domain's scheduling weight\n"},
 
     { "evtchn_alloc_unbound", 
       (PyCFunction)pyxc_evtchn_alloc_unbound,
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/lowlevel/xs/xs.c Fri May 26 13:41:49 2006 -0600
@@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs
 
     if (perms) {
         PyObject *val = PyList_New(perms_n);
-        for (i = 0; i < perms_n; i++, perms++) {
-            PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
-                                        "dom",  perms->id,
-                                        "read", perms->perms & XS_PERM_READ,
-                                        "write",perms->perms & XS_PERM_WRITE);
+        for (i = 0; i < perms_n; i++) {
+            PyObject *p =
+                Py_BuildValue("{s:i,s:i,s:i}",
+                              "dom",   perms[i].id,
+                              "read",  perms[i].perms & XS_PERM_READ,
+                              "write", perms[i].perms & XS_PERM_WRITE);
             PyList_SetItem(val, i, p);
         }
 
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Fri May 26 13:41:49 2006 -0600
@@ -522,6 +522,28 @@ class XendDomain:
         except Exception, ex:
             raise XendError(str(ex))
 
+    def domain_csched_get(self, domid):
+        """Get credit scheduler parameters for a domain.
+        """
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            return xc.csched_domain_get(dominfo.getDomid())
+        except Exception, ex:
+            raise XendError(str(ex))
+    
+    def domain_csched_set(self, domid, weight, cap):
+        """Set credit scheduler parameters for a domain.
+        """
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            return xc.csched_domain_set(dominfo.getDomid(), weight, cap)
+        except Exception, ex:
+            raise XendError(str(ex))
+
     def domain_maxmem_set(self, domid, mem):
         """Set the memory limit for a domain.
 
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri May 26 13:41:49 2006 -0600
@@ -701,6 +701,16 @@ class XendDomainInfo:
         log.debug("Storing VM details: %s", to_store)
 
         self.writeVm(to_store)
+        self.setVmPermissions()
+
+
+    def setVmPermissions(self):
+        """Allow the guest domain to read its UUID.  We don't allow it to
+        access any other entry, for security."""
+        xstransact.SetPermissions('%s/uuid' % self.vmpath,
+                                  { 'dom' : self.domid,
+                                    'read' : True,
+                                    'write' : False })
 
 
     def storeDomDetails(self):
@@ -1535,6 +1545,10 @@ class XendDomainInfo:
 
         self.configure_bootloader()
         config = self.sxpr()
+
+        if self.infoIsSet('cpus') and len(self.info['cpus']) != 0:
+            config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y),
+                                          self.info['cpus'])])
 
         if self.readVm(RESTART_IN_PROGRESS):
             log.error('Xend failed during restart of domain %d.  '
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/balloon.py  Fri May 26 13:41:49 2006 -0600
@@ -32,6 +32,7 @@ BALLOON_OUT_SLACK = 1 # MiB.  We need th
 BALLOON_OUT_SLACK = 1 # MiB.  We need this because the physinfo details are
                       # rounded.
 RETRY_LIMIT = 10
+RETRY_LIMIT_INCR = 5
 ##
 # The time to sleep between retries grows linearly, using this value (in
 # seconds).  When the system is lightly loaded, memory should be scrubbed and
@@ -118,7 +119,8 @@ def free(required):
         retries = 0
         sleep_time = SLEEP_TIME_GROWTH
         last_new_alloc = None
-        while retries < RETRY_LIMIT:
+        rlimit = RETRY_LIMIT
+        while retries < rlimit:
             free_mem = xc.physinfo()['free_memory']
 
             if free_mem >= need_mem:
@@ -127,7 +129,9 @@ def free(required):
                 return
 
             if retries == 0:
-                log.debug("Balloon: free %d; need %d.", free_mem, need_mem)
+                rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
+                log.debug("Balloon: free %d; need %d; retries: %d.", 
+                          free_mem, need_mem, rlimit)
 
             if dom0_min_mem > 0:
                 dom0_alloc = get_dom0_current_alloc()
@@ -143,8 +147,9 @@ def free(required):
                     # Continue to retry, waiting for ballooning.
 
             time.sleep(sleep_time)
+            if retries < 2 * RETRY_LIMIT:
+                sleep_time += SLEEP_TIME_GROWTH
             retries += 1
-            sleep_time += SLEEP_TIME_GROWTH
 
         # Not enough memory; diagnose the problem.
         if dom0_min_mem == 0:
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/server/SrvDomain.py Fri May 26 13:41:49 2006 -0600
@@ -129,6 +129,20 @@ class SrvDomain(SrvDir):
                     ['latency', 'int'],
                     ['extratime', 'int'],
                     ['weight', 'int']])
+        val = fn(req.args, {'dom': self.dom.domid})
+        return val
+    
+    def op_domain_csched_get(self, _, req):
+        fn = FormFn(self.xd.domain_csched_get,
+                    [['dom', 'int']])
+        val = fn(req.args, {'dom': self.dom.domid})
+        return val
+
+
+    def op_domain_csched_set(self, _, req):
+        fn = FormFn(self.xd.domain_csched_set,
+                    [['dom', 'int'],
+                     ['weight', 'int']])
         val = fn(req.args, {'dom': self.dom.domid})
         return val
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py      Thu May 25 15:59:18 
2006 -0600
+++ b/tools/python/xen/xend/xenstore/xstransact.py      Fri May 26 13:41:49 
2006 -0600
@@ -221,6 +221,34 @@ class xstransact:
                 xshandle().mkdir(self.transaction, self.prependPath(key))
 
 
+    def get_permissions(self, *args):
+        """If no arguments are given, return the permissions at this
+        transaction's path.  If one argument is given, treat that argument as
+        a subpath to this transaction's path, and return the permissions at
+        that path.  Otherwise, treat each argument as a subpath to this
+        transaction's path, and return a list composed of the permissions at
+        each of those instead.
+        """
+        if len(args) == 0:
+            return xshandle().get_permissions(self.transaction, self.path)
+        if len(args) == 1:
+            return self._get_permissions(args[0])
+        ret = []
+        for key in args:
+            ret.append(self._get_permissions(key))
+        return ret
+
+
+    def _get_permissions(self, key):
+        path = self.prependPath(key)
+        try:
+            return xshandle().get_permissions(self.transaction, path)
+        except RuntimeError, ex:
+            raise RuntimeError(ex.args[0],
+                               '%s, while getting permissions from %s' %
+                               (ex.args[1], path))
+
+
     def set_permissions(self, *args):
         if len(args) == 0:
             raise TypeError
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xm/main.py       Fri May 26 13:41:49 2006 -0600
@@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT
                                     specifies another way of setting a 
domain's\n\
                                     cpu period/slice."
 
+csched_help = "csched                           Set or get credit scheduler 
parameters"
 block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode>
                 [BackDomId]         Create a new virtual block device"""
 block_detach_help = """block-detach  <DomId> <DevId>    Destroy a domain's 
virtual block device,
@@ -174,6 +175,7 @@ host_commands = [
     ]
 
 scheduler_commands = [
+    "csched",
     "sched-bvt",
     "sched-bvt-ctxallow",
     "sched-sedf",
@@ -735,6 +737,48 @@ def xm_sched_sedf(args):
         else:
             print_sedf(sedf_info)
 
+def xm_csched(args):
+    usage_msg = """Csched:     Set or get credit scheduler parameters
+ Usage:
+
+        csched -d domain [-w weight] [-c cap]
+    """
+    try:
+        opts, args = getopt.getopt(args[0:], "d:w:c:",
+            ["domain=", "weight=", "cap="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print usage_msg
+        sys.exit(1)
+
+    domain = None
+    weight = None
+    cap = None
+
+    for o, a in opts:
+        if o == "-d":
+            domain = a
+        elif o == "-w":
+            weight = int(a)
+        elif o == "-c":
+            cap = int(a);
+
+    if domain is None:
+        # place holder for system-wide scheduler parameters
+        print usage_msg
+        sys.exit(1)
+
+    if weight is None and cap is None:
+        print server.xend.domain.csched_get(domain)
+    else:
+        if weight is None:
+            weight = int(0)
+        if cap is None:
+            cap = int(~0)
+
+        err = server.xend.domain.csched_set(domain, weight, cap)
+        if err != 0:
+            print err
 
 def xm_info(args):
     arg_check(args, "info", 0)
@@ -1032,6 +1076,7 @@ commands = {
     "sched-bvt": xm_sched_bvt,
     "sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
     "sched-sedf": xm_sched_sedf,
+    "csched": xm_csched,
     # block
     "block-attach": xm_block_attach,
     "block-detach": xm_block_detach,
diff -r 9d52a66c7499 -r c073ebdbde8c tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Thu May 25 15:59:18 2006 -0600
+++ b/tools/tests/test_x86_emulator.c   Fri May 26 13:41:49 2006 -0600
@@ -17,7 +17,8 @@ static int read_any(
 static int read_any(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -32,7 +33,8 @@ static int write_any(
 static int write_any(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -48,7 +50,8 @@ static int cmpxchg_any(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -65,34 +68,38 @@ static int cmpxchg8b_any(
     unsigned long old_lo,
     unsigned long old_hi,
     unsigned long new_lo,
-    unsigned long new_hi)
+    unsigned long new_hi,
+    struct x86_emulate_ctxt *ctxt)
 {
     ((unsigned long *)addr)[0] = new_lo;
     ((unsigned long *)addr)[1] = new_hi;
     return X86EMUL_CONTINUE;
 }
 
-static struct x86_mem_emulator emulops = {
+static struct x86_emulate_ops emulops = {
     read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
 };
 
 int main(int argc, char **argv)
 {
+    struct x86_emulate_ctxt ctxt;
     struct cpu_user_regs regs;
     char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
     unsigned int res = 0x7FFFFFFF;
     u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
-    unsigned long cr2;
     int rc;
+
+    ctxt.regs = ®s;
+    ctxt.mode = X86EMUL_MODE_PROT32;
 
     printf("%-40s", "Testing addl %%ecx,(%%eax)...");
     instr[0] = 0x01; instr[1] = 0x08;
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x7FFFFFFF;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.eflags != 0xa94) ||
@@ -109,8 +116,8 @@ int main(int argc, char **argv)
 #else
     regs.ecx    = 0x12345678UL;
 #endif
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.ecx != 0x8000000FUL) ||
@@ -124,8 +131,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          (regs.eflags != 0x244) ||
@@ -140,8 +147,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          ((regs.eflags&0x240) != 0x200) ||
@@ -156,8 +163,8 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x12345678) || 
          (regs.eflags != 0x200) ||
@@ -173,8 +180,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0xDDEEFF00) || 
          (regs.eflags != 0x244) ||
@@ -192,8 +199,8 @@ int main(int argc, char **argv)
     regs.esi    = (unsigned long)&res + 0;
     regs.edi    = (unsigned long)&res + 2;
     regs.error_code = 0; /* read fault */
-    cr2         = regs.esi;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = regs.esi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x44554455) ||
          (regs.eflags != 0x200) ||
@@ -210,8 +217,8 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)&res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
@@ -228,8 +235,8 @@ int main(int argc, char **argv)
     regs.ecx    = 0xCCCCFFFF;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -242,8 +249,8 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -258,9 +265,9 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x82;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
          (res != 0x82) ||
          (regs.ecx != 0xFFFFFF82) ||
@@ -273,9 +280,9 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x1234aa82;
-    rc = x86_emulate_memop(®s, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
          (res != 0x1234aa82) ||
          (regs.ecx != 0xaa82) ||
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/Makefile   Fri May 26 13:41:49 2006 -0600
@@ -27,6 +27,12 @@ CLIENTS += xenstore-write
 CLIENTS += xenstore-write
 CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS))
 
+XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+
+XENSTORED_Linux = xenstored_linux.o
+
+XENSTORED_OBJS += $(XENSTORED_$(OS))
+
 .PHONY: all
 all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control 
xenstore-ls
 
@@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte
 .PHONY: testcode
 testcode: xs_test xenstored_test xs_random
 
-xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+xenstored: $(XENSTORED_OBJS)
        $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
 
 $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_core.c   Fri May 26 13:41:49 2006 -0600
@@ -451,6 +451,11 @@ static struct node *read_node(struct con
 
 static bool write_node(struct connection *conn, const struct node *node)
 {
+       /*
+        * conn will be null when this is called from manual_node.
+        * tdb_context copes with this.
+        */
+
        TDB_DATA key, data;
        void *p;
 
@@ -478,7 +483,7 @@ static bool write_node(struct connection
 
        /* TDB should set errno, but doesn't even set ecode AFAICT. */
        if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
-               corrupt(conn, "Write of %s = %s failed", key, data);
+               corrupt(conn, "Write of %s failed", key.dptr);
                goto error;
        }
        return true;
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h   Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_core.h   Fri May 26 13:41:49 2006 -0600
@@ -19,6 +19,8 @@
 
 #ifndef _XENSTORED_CORE_H
 #define _XENSTORED_CORE_H
+
+#include <xenctrl.h>
 
 #include <sys/types.h>
 #include <dirent.h>
@@ -163,6 +165,12 @@ void trace(const char *fmt, ...);
 
 extern int event_fd;
 
+/* Map the kernel's xenstore page. */
+void *xenbus_map(void);
+
+/* Return the event channel used by xenbus. */
+evtchn_port_t xenbus_evtchn(void);
+
 #endif /* _XENSTORED_CORE_H */
 
 /*
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_domain.c Fri May 26 13:41:49 2006 -0600
@@ -33,12 +33,11 @@
 #include "talloc.h"
 #include "xenstored_core.h"
 #include "xenstored_domain.h"
-#include "xenstored_proc.h"
 #include "xenstored_watch.h"
 #include "xenstored_test.h"
 
 #include <xenctrl.h>
-#include <xen/linux/evtchn.h>
+#include <xen/sys/evtchn.h>
 
 static int *xc_handle;
 static evtchn_port_t virq_port;
@@ -476,44 +475,24 @@ void restore_existing_connections(void)
 
 static int dom0_init(void) 
 { 
-       int rc, fd;
-       evtchn_port_t port; 
-       char str[20]; 
-       struct domain *dom0; 
-
-       fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
-       if (fd == -1)
+       evtchn_port_t port;
+       struct domain *dom0;
+
+       port = xenbus_evtchn();
+       if (port == -1)
                return -1;
 
-       rc = read(fd, str, sizeof(str)); 
-       if (rc == -1)
-               goto outfd;
-       str[rc] = '\0'; 
-       port = strtoul(str, NULL, 0); 
-
-       close(fd); 
-
        dom0 = new_domain(NULL, 0, port); 
 
-       fd = open(XENSTORED_PROC_KVA, O_RDWR);
-       if (fd == -1)
+       dom0->interface = xenbus_map();
+       if (dom0->interface == NULL)
                return -1;
 
-       dom0->interface = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
-                              MAP_SHARED, fd, 0);
-       if (dom0->interface == MAP_FAILED)
-               goto outfd;
-
-       close(fd);
-
        talloc_steal(dom0->conn, dom0); 
 
        evtchn_notify(dom0->port); 
 
        return 0; 
-outfd:
-       close(fd);
-       return -1;
 }
 
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/domain_build.c       Fri May 26 13:41:49 2006 -0600
@@ -367,7 +367,10 @@ int construct_dom0(struct domain *d,
     if ( (1UL << order) > nr_pages )
         panic("Domain 0 allocation is too small for kernel image.\n");
 
-    /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+    /*
+     * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
+     * mapping covers the allocation.
+     */
     if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
         panic("Not enough RAM for domain 0 allocation.\n");
     alloc_spfn = page_to_mfn(page);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Fri May 26 13:41:49 2006 -0600
@@ -185,8 +185,9 @@ void hvm_setup_platform(struct domain* d
 void hvm_setup_platform(struct domain* d)
 {
     struct hvm_domain *platform;
-
-    if ( !hvm_guest(current) || (current->vcpu_id != 0) )
+    struct vcpu *v=current;
+
+    if ( !hvm_guest(v) || (v->vcpu_id != 0) )
         return;
 
     if ( shadow_direct_map_init(d) == 0 )
@@ -208,7 +209,8 @@ void hvm_setup_platform(struct domain* d
         hvm_vioapic_init(d);
     }
 
-    pit_init(&platform->vpit, current);
+    init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, 
v->processor);
+    pit_init(v, cpu_khz);
 }
 
 void pic_irq_request(void *data, int level)
@@ -238,6 +240,14 @@ void hvm_pic_assist(struct vcpu *v)
         } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
         do_pic_irqs(pic, irqs);
     }
+}
+
+u64 hvm_get_guest_time(struct vcpu *v)
+{
+    u64    host_tsc;
+    
+    rdtscll(host_tsc);
+    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 int cpu_get_interrupt(struct vcpu *v, int *type)
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c  Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/i8254.c  Fri May 26 13:41:49 2006 -0600
@@ -22,11 +22,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-/* Edwin Zhai <edwin.zhai@xxxxxxxxx>
+/* Edwin Zhai <edwin.zhai@xxxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx>
  * Ported to xen:
- * use actimer for intr generation;
+ * Add a new layer of periodic time on top of PIT;
  * move speaker io access to hypervisor;
- * use new method for counter/intrs calculation
  */
 
 #include <xen/config.h>
@@ -42,184 +41,117 @@
 #include <asm/hvm/vpit.h>
 #include <asm/current.h>
 
-/*#define DEBUG_PIT*/
+/* Enable DEBUG_PIT may cause guest calibration inaccuracy */
+/* #define DEBUG_PIT */
 
 #define RW_STATE_LSB 1
 #define RW_STATE_MSB 2
 #define RW_STATE_WORD0 3
 #define RW_STATE_WORD1 4
 
-#ifndef NSEC_PER_SEC
-#define NSEC_PER_SEC (1000000000ULL)
-#endif
-
-#ifndef TIMER_SLOP 
-#define TIMER_SLOP (50*1000) /* ns */
-#endif
-
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time);
-
-s_time_t hvm_get_clock(void)
-{
-    /* TODO: add pause/unpause support */
-    return NOW();
+#define ticks_per_sec(v)      (v->domain->arch.hvm_domain.tsc_frequency)
+static int handle_pit_io(ioreq_t *p);
+static int handle_speaker_io(ioreq_t *p);
+
+/* compute with 96 bit intermediate result: (a*b)/c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    union {
+        uint64_t ll;
+        struct {
+#ifdef WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif            
+        } l;
+    } u, res;
+    uint64_t rl, rh;
+
+    u.ll = a;
+    rl = (uint64_t)u.l.low * (uint64_t)b;
+    rh = (uint64_t)u.l.high * (uint64_t)b;
+    rh += (rl >> 32);
+    res.l.high = rh / c;
+    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+    return res.ll;
+}
+
+/*
+ * get processor time.
+ * unit: TSC
+ */
+int64_t hvm_get_clock(struct vcpu *v)
+{
+    uint64_t  gtsc;
+    gtsc = hvm_get_guest_time(v);
+    return gtsc;
 }
 
 static int pit_get_count(PITChannelState *s)
 {
-    u64 d;
-    u64 counter;
-
-    d = hvm_get_clock() - s->count_load_time;
+    uint64_t d;
+    int  counter;
+
+    d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, 
ticks_per_sec(s->vcpu));
     switch(s->mode) {
     case 0:
     case 1:
     case 4:
     case 5:
-        counter = (s->period - d) & 0xffff;
+        counter = (s->count - d) & 0xffff;
         break;
     case 3:
         /* XXX: may be incorrect for odd counts */
-        counter = s->period - ((2 * d) % s->period);
+        counter = s->count - ((2 * d) % s->count);
         break;
     default:
-        /* mod 2 counter handle */
-        d = hvm_get_clock() - s->hvm_time->count_point;
-        d += s->hvm_time->count_advance;
-        counter = s->period - (d % s->period);
-        break;
-    }
-    /* change from ns to pit counter */
-    counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC);
+        counter = s->count - (d % s->count);
+        break;
+    }
     return counter;
 }
 
 /* get pit output bit */
-static int pit_get_out1(PITChannelState *s, s64 current_time)
-{
-    u64 d;
+static int pit_get_out1(PITChannelState *s, int64_t current_time)
+{
+    uint64_t d;
     int out;
 
-    d = current_time - s->count_load_time;
+    d = muldiv64(current_time - s->count_load_time, PIT_FREQ, 
ticks_per_sec(s->vcpu));
     switch(s->mode) {
     default:
     case 0:
-        out = (d >= s->period);
+        out = (d >= s->count);
         break;
     case 1:
-        out = (d < s->period);
+        out = (d < s->count);
         break;
     case 2:
-        /* mod2 out is no meaning, since intr are generated in background */
-        if ((d % s->period) == 0 && d != 0)
+        if ((d % s->count) == 0 && d != 0)
             out = 1;
         else
             out = 0;
         break;
     case 3:
-        out = (d % s->period) < ((s->period + 1) >> 1);
+        out = (d % s->count) < ((s->count + 1) >> 1);
         break;
     case 4:
     case 5:
-        out = (d == s->period);
+        out = (d == s->count);
         break;
     }
     return out;
 }
 
-int pit_get_out(hvm_virpit *pit, int channel, s64 current_time)
+int pit_get_out(PITState *pit, int channel, int64_t current_time)
 {
     PITChannelState *s = &pit->channels[channel];
     return pit_get_out1(s, current_time);
 }
 
-static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time)
-{
-    struct hvm_time_info *hvm_time = s->hvm_time;
-    struct domain *d = (void *) s - 
-        offsetof(struct domain, arch.hvm_domain.vpit.channels[0]);
-
-    /* ticks from current time(expected time) to NOW */ 
-    int missed_ticks;
-    /* current_time is expected time for next intr, check if it's true
-     * (actimer has a TIMER_SLOP in advance)
-     */
-    s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time;
-
-    if (missed_time >= 0) {
-        missed_ticks = missed_time/(s_time_t)s->period + 1;
-        if (test_bit(_DOMF_debugging, &d->domain_flags)) {
-            hvm_time->pending_intr_nr++;
-        } else {
-            hvm_time->pending_intr_nr += missed_ticks;
-        }
-        s->next_transition_time = current_time + (missed_ticks ) * s->period;
-    }
-
-    return s->next_transition_time;
-}
-
-/* only rearm the actimer when return value > 0
- *  -2: init state
- *  -1: the mode has expired
- *   0: current VCPU is not running
- *  >0: the next fired time
- */
-s64 pit_get_next_transition_time(PITChannelState *s, 
-                                            s64 current_time)
-{
-    s64 d, next_time, base;
-    int period2;
-    struct hvm_time_info *hvm_time = s->hvm_time;
-
-    d = current_time - s->count_load_time;
-    switch(s->mode) {
-    default:
-    case 0:
-    case 1:
-        if (d < s->period)
-            next_time = s->period;
-        else
-            return -1;
-        break;
-    case 2:
-        next_time = missed_ticks(s, current_time);
-        if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) )
-            return 0;
-        break;
-    case 3:
-        base = (d / s->period) * s->period;
-        period2 = ((s->period + 1) >> 1);
-        if ((d - base) < period2) 
-            next_time = base + period2;
-        else
-            next_time = base + s->period;
-        break;
-    case 4:
-    case 5:
-        if (d < s->period)
-            next_time = s->period;
-        else if (d == s->period)
-            next_time = s->period + 1;
-        else
-            return -1;
-        break;
-    case 0xff:
-        return -2;      /* for init state */ 
-        break;
-    }
-    /* XXX: better solution: use a clock at PIT_FREQ Hz */
-    if (next_time <= current_time){
-#ifdef DEBUG_PIT
-        printk("HVM_PIT:next_time <= current_time. next=0x%llx, 
current=0x%llx!\n",next_time, current_time);
-#endif
-        next_time = current_time + 1;
-    }
-    return next_time;
-}
-
 /* val must be 0 or 1 */
-void pit_set_gate(hvm_virpit *pit, int channel, int val)
+void pit_set_gate(PITState *pit, int channel, int val)
 {
     PITChannelState *s = &pit->channels[channel];
 
@@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c
     case 5:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = hvm_get_clock();
-            pit_irq_timer_update(s, s->count_load_time);
+            s->count_load_time = hvm_get_clock(s->vcpu);
+//            pit_irq_timer_update(s, s->count_load_time);
         }
         break;
     case 2:
     case 3:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = hvm_get_clock();
-            pit_irq_timer_update(s, s->count_load_time);
+            s->count_load_time = hvm_get_clock(s->vcpu);
+//            pit_irq_timer_update(s, s->count_load_time);
         }
         /* XXX: disable/enable counting */
         break;
@@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c
     s->gate = val;
 }
 
-int pit_get_gate(hvm_virpit *pit, int channel)
+int pit_get_gate(PITState *pit, int channel)
 {
     PITChannelState *s = &pit->channels[channel];
     return s->gate;
@@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch
 
 static inline void pit_load_count(PITChannelState *s, int val)
 {
+    u32   period;
     if (val == 0)
         val = 0x10000;
-
-    s->count_load_time = hvm_get_clock();
+    s->count_load_time = hvm_get_clock(s->vcpu);
     s->count = val;
-    s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ);
+    period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
 
 #ifdef DEBUG_PIT
-    printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d, 
load_time=%lld\n",
+    printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d, 
load_time=%lld\n",
+            s,
             val,
-            s->period / 1000,
+            period,
             s->mode,
-            s->count_load_time);
+            (long long)s->count_load_time);
 #endif
 
-    if (s->mode == HVM_PIT_ACCEL_MODE) {
-        if (!s->hvm_time) {
-            printk("HVM_PIT:guest should only set mod 2 on channel 0!\n");
-            return;
-        }
-        s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L;
-        s->hvm_time->first_injected = 0;
-
-        if (s->period < 900000) { /* < 0.9 ms */
-            printk("HVM_PIT: guest programmed too small an count: %x\n",
-                    s->count);
-            s->period = 1000000;
-        }
-    }
-        
-    pit_irq_timer_update(s, s->count_load_time);
+    switch (s->mode) {
+        case 2:
+            /* create periodic time */
+            s->pt = create_periodic_time (s->vcpu, period, 0, 0);
+            break;
+        case 1:
+            /* create one shot time */
+            s->pt = create_periodic_time (s->vcpu, period, 0, 1);
+#ifdef DEBUG_PIT
+            printk("HVM_PIT: create one shot time.\n");
+#endif
+            break;
+        default:
+            break;
+    }
 }
 
 /* if already latched, do not latch again */
@@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt
     }
 }
 
-static void pit_ioport_write(void *opaque, u32 addr, u32 val)
-{
-    hvm_virpit *pit = opaque;
+static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PITState *pit = opaque;
     int channel, access;
     PITChannelState *s;
     val &= 0xff;
@@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu
                     if (!(val & 0x10) && !s->status_latched) {
                         /* status latch */
                         /* XXX: add BCD and null count */
-                        s->status =  (pit_get_out1(s, hvm_get_clock()) << 7) |
+                        s->status =  (pit_get_out1(s, hvm_get_clock(s->vcpu)) 
<< 7) |
                             (s->rw_mode << 4) |
                             (s->mode << 1) |
                             s->bcd;
@@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu
     }
 }
 
-static u32 pit_ioport_read(void *opaque, u32 addr)
-{
-    hvm_virpit *pit = opaque;
+static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
+{
+    PITState *pit = opaque;
     int ret, count;
     PITChannelState *s;
     
@@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque,
     return ret;
 }
 
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time)
-{
-    s64 expire_time;
-    int irq_level;
-    struct vcpu *v = current;
-    struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic;
-
-    if (!s->hvm_time || s->mode == 0xff)
-        return;
-
-    expire_time = pit_get_next_transition_time(s, current_time);
-    /* not generate intr by direct pic_set_irq in mod 2
-     * XXX:mod 3 should be same as mod 2
-     */
-    if (s->mode != HVM_PIT_ACCEL_MODE) {
-        irq_level = pit_get_out1(s, current_time);
-        pic_set_irq(pic, s->irq, irq_level);
-        s->next_transition_time = expire_time;
-#ifdef DEBUG_PIT
-        printk("HVM_PIT:irq_level=%d next_delay=%l ns\n",
-                irq_level, 
-                (expire_time - current_time));
-#endif
-    }
-
-    if (expire_time > 0)
-        set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-
-}
-
-static void pit_irq_timer(void *data)
-{
-    PITChannelState *s = data;
-
-    pit_irq_timer_update(s, s->next_transition_time);
-}
-
 static void pit_reset(void *opaque)
 {
-    hvm_virpit *pit = opaque;
+    PITState *pit = opaque;
     PITChannelState *s;
     int i;
 
     for(i = 0;i < 3; i++) {
         s = &pit->channels[i];
+        if ( s -> pt ) {
+            destroy_periodic_time (s->pt);
+            s->pt = NULL;
+        }
         s->mode = 0xff; /* the init mode */
         s->gate = (i != 2);
         pit_load_count(s, 0);
     }
 }
 
-/* hvm_io_assist light-weight version, specific to PIT DM */ 
-static void resume_pit_io(ioreq_t *p)
-{
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    unsigned long old_eax = regs->eax;
-    p->state = STATE_INVALID;
-
-    switch(p->size) {
-    case 1:
-        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-        break;
-    case 2:
-        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-        break;
-    case 4:
-        regs->eax = (p->u.data & 0xffffffff);
-        break;
-    default:
-        BUG();
-    }
+void pit_init(struct vcpu *v, unsigned long cpu_khz)
+{
+    PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit;
+    PITChannelState *s;
+
+    s = &pit->channels[0];
+    /* the timer 0 is connected to an IRQ */
+    s->vcpu = v;
+    s++; s->vcpu = v;
+    s++; s->vcpu = v;
+
+    register_portio_handler(PIT_BASE, 4, handle_pit_io);
+    /* register the speaker port */
+    register_portio_handler(0x61, 1, handle_speaker_io);
+    ticks_per_sec(v) = cpu_khz * (int64_t)1000; 
+#ifdef DEBUG_PIT
+    printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v));
+#endif
+    pit_reset(pit);
+    return;
 }
 
 /* the intercept action for PIT DM retval:0--not handled; 1--handled */  
-int handle_pit_io(ioreq_t *p)
+static int handle_pit_io(ioreq_t *p)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
 
     if (p->size != 1 ||
         p->pdata_valid ||
@@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p)
     if (p->dir == 0) {/* write */
         pit_ioport_write(vpit, p->addr, p->u.data);
     } else if (p->dir == 1) { /* read */
-        p->u.data = pit_ioport_read(vpit, p->addr);
-        resume_pit_io(p);
-    }
-
-    /* always return 1, since PIT sit in HV now */
+        if ( (p->addr & 3) != 3 ) {
+            p->u.data = pit_ioport_read(vpit, p->addr);
+        } else {
+            printk("HVM_PIT: read A1:A0=3!\n");
+        }
+    }
     return 1;
 }
 
 static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
-    hvm_virpit *pit = opaque;
-    val &= 0xff;
+    PITState *pit = opaque;
     pit->speaker_data_on = (val >> 1) & 1;
     pit_set_gate(pit, 2, val & 1);
 }
@@ -527,18 +426,18 @@ static uint32_t speaker_ioport_read(void
 static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
 {
     int out;
-    hvm_virpit *pit = opaque;
-    out = pit_get_out(pit, 2, hvm_get_clock());
+    PITState *pit = opaque;
+    out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu));
     pit->dummy_refresh_clock ^= 1;
 
     return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
       (pit->dummy_refresh_clock << 4);
 }
 
-int handle_speaker_io(ioreq_t *p)
+static int handle_speaker_io(ioreq_t *p)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
 
     if (p->size != 1 ||
         p->pdata_valid ||
@@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p)
         speaker_ioport_write(vpit, p->addr, p->u.data);
     } else if (p->dir == 1) {/* read */
         p->u.data = speaker_ioport_read(vpit, p->addr);
-        resume_pit_io(p);
     }
 
     return 1;
 }
-
-/* pick up missed timer ticks at deactive time */
-void pickup_deactive_ticks(struct hvm_virpit *vpit)
-{
-    s64 next_time;
-    PITChannelState *s = &(vpit->channels[0]);
-    if ( !active_timer(&(vpit->time_info.pit_timer)) ) {
-        next_time = pit_get_next_transition_time(s, s->next_transition_time); 
-        if (next_time >= 0)
-            set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-    }
-}
-
-void pit_init(struct hvm_virpit *pit, struct vcpu *v)
-{
-    PITChannelState *s;
-    struct hvm_time_info *hvm_time;
-
-    s = &pit->channels[0];
-    /* the timer 0 is connected to an IRQ */
-    s->irq = 0;
-    /* channel 0 need access the related time info for intr injection */
-    hvm_time = s->hvm_time = &pit->time_info;
-    hvm_time->vcpu = v;
-
-    init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor);
-
-    register_portio_handler(PIT_BASE, 4, handle_pit_io);
-
-    /* register the speaker port */
-    register_portio_handler(0x61, 1, handle_speaker_io);
-
-    pit_reset(pit);
-
-    return;
-
-}
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/intercept.c      Fri May 26 13:41:49 2006 -0600
@@ -214,6 +214,88 @@ void hlt_timer_fn(void *data)
     evtchn_set_pending(v, iopacket_port(v));
 }
 
+static __inline__ void missed_ticks(struct periodic_time *pt)
+{
+    int missed_ticks;
+
+    missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period;
+    if ( missed_ticks++ >= 0 ) {
+        if ( missed_ticks > 1000 ) {
+            /* TODO: Adjust guest time togther */
+            pt->pending_intr_nr ++;
+        }
+        else {
+            pt->pending_intr_nr += missed_ticks;
+        }
+        pt->scheduled += missed_ticks * pt->period;
+    }
+}
+
+/* hook function for the platform periodic time */
+void pt_timer_fn(void *data)
+{
+    struct vcpu *v = data;
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    /* pick up missed timer tick */
+    missed_ticks(pt);
+    if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
+        set_timer(&pt->timer, pt->scheduled);
+    }
+}
+
+/* pick up missed timer ticks at deactive time */
+void pickup_deactive_ticks(struct periodic_time *pt)
+{
+    if ( !active_timer(&(pt->timer)) ) {
+        missed_ticks(pt);
+        set_timer(&pt->timer, pt->scheduled);
+    }
+}
+
+/*
+ * period: fire frequency in ns.
+ */
+struct periodic_time * create_periodic_time(
+        struct vcpu *v, 
+        u32 period, 
+        char irq,
+        char one_shot)
+{
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+    if ( pt->enabled ) {
+        if ( v->vcpu_id != 0 ) {
+            printk("HVM_PIT: start 2nd periodic time on non BSP!\n");
+        }
+        stop_timer (&pt->timer);
+        pt->enabled = 0;
+    }
+    pt->pending_intr_nr = 0;
+    pt->first_injected = 0;
+    if (period < 900000) { /* < 0.9 ms */
+        printk("HVM_PlatformTime: program too small period %u\n",period);
+        period = 900000;   /* force to 0.9ms */
+    }
+    pt->period = period;
+    pt->irq = irq;
+    pt->period_cycles = (u64)period * cpu_khz / 1000000L;
+    pt->one_shot = one_shot;
+    if ( one_shot ) {
+        printk("HVM_PL: No support for one shot platform time yet\n");
+    }
+    pt->scheduled = NOW() + period;
+    set_timer (&pt->timer,pt->scheduled);
+    pt->enabled = 1;
+    return pt;
+}
+
+void destroy_periodic_time(struct periodic_time *pt)
+{
+    if ( pt->enabled ) {
+        stop_timer(&pt->timer);
+        pt->enabled = 0;
+    }
+}
 
 /*
  * Local variables:
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Fri May 26 13:41:49 2006 -0600
@@ -44,45 +44,33 @@
  */
 #define BSP_CPU(v)    (!(v->vcpu_id))
 
-u64 svm_get_guest_time(struct vcpu *v)
-{
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
-    u64    host_tsc;
-    
-    rdtscll(host_tsc);
-    return host_tsc + time_info->cache_tsc_offset;
-}
-
 void svm_set_guest_time(struct vcpu *v, u64 gtime)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
     u64    host_tsc;
    
     rdtscll(host_tsc);
     
-    time_info->cache_tsc_offset = gtime - host_tsc;
-    v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset;
+    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+    v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct  periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
 
     if ( is_pit_irq(v, vector, type) ) {
-        if ( !time_info->first_injected ) {
-            time_info->pending_intr_nr = 0;
-            time_info->last_pit_gtime = svm_get_guest_time(v);
-            time_info->first_injected = 1;
+        if ( !pt->first_injected ) {
+            pt->pending_intr_nr = 0;
+            pt->last_plt_gtime = hvm_get_guest_time(v);
+            pt->scheduled = NOW() + pt->period;
+            set_timer(&pt->timer, pt->scheduled);
+            pt->first_injected = 1;
         } else {
-            time_info->pending_intr_nr--;
+            pt->pending_intr_nr--;
+            pt->last_plt_gtime += pt->period_cycles;
+            svm_set_guest_time(v, pt->last_plt_gtime);
         }
-        time_info->count_advance = 0;
-        time_info->count_point = NOW();
-
-        time_info->last_pit_gtime += time_info->period_cycles;
-        svm_set_guest_time(v, time_info->last_pit_gtime);
     }
 
     switch(type)
@@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void)
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     struct hvm_domain *plat=&v->domain->arch.hvm_domain; 
-    struct hvm_virpit *vpit = &plat->vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &plat->pl_time.periodic_tm;
     struct hvm_virpic *pic= &plat->vpic;
     int intr_type = VLAPIC_DELIV_MODE_EXT;
     int intr_vector = -1;
@@ -174,9 +161,9 @@ asmlinkage void svm_intr_assist(void)
       if ( cpu_has_pending_irq(v) ) {
            intr_vector = cpu_get_interrupt(v, &intr_type);
       }
-      else  if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
-          pic_set_irq(pic, 0, 0);
-          pic_set_irq(pic, 0, 1);
+      else  if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+          pic_set_irq(pic, pt->irq, 0);
+          pic_set_irq(pic, pt->irq, 1);
           intr_vector = cpu_get_interrupt(v, &intr_type);
       }
     }
@@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void)
             /* Re-injecting a PIT interruptt? */
             if (re_injecting && 
                 is_pit_irq(v, intr_vector, intr_type)) {
-                    ++time_info->pending_intr_nr;
+                    ++pt->pending_intr_nr;
             }
             /* let's inject this interrupt */
             TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Fri May 26 13:41:49 2006 -0600
@@ -51,13 +51,6 @@
 
 #define SVM_EXTRA_DEBUG
 
-#ifdef TRACE_BUFFER
-static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
-#else
-#define TRACE_VMEXIT(index,value) ((void)0)
-#endif
-
 /* Useful define */
 #define MAX_INST_SIZE  15
 
@@ -672,12 +665,11 @@ static void arch_svm_do_launch(struct vc
 
 static void svm_freeze_time(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     
-    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) 
{
-        v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
-        time_info->count_advance += (NOW() - time_info->count_point);
-        stop_timer(&(time_info->pit_timer));
+    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+        stop_timer(&(pt->timer));
     }
 }
 
@@ -754,7 +746,7 @@ static void svm_relinquish_guest_resourc
         }
     }
 
-    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -784,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v)
 
 void svm_migrate_timers(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
-
-    migrate_timer(&time_info->pit_timer, v->processor);
-    migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    if ( pt->enabled ) {
+        migrate_timer( &pt->timer, v->processor );
+        migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
+    }
     if ( hvm_apic_support(v->domain) && VLAPIC( v ))
         migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
 }
@@ -816,7 +810,6 @@ static int svm_do_page_fault(unsigned lo
             return 1;
 
         handle_mmio(va, va);
-        TRACE_VMEXIT(2,2);
         return 1;
     }
 
@@ -842,7 +835,6 @@ static int svm_do_page_fault(unsigned lo
             return 1;
         }
 
-        TRACE_VMEXIT (2,2);
         handle_mmio(va, gpa);
 
         return 1;
@@ -854,8 +846,6 @@ static int svm_do_page_fault(unsigned lo
         /* Let's make sure that the Guest TLB is flushed */
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
     }
-
-    TRACE_VMEXIT (2,result);
 
     return result;
 }
@@ -1901,14 +1891,8 @@ static inline void svm_do_msr_access(str
         regs->edx = 0;
         switch (regs->ecx) {
         case MSR_IA32_TIME_STAMP_COUNTER:
-        {
-            struct hvm_time_info *time_info;
-
-            rdtscll(msr_content);
-            time_info = &v->domain->arch.hvm_domain.vpit.time_info;
-            msr_content += time_info->cache_tsc_offset;
+            msr_content = hvm_get_guest_time(v);
             break;
-        }
         case MSR_IA32_SYSENTER_CS:
             msr_content = vmcb->sysenter_cs;
             break;
@@ -1975,7 +1959,7 @@ static inline void svm_vmexit_do_hlt(str
 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     s_time_t  next_pit = -1, next_wakeup;
 
     __update_guest_eip(vmcb, 1);
@@ -1985,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str
        return; 
 
     if ( !v->vcpu_id )
-        next_pit = get_pit_scheduled(v, vpit);
+        next_pit = get_scheduled(v, pt->irq, pt);
     next_wakeup = get_apictime_scheduled(v);
     if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
         next_wakeup = next_pit;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Fri May 26 13:41:49 2006 -0600
@@ -442,19 +442,17 @@ void svm_do_resume(struct vcpu *v)
 void svm_do_resume(struct vcpu *v) 
 {
     struct domain *d = v->domain;
-    struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm;
 
     svm_stts(v);
 
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( time_info->first_injected ) {
-        if ( v->domain->arch.hvm_domain.guest_time ) {
-            svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
-            time_info->count_point = NOW();
-            v->domain->arch.hvm_domain.guest_time = 0;
+    if ( pt->enabled && pt->first_injected ) {
+        if ( v->arch.hvm_vcpu.guest_time ) {
+            svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+            v->arch.hvm_vcpu.guest_time = 0;
         }
-        pickup_deactive_ticks(vpit);
+        pickup_deactive_ticks(pt);
     }
 
     if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/io.c Fri May 26 13:41:49 2006 -0600
@@ -49,45 +49,33 @@ void __set_tsc_offset(u64  offset)
 #endif
 }
 
-u64 get_guest_time(struct vcpu *v)
-{
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
-    u64    host_tsc;
-    
-    rdtscll(host_tsc);
-    return host_tsc + time_info->cache_tsc_offset;
-}
-
 void set_guest_time(struct vcpu *v, u64 gtime)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
     u64    host_tsc;
    
     rdtscll(host_tsc);
     
-    time_info->cache_tsc_offset = gtime - host_tsc;
-    __set_tsc_offset(time_info->cache_tsc_offset);
+    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+    __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset);
 }
 
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
 
     if ( is_pit_irq(v, vector, type) ) {
-        if ( !time_info->first_injected ) {
-            time_info->pending_intr_nr = 0;
-            time_info->last_pit_gtime = get_guest_time(v);
-            time_info->first_injected = 1;
+        if ( !pt->first_injected ) {
+            pt->pending_intr_nr = 0;
+            pt->last_plt_gtime = hvm_get_guest_time(v);
+            pt->scheduled = NOW() + pt->period;
+            set_timer(&pt->timer, pt->scheduled);
+            pt->first_injected = 1;
         } else {
-            time_info->pending_intr_nr--;
-        }
-        time_info->count_advance = 0;
-        time_info->count_point = NOW();
-
-        time_info->last_pit_gtime += time_info->period_cycles;
-        set_guest_time(v, time_info->last_pit_gtime);
+            pt->pending_intr_nr--;
+            pt->last_plt_gtime += pt->period_cycles;
+            set_guest_time(v, pt->last_plt_gtime);
+        }
     }
 
     switch(type)
@@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void)
     unsigned long eflags;
     struct vcpu *v = current;
     struct hvm_domain *plat=&v->domain->arch.hvm_domain;
-    struct hvm_time_info *time_info = &plat->vpit.time_info;
+    struct periodic_time *pt = &plat->pl_time.periodic_tm;
     struct hvm_virpic *pic= &plat->vpic;
     unsigned int idtv_info_field;
     unsigned long inst_len;
@@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void)
     if ( v->vcpu_id == 0 )
         hvm_pic_assist(v);
 
-    if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
-        pic_set_irq(pic, 0, 0);
-        pic_set_irq(pic, 0, 1);
+    if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+        pic_set_irq(pic, pt->irq, 0);
+        pic_set_irq(pic, pt->irq, 1);
     }
 
     has_ext_irq = cpu_has_pending_irq(v);
@@ -232,19 +220,17 @@ void vmx_do_resume(struct vcpu *v)
 void vmx_do_resume(struct vcpu *v)
 {
     struct domain *d = v->domain;
-    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
 
     vmx_stts();
 
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( time_info->first_injected ) {
-        if ( v->domain->arch.hvm_domain.guest_time ) {
-            time_info->count_point = NOW();
-            set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
-            v->domain->arch.hvm_domain.guest_time = 0;
-        }
-        pickup_deactive_ticks(vpit);
+    if ( pt->enabled && pt->first_injected ) {
+        if ( v->arch.hvm_vcpu.guest_time ) {
+            set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+            v->arch.hvm_vcpu.guest_time = 0;
+        }
+        pickup_deactive_ticks(pt);
     }
 
     if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri May 26 13:41:49 2006 -0600
@@ -47,7 +47,7 @@
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
 
-static unsigned long trace_values[NR_CPUS][4];
+static unsigned long trace_values[NR_CPUS][5];
 #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
@@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc
         }
     }
 
-    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write
 
 static void vmx_freeze_time(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     
-    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) 
{
-        v->domain->arch.hvm_domain.guest_time = get_guest_time(v);
-        time_info->count_advance += (NOW() - time_info->count_point);
-        stop_timer(&(time_info->pit_timer));
+    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+        stop_timer(&(pt->timer));
     }
 }
 
@@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc
 
 void vmx_migrate_timers(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
-
-    migrate_timer(&time_info->pit_timer, v->processor);
-    migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    if ( pt->enabled ) {
+        migrate_timer(&pt->timer, v->processor);
+        migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+    }
     if ( hvm_apic_support(v->domain) && VLAPIC(v))
         migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
 }
@@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc
                 (unsigned long)regs->edx);
     switch (regs->ecx) {
     case MSR_IA32_TIME_STAMP_COUNTER:
-    {
-        struct hvm_time_info *time_info;
-
-        rdtscll(msr_content);
-        time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
-        msr_content += time_info->cache_tsc_offset;
-        break;
-    }
+        msr_content = hvm_get_guest_time(v);
+        break;
     case MSR_IA32_SYSENTER_CS:
         __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
         break;
@@ -1941,11 +1936,11 @@ void vmx_vmexit_do_hlt(void)
 void vmx_vmexit_do_hlt(void)
 {
     struct vcpu *v=current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
     s_time_t   next_pit=-1,next_wakeup;
 
     if ( !v->vcpu_id )
-        next_pit = get_pit_scheduled(v,vpit);
+        next_pit = get_scheduled(v, pt->irq, pt);
     next_wakeup = get_apictime_scheduled(v);
     if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
         next_wakeup = next_pit;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/mm.c Fri May 26 13:41:49 2006 -0600
@@ -260,9 +260,42 @@ void share_xen_page_with_privileged_gues
     share_xen_page_with_guest(page, dom_xen, readonly);
 }
 
+static void __write_ptbase(unsigned long mfn)
+{
+#ifdef CONFIG_X86_PAE
+    if ( mfn >= 0x100000 )
+    {
+        l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
+        struct vcpu *v = current;
+        unsigned long flags;
+
+        /* Protects against re-entry and against __pae_flush_pgd(). */
+        local_irq_save(flags);
+
+        /* Pick an unused low-memory L3 cache slot. */
+        v->arch.lowmem_l3tab_inuse ^= 1;
+        lowmem_l3tab = v->arch.lowmem_l3tab[v->arch.lowmem_l3tab_inuse];
+        v->arch.lowmem_l3tab_high_mfn[v->arch.lowmem_l3tab_inuse] = mfn;
+
+        /* Map the guest L3 table and copy to the chosen low-memory cache. */
+        highmem_l3tab = map_domain_page(mfn);
+        memcpy(lowmem_l3tab, highmem_l3tab, sizeof(v->arch.lowmem_l3tab));
+        unmap_domain_page(highmem_l3tab);
+
+        /* Install the low-memory L3 table in CR3. */
+        write_cr3(__pa(lowmem_l3tab));
+
+        local_irq_restore(flags);
+        return;
+    }
+#endif
+
+    write_cr3(mfn << PAGE_SHIFT);
+}
+
 void write_ptbase(struct vcpu *v)
 {
-    write_cr3(pagetable_get_paddr(v->arch.monitor_table));
+    __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
 }
 
 void invalidate_shadow_ldt(struct vcpu *v)
@@ -401,6 +434,7 @@ static int get_page_and_type_from_pagenr
     return 1;
 }
 
+#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -456,6 +490,7 @@ get_linear_pagetable(
 
     return 1;
 }
+#endif /* !CONFIG_X86_PAE */
 
 int
 get_page_from_l1e(
@@ -564,10 +599,6 @@ get_page_from_l3e(
     rc = get_page_and_type_from_pagenr(
         l3e_get_pfn(l3e),
         PGT_l2_page_table | vaddr, d);
-#if CONFIG_PAGING_LEVELS == 3
-    if ( unlikely(!rc) )
-        rc = get_linear_pagetable(l3e, pfn, d);
-#endif
     return rc;
 }
 #endif /* 3 level */
@@ -773,6 +804,50 @@ static int create_pae_xen_mappings(l3_pg
     return 1;
 }
 
+struct pae_flush_pgd {
+    unsigned long l3tab_mfn;
+    unsigned int  l3tab_idx;
+    l3_pgentry_t  nl3e;
+};
+
+static void __pae_flush_pgd(void *data)
+{
+    struct pae_flush_pgd *args = data;
+    struct vcpu *v = this_cpu(curr_vcpu);
+    int i = v->arch.lowmem_l3tab_inuse;
+    intpte_t _ol3e, _nl3e, _pl3e;
+    l3_pgentry_t *l3tab_ptr;
+
+    ASSERT(!local_irq_is_enabled());
+
+    if ( v->arch.lowmem_l3tab_high_mfn[i] != args->l3tab_mfn )
+        return;
+
+    l3tab_ptr = &v->arch.lowmem_l3tab[i][args->l3tab_idx];
+
+    _ol3e = l3e_get_intpte(*l3tab_ptr);
+    _nl3e = l3e_get_intpte(args->nl3e);
+    _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
+    BUG_ON(_pl3e != _ol3e);
+}
+
+/* Flush a pgdir update into low-memory caches. */
+static void pae_flush_pgd(
+    unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
+{
+    struct domain *d = page_get_owner(mfn_to_page(mfn));
+    struct pae_flush_pgd args = {
+        .l3tab_mfn = mfn,
+        .l3tab_idx = idx,
+        .nl3e      = nl3e };
+
+    /* If below 4GB then the pgdir is not shadowed in low memory. */
+    if ( mfn < 0x100000 )
+        return;
+
+    on_selected_cpus(d->domain_dirty_cpumask, __pae_flush_pgd, &args, 1, 1);
+}
+
 static inline int l1_backptr(
     unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
 {
@@ -787,6 +862,7 @@ static inline int l1_backptr(
 
 #elif CONFIG_X86_64
 # define create_pae_xen_mappings(pl3e) (1)
+# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
 
 static inline int l1_backptr(
     unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
@@ -886,14 +962,6 @@ static int alloc_l3_table(struct page_in
 
     ASSERT(!shadow_mode_refcounts(d));
 
-#ifdef CONFIG_X86_PAE
-    if ( pfn >= 0x100000 )
-    {
-        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
-        return 0;
-    }
-#endif
-
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
     {
@@ -1240,6 +1308,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
     okay = create_pae_xen_mappings(pl3e);
     BUG_ON(!okay);
+
+    pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
 
     put_page_from_l3e(ol3e, pfn);
     return 1;
@@ -3109,7 +3179,7 @@ void ptwr_flush(struct domain *d, const 
 
     if ( unlikely(d->arch.ptwr[which].vcpu != current) )
         /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
-        write_cr3(pagetable_get_paddr(
+        __write_ptbase(pagetable_get_pfn(
             d->arch.ptwr[which].vcpu->arch.guest_table));
     else
         TOGGLE_MODE();
@@ -3220,15 +3290,16 @@ static int ptwr_emulated_update(
     /* Turn a sub-word access into a full-word access. */
     if ( bytes != sizeof(paddr_t) )
     {
-        int           rc;
-        paddr_t    full;
-        unsigned int  offset = addr & (sizeof(paddr_t)-1);
+        paddr_t      full;
+        unsigned int offset = addr & (sizeof(paddr_t)-1);
 
         /* Align address; read full word. */
         addr &= ~(sizeof(paddr_t)-1);
-        if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
-                                        sizeof(paddr_t))) )
-            return rc; 
+        if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
+        {
+            propagate_page_fault(addr, 4); /* user mode, read fault */
+            return X86EMUL_PROPAGATE_FAULT;
+        }
         /* Mask out bits provided by caller. */
         full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
         /* Shift the caller value and OR in the missing bits. */
@@ -3306,7 +3377,8 @@ static int ptwr_emulated_write(
 static int ptwr_emulated_write(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     return ptwr_emulated_update(addr, 0, val, bytes, 0);
 }
@@ -3315,7 +3387,8 @@ static int ptwr_emulated_cmpxchg(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     return ptwr_emulated_update(addr, old, new, bytes, 1);
 }
@@ -3325,7 +3398,8 @@ static int ptwr_emulated_cmpxchg8b(
     unsigned long old,
     unsigned long old_hi,
     unsigned long new,
-    unsigned long new_hi)
+    unsigned long new_hi,
+    struct x86_emulate_ctxt *ctxt)
 {
     if ( CONFIG_PAGING_LEVELS == 2 )
         return X86EMUL_UNHANDLEABLE;
@@ -3334,7 +3408,7 @@ static int ptwr_emulated_cmpxchg8b(
             addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
 }
 
-static struct x86_mem_emulator ptwr_mem_emulator = {
+static struct x86_emulate_ops ptwr_emulate_ops = {
     .read_std           = x86_emulate_read_std,
     .write_std          = x86_emulate_write_std,
     .read_emulated      = x86_emulate_read_std,
@@ -3353,6 +3427,7 @@ int ptwr_do_page_fault(struct domain *d,
     l2_pgentry_t    *pl2e, l2e;
     int              which, flags;
     unsigned long    l2_idx;
+    struct x86_emulate_ctxt emul_ctxt;
 
     if ( unlikely(shadow_mode_enabled(d)) )
         return 0;
@@ -3507,8 +3582,10 @@ int ptwr_do_page_fault(struct domain *d,
     return EXCRET_fault_fixed;
 
  emulate:
-    if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
-                           &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
+    emul_ctxt.regs = guest_cpu_user_regs();
+    emul_ctxt.cr2  = addr;
+    emul_ctxt.mode = X86EMUL_MODE_HOST;
+    if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
         return 0;
     perfc_incrc(ptwr_emulations);
     return EXCRET_fault_fixed;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/traps.c      Fri May 26 13:41:49 2006 -0600
@@ -876,7 +876,7 @@ static int emulate_privileged_op(struct 
                     PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                 break;
             }
-            regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+            regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
             break;
 
         case 0x6e: /* OUTSB */
@@ -902,7 +902,7 @@ static int emulate_privileged_op(struct 
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
-            regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+            regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
             break;
         }
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/x86_emulate.c        Fri May 26 13:41:49 2006 -0600
@@ -363,12 +363,13 @@ do{ __asm__ __volatile__ (              
 #endif /* __i386__ */
 
 /* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _size, _eip) \
-({ unsigned long _x; \
-   if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
-       goto done; \
-   (_eip) += (_size); \
-   (_type)_x; \
+#define insn_fetch(_type, _size, _eip)                                  \
+({ unsigned long _x;                                                    \
+   rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt);       \
+   if ( rc != 0 )                                                       \
+       goto done;                                                       \
+   (_eip) += (_size);                                                   \
+   (_type)_x;                                                           \
 })
 
 /* Access/update address held in a register, based on addressing mode. */
@@ -426,12 +427,10 @@ decode_register(
     return p;
 }
 
-int 
+int
 x86_emulate_memop(
-    struct cpu_user_regs *regs,
-    unsigned long cr2,
-    struct x86_mem_emulator *ops,
-    int mode)
+    struct x86_emulate_ctxt *ctxt,
+    struct x86_emulate_ops  *ops)
 {
     uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
     uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
@@ -439,9 +438,11 @@ x86_emulate_memop(
     unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
     int rc = 0;
     struct operand src, dst;
+    unsigned long cr2 = ctxt->cr2;
+    int mode = ctxt->mode;
 
     /* Shadow copy of register state. Committed on successful emulation. */
-    struct cpu_user_regs _regs = *regs;
+    struct cpu_user_regs _regs = *ctxt->regs;
 
     switch ( mode )
     {
@@ -628,7 +629,7 @@ x86_emulate_memop(
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
              ((rc = ops->read_emulated((unsigned long)dst.ptr,
-                                       &dst.val, dst.bytes)) != 0) )
+                                       &dst.val, dst.bytes, ctxt)) != 0) )
              goto done;
         break;
     }
@@ -670,7 +671,7 @@ x86_emulate_memop(
         src.type  = OP_MEM;
         src.ptr   = (unsigned long *)cr2;
         if ( (rc = ops->read_emulated((unsigned long)src.ptr, 
-                                      &src.val, src.bytes)) != 0 )
+                                      &src.val, src.bytes, ctxt)) != 0 )
             goto done;
         src.orig_val = src.val;
         break;
@@ -776,7 +777,7 @@ x86_emulate_memop(
         if ( mode == X86EMUL_MODE_PROT64 )
             dst.bytes = 8;
         if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
-                                 &dst.val, dst.bytes)) != 0 )
+                                 &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(_regs.esp, dst.bytes);
         break;
@@ -854,12 +855,12 @@ x86_emulate_memop(
             {
                 dst.bytes = 8;
                 if ( (rc = ops->read_std((unsigned long)dst.ptr,
-                                         &dst.val, 8)) != 0 )
+                                         &dst.val, 8, ctxt)) != 0 )
                     goto done;
             }
-            register_address_increment(_regs.esp, -dst.bytes);
+            register_address_increment(_regs.esp, -(int)dst.bytes);
             if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
-                                      dst.val, dst.bytes)) != 0 )
+                                      dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
             dst.val = dst.orig_val; /* skanky: disable writeback */
             break;
@@ -887,10 +888,11 @@ x86_emulate_memop(
         case OP_MEM:
             if ( lock_prefix )
                 rc = ops->cmpxchg_emulated(
-                    (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
+                    (unsigned long)dst.ptr, dst.orig_val,
+                    dst.val, dst.bytes, ctxt);
             else
                 rc = ops->write_emulated(
-                    (unsigned long)dst.ptr, dst.val, dst.bytes);
+                    (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt);
             if ( rc != 0 )
                 goto done;
         default:
@@ -899,7 +901,7 @@ x86_emulate_memop(
     }
 
     /* Commit shadow register state. */
-    *regs = _regs;
+    *ctxt->regs = _regs;
 
  done:
     return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
@@ -911,11 +913,11 @@ x86_emulate_memop(
     {
         if ( _regs.ecx == 0 )
         {
-            regs->eip = _regs.eip;
+            ctxt->regs->eip = _regs.eip;
             goto done;
         }
         _regs.ecx--;
-        _regs.eip = regs->eip;
+        _regs.eip = ctxt->regs->eip;
     }
     switch ( b )
     {
@@ -928,20 +930,21 @@ x86_emulate_memop(
             dst.ptr = (unsigned long *)cr2;
             if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
                                                       _regs.esi),
-                                     &dst.val, dst.bytes)) != 0 )
+                                     &dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
         }
         else
         {
             /* Read fault: source is special memory. */
             dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
-            if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+            if ( (rc = ops->read_emulated(cr2, &dst.val,
+                                          dst.bytes, ctxt)) != 0 )
                 goto done;
         }
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xa6 ... 0xa7: /* cmps */
         DPRINTF("Urk! I don't handle CMPS.\n");
@@ -952,16 +955,16 @@ x86_emulate_memop(
         dst.ptr   = (unsigned long *)cr2;
         dst.val   = _regs.eax;
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
         dst.type  = OP_REG;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.ptr   = (unsigned long *)&_regs.eax;
-        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xae ... 0xaf: /* scas */
         DPRINTF("Urk! I don't handle SCAS.\n");
@@ -1074,8 +1077,8 @@ x86_emulate_memop(
 #if defined(__i386__)
     {
         unsigned long old_lo, old_hi;
-        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) ||
-             ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) )
+        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) ||
+             ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) )
             goto done;
         if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
         {
@@ -1090,8 +1093,8 @@ x86_emulate_memop(
         }
         else
         {
-            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi,
-                                               _regs.ebx, _regs.ecx)) != 0 )
+            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx,
+                                               _regs.ecx, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1100,7 +1103,7 @@ x86_emulate_memop(
 #elif defined(__x86_64__)
     {
         unsigned long old, new;
-        if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 )
+        if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 )
             goto done;
         if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
              ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
@@ -1112,7 +1115,7 @@ x86_emulate_memop(
         else
         {
             new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
-            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 )
+            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1136,7 +1139,8 @@ x86_emulate_read_std(
 x86_emulate_read_std(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     *val = 0;
     if ( copy_from_user((void *)val, (void *)addr, bytes) )
@@ -1151,7 +1155,8 @@ x86_emulate_write_std(
 x86_emulate_write_std(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     if ( copy_to_user((void *)addr, (void *)&val, bytes) )
     {
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/Makefile
--- a/xen/common/Makefile       Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/Makefile       Fri May 26 13:41:49 2006 -0600
@@ -13,6 +13,7 @@ obj-y += page_alloc.o
 obj-y += page_alloc.o
 obj-y += rangeset.o
 obj-y += sched_bvt.o
+obj-y += sched_credit.o
 obj-y += sched_sedf.o
 obj-y += schedule.o
 obj-y += softirq.o
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/grant_table.c
--- a/xen/common/grant_table.c  Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/grant_table.c  Fri May 26 13:41:49 2006 -0600
@@ -505,15 +505,12 @@ gnttab_setup_table(
         goto out;
     }
 
-    if ( op.nr_frames <= NR_GRANT_FRAMES )
-    {
-        ASSERT(d->grant_table != NULL);
-        op.status = GNTST_okay;
-        for ( i = 0; i < op.nr_frames; i++ )
-        {
-            gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
-            (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
-        }
+    ASSERT(d->grant_table != NULL);
+    op.status = GNTST_okay;
+    for ( i = 0; i < op.nr_frames; i++ )
+    {
+        gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
+        (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
     }
 
     put_domain(d);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/kernel.c
--- a/xen/common/kernel.c       Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/kernel.c       Fri May 26 13:41:49 2006 -0600
@@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H
         switch ( fi.submap_idx )
         {
         case 0:
-            fi.submap = 0;
+            fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
             if ( shadow_mode_translate(current->domain) )
                 fi.submap |= 
                     (1U << XENFEAT_writable_page_tables) |
-                    (1U << XENFEAT_auto_translated_physmap) |
-                    (1U << XENFEAT_pae_pgdir_above_4gb);
+                    (1U << XENFEAT_auto_translated_physmap);
             if ( supervisor_mode_kernel )
                 fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
             break;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/schedule.c
--- a/xen/common/schedule.c     Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/schedule.c     Fri May 26 13:41:49 2006 -0600
@@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP
 
 extern struct scheduler sched_bvt_def;
 extern struct scheduler sched_sedf_def;
+extern struct scheduler sched_credit_def;
 static struct scheduler *schedulers[] = { 
     &sched_bvt_def,
     &sched_sedf_def,
+    &sched_credit_def,
     NULL
 };
 
@@ -639,6 +641,8 @@ static void t_timer_fn(void *unused)
 
     page_scrub_schedule_work();
 
+    SCHED_OP(tick, cpu);
+
     set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
 }
 
@@ -681,6 +685,7 @@ void __init scheduler_init(void)
         printk("Could not find scheduler: %s\n", opt_sched);
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
+    SCHED_OP(init);
 
     if ( idle_vcpu[0] != NULL )
     {
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/trace.c
--- a/xen/common/trace.c        Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/trace.c        Fri May 26 13:41:49 2006 -0600
@@ -91,6 +91,7 @@ static int alloc_trace_bufs(void)
     if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
     {
         printk("Xen trace buffers: memory allocation failed\n");
+        opt_tbuf_size = 0;
         return -EINVAL;
     }
 
@@ -135,10 +136,7 @@ static int tb_set_size(int size)
 
     opt_tbuf_size = size;
     if ( alloc_trace_bufs() != 0 )
-    {
-        opt_tbuf_size = 0;
-        return -EINVAL;
-    }
+        return -EINVAL;
 
     printk("Xen trace buffers: initialized\n");
     return 0;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/domain.h      Fri May 26 13:41:49 2006 -0600
@@ -120,6 +120,18 @@ struct arch_vcpu
     struct vcpu_guest_context guest_context
     __attribute__((__aligned__(16)));
 
+#ifdef CONFIG_X86_PAE
+    /*
+     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
+     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
+     * an L3 table while we are currently running on it (without using
+     * expensive atomic 64-bit operations).
+     */
+    l3_pgentry_t  lowmem_l3tab[2][4] __attribute__((__aligned__(32)));
+    unsigned long lowmem_l3tab_high_mfn[2]; /* The >=4GB MFN being shadowed. */
+    unsigned int  lowmem_l3tab_inuse;       /* Which lowmem_l3tab is in use? */
+#endif
+
     unsigned long      flags; /* TF_ */
 
     void (*schedule_tail) (struct vcpu *);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h  Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/domain.h  Fri May 26 13:41:49 2006 -0600
@@ -35,9 +35,9 @@ struct hvm_domain {
     unsigned int           nr_vcpus;
     unsigned int           apic_enabled;
     unsigned int           pae_enabled;
-
-    struct hvm_virpit      vpit;
-    u64                    guest_time;
+    s64                    tsc_frequency;
+    struct pl_time         pl_time;
+    
     struct hvm_virpic      vpic;
     struct hvm_vioapic     vioapic;
     struct hvm_io_handler  io_handler;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/svm/intr.h
--- a/xen/include/asm-x86/hvm/svm/intr.h        Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/svm/intr.h        Fri May 26 13:41:49 2006 -0600
@@ -21,7 +21,6 @@
 #ifndef __ASM_X86_HVM_SVM_INTR_H__
 #define __ASM_X86_HVM_SVM_INTR_H__
 
-extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit);
 extern void svm_intr_assist(void);
 extern void svm_intr_assist_update(struct vcpu *v, int highest_vector);
 extern void svm_intr_assist_test_valid(struct vcpu *v, 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/svm/svm.h Fri May 26 13:41:49 2006 -0600
@@ -48,7 +48,6 @@ extern void svm_do_launch(struct vcpu *v
 extern void svm_do_launch(struct vcpu *v);
 extern void svm_do_resume(struct vcpu *v);
 extern void svm_set_guest_time(struct vcpu *v, u64 gtime);
-extern u64 svm_get_guest_time(struct vcpu *v);
 extern void arch_svm_do_resume(struct vcpu *v);
 extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa);
 /* For debugging. Remove when no longer needed. */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h    Fri May 26 13:41:49 2006 -0600
@@ -32,6 +32,9 @@ struct hvm_vcpu {
     unsigned long   ioflags;
     struct mmio_op  mmio_op;
     struct vlapic   *vlapic;
+    s64             cache_tsc_offset;
+    u64             guest_time;
+
     /* For AP startup */
     unsigned long   init_sipi_sipi_state;
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri May 26 13:41:49 2006 -0600
@@ -34,7 +34,6 @@ extern void arch_vmx_do_launch(struct vc
 extern void arch_vmx_do_launch(struct vcpu *);
 extern void arch_vmx_do_resume(struct vcpu *);
 extern void set_guest_time(struct vcpu *v, u64 gtime);
-extern u64  get_guest_time(struct vcpu *v);
 
 extern unsigned int cpu_rev;
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vpit.h
--- a/xen/include/asm-x86/hvm/vpit.h    Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vpit.h    Fri May 26 13:41:49 2006 -0600
@@ -29,9 +29,7 @@
 #include <asm/hvm/vpic.h>
 
 #define PIT_FREQ 1193181
-
-#define PIT_BASE 0x40
-#define HVM_PIT_ACCEL_MODE 2
+#define PIT_BASE        0x40
 
 typedef struct PITChannelState {
     int count; /* can be 65536 */
@@ -48,47 +46,56 @@ typedef struct PITChannelState {
     u8 gate; /* timer start */
     s64 count_load_time;
     /* irq handling */
-    s64 next_transition_time;
-    int irq;
-    struct hvm_time_info *hvm_time;
-    u32 period; /* period(ns) based on count */
+    struct vcpu      *vcpu;
+    struct periodic_time *pt;
 } PITChannelState;
-
-struct hvm_time_info {
-    /* extra info for the mode 2 channel */
-    struct timer pit_timer;
-    struct vcpu *vcpu;          /* which vcpu the ac_timer bound to */
-    u64 period_cycles;          /* pit frequency in cpu cycles */
-    s_time_t count_advance;     /* accumulated count advance since last fire */
-    s_time_t count_point;        /* last point accumulating count advance */
-    unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
-    int first_injected;         /* flag to prevent shadow window */
-    s64 cache_tsc_offset;       /* cache of VMCS TSC_OFFSET offset */
-    u64 last_pit_gtime;         /* guest time when last pit is injected */
+   
+/*
+ * Abstract layer of periodic time, one short time.
+ */
+struct periodic_time {
+    char enabled;               /* enabled */
+    char one_shot;              /* one shot time */
+    char irq;
+    char first_injected;        /* flag to prevent shadow window */
+    u32 pending_intr_nr;        /* the couner for pending timer interrupts */
+    u32 period;                 /* frequency in ns */
+    u64 period_cycles;          /* frequency in cpu cycles */
+    s_time_t scheduled;         /* scheduled timer interrupt */
+    u64 last_plt_gtime;         /* platform time when last IRQ is injected */
+    struct timer timer;         /* ac_timer */
 };
 
-typedef struct hvm_virpit {
+typedef struct PITState {
     PITChannelState channels[3];
-    struct hvm_time_info time_info;
     int speaker_data_on;
     int dummy_refresh_clock;
-}hvm_virpit;
+} PITState;
 
+struct pl_time {    /* platform time */
+    struct periodic_time periodic_tm;
+    struct PITState      vpit;
+    /* TODO: RTC/ACPI time */
+};
 
-static __inline__ s_time_t get_pit_scheduled(
-    struct vcpu *v,
-    struct hvm_virpit *vpit)
+static __inline__ s_time_t get_scheduled(
+    struct vcpu *v, int irq,
+    struct periodic_time *pt)
 {
-    struct PITChannelState *s = &(vpit->channels[0]);
-    if ( is_irq_enabled(v, 0) ) {
-        return s->next_transition_time;
+    if ( is_irq_enabled(v, irq) ) {
+        return pt->scheduled;
     }
     else
         return -1;
 }
 
 /* to hook the ioreq packet to get the PIT initialization info */
-extern void pit_init(struct hvm_virpit *pit, struct vcpu *v);
-extern void pickup_deactive_ticks(struct hvm_virpit *vpit);
+extern void hvm_hooks_assist(struct vcpu *v);
+extern void pickup_deactive_ticks(struct periodic_time *vpit);
+extern u64 hvm_get_guest_time(struct vcpu *v);
+extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, 
char irq, char one_shot);
+extern void destroy_periodic_time(struct periodic_time *pt);
+void pit_init(struct vcpu *v, unsigned long cpu_khz);
+void pt_timer_fn(void *data);
 
 #endif /* __ASM_X86_HVM_VPIT_H__ */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/string.h
--- a/xen/include/asm-x86/string.h      Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/string.h      Fri May 26 13:41:49 2006 -0600
@@ -2,152 +2,6 @@
 #define __X86_STRING_H__
 
 #include <xen/config.h>
-
-#define __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dest, const char *src)
-{
-    long d0, d1, d2;
-    __asm__ __volatile__ (
-        "1: lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2)
-        : "0" (src), "1" (dest) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *dest, const char *src, size_t count)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "1: dec  %2        \n"
-        "   js   2f        \n"
-        "   lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        "   rep ; stosb    \n"
-        "2:                \n"
-        : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
-        : "0" (src), "1" (dest), "2" (count) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRCAT
-static inline char *strcat(char *dest, const char *src)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "   repne ; scasb  \n"
-        "   dec  %1        \n"
-        "1: lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRNCAT
-static inline char *strncat(char *dest, const char *src, size_t count)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "   repne ; scasb   \n"
-        "   dec  %1         \n"
-        "   mov  %8,%3      \n"
-        "1: dec  %3         \n"
-        "   js   2f         \n"
-        "   lodsb           \n"
-        "   stosb           \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "2: xor  %%eax,%%eax\n"
-        "   stosb"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count)
-        : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static inline int strcmp(const char *cs, const char *ct)
-{
-    long d0, d1;
-    register int __res;
-    __asm__ __volatile__ (
-        "1: lodsb           \n"
-        "   scasb           \n"
-        "   jne  2f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "   xor  %%eax,%%eax\n"
-        "   jmp  3f         \n"
-        "2: sbb  %%eax,%%eax\n"
-        "   or   $1,%%al    \n"
-        "3:                 \n"
-        : "=a" (__res), "=&S" (d0), "=&D" (d1)
-        : "1" (cs), "2" (ct) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRNCMP
-static inline int strncmp(const char *cs, const char *ct, size_t count)
-{
-    long d0, d1, d2;
-    register int __res;
-    __asm__ __volatile__ (
-        "1: dec  %3         \n"
-        "   js   2f         \n"
-        "   lodsb           \n"
-        "   scasb           \n"
-        "   jne  3f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "2: xor  %%eax,%%eax\n"
-        "   jmp  4f         \n"
-        "3: sbb  %%eax,%%eax\n"
-        "   or   $1,%%al    \n"
-        "4:                 \n"
-        : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
-        : "1" (cs), "2" (ct), "3" (count) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRCHR
-static inline char *strchr(const char *s, int c)
-{
-    long d0;
-    register char *__res;
-    __asm__ __volatile__ (
-        "   mov  %%al,%%ah  \n"
-        "1: lodsb           \n"
-        "   cmp  %%ah,%%al  \n"
-        "   je   2f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "   mov  $1,%1      \n"
-        "2: mov  %1,%0      \n"
-        "   dec  %0         \n"
-        : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRLEN
-static inline size_t strlen(const char *s)
-{
-    long d0;
-    register int __res;
-    __asm__ __volatile__ (
-        "   repne ; scasb  \n"
-        "   notl %0        \n"
-        "   decl %0        \n"
-        : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) );
-    return __res;
-}
 
 static inline void *__variable_memcpy(void *to, const void *from, size_t n)
 {
@@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v
 #define __HAVE_ARCH_MEMCMP
 #define memcmp __builtin_memcmp
 
-#define __HAVE_ARCH_MEMCHR
-static inline void *memchr(const void *cs, int c, size_t count)
-{
-    long d0;
-    register void *__res;
-    if ( count == 0 )
-        return NULL;
-    __asm__ __volatile__ (
-        "   repne ; scasb\n"
-        "   je   1f      \n"
-        "   mov  $1,%0   \n"
-        "1: dec  %0      \n"
-        : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) );
-    return __res;
-}
-
 static inline void *__memset_generic(void *s, char c, size_t count)
 {
     long d0, d1;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/x86_emulate.h Fri May 26 13:41:49 2006 -0600
@@ -9,8 +9,10 @@
 #ifndef __X86_EMULATE_H__
 #define __X86_EMULATE_H__
 
-/*
- * x86_mem_emulator:
+struct x86_emulate_ctxt;
+
+/*
+ * x86_emulate_ops:
  * 
  * These operations represent the instruction emulator's interface to memory.
  * There are two categories of operation: those that act on ordinary memory
@@ -47,7 +49,7 @@
 #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
 #define X86EMUL_RETRY_INSTR     2 /* retry the instruction for some reason */
 #define X86EMUL_CMPXCHG_FAILED  2 /* cmpxchg did not see expected value */
-struct x86_mem_emulator
+struct x86_emulate_ops
 {
     /*
      * read_std: Read bytes of standard (non-emulated/special) memory.
@@ -59,7 +61,8 @@ struct x86_mem_emulator
     int (*read_std)(
         unsigned long addr,
         unsigned long *val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * write_std: Write bytes of standard (non-emulated/special) memory.
@@ -71,7 +74,8 @@ struct x86_mem_emulator
     int (*write_std)(
         unsigned long addr,
         unsigned long val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * read_emulated: Read bytes from emulated/special memory area.
@@ -82,7 +86,8 @@ struct x86_mem_emulator
     int (*read_emulated)(
         unsigned long addr,
         unsigned long *val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * write_emulated: Read bytes from emulated/special memory area.
@@ -93,7 +98,8 @@ struct x86_mem_emulator
     int (*write_emulated)(
         unsigned long addr,
         unsigned long val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -107,11 +113,12 @@ struct x86_mem_emulator
         unsigned long addr,
         unsigned long old,
         unsigned long new,
-        unsigned int bytes);
-
-    /*
-     * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
-     *                   emulated/special memory area.
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
+     * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
+     *                     emulated/special memory area.
      *  @addr:  [IN ] Linear address to access.
      *  @old:   [IN ] Value expected to be current at @addr.
      *  @new:   [IN ] Value to write to @addr.
@@ -126,7 +133,8 @@ struct x86_mem_emulator
         unsigned long old_lo,
         unsigned long old_hi,
         unsigned long new_lo,
-        unsigned long new_hi);
+        unsigned long new_hi,
+        struct x86_emulate_ctxt *ctxt);
 };
 
 /* Standard reader/writer functions that callers may wish to use. */
@@ -134,14 +142,28 @@ x86_emulate_read_std(
 x86_emulate_read_std(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes);
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
 extern int
 x86_emulate_write_std(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes);
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
 
 struct cpu_user_regs;
+
+struct x86_emulate_ctxt
+{
+    /* Register state before/after emulation. */
+    struct cpu_user_regs   *regs;
+
+    /* Linear faulting address (if emulating a page-faulting instruction). */
+    unsigned long           cr2;
+
+    /* Emulated execution mode, represented by an X86EMUL_MODE value. */
+    int                     mode;
+};
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0 /* Real mode.             */
@@ -159,25 +181,19 @@ struct cpu_user_regs;
 /*
  * x86_emulate_memop: Emulate an instruction that faulted attempting to
  *                    read/write a 'special' memory area.
- *  @regs: Register state at time of fault.
- *  @cr2:  Linear faulting address within an emulated/special memory area.
- *  @ops:  Interface to access special memory.
- *  @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
  * Returns -1 on failure, 0 on success.
  */
-extern int
+int
 x86_emulate_memop(
-    struct cpu_user_regs *regs,
-    unsigned long cr2,
-    struct x86_mem_emulator *ops,
-    int mode);
+    struct x86_emulate_ctxt *ctxt,
+    struct x86_emulate_ops  *ops);
 
 /*
  * Given the 'reg' portion of a ModRM byte, and a register block, return a
  * pointer into the block that addresses the relevant register.
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
-extern void *
+void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h    Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/public/io/xenbus.h    Fri May 26 13:41:49 2006 -0600
@@ -9,34 +9,37 @@
 #ifndef _XEN_PUBLIC_IO_XENBUS_H
 #define _XEN_PUBLIC_IO_XENBUS_H
 
-/* The state of either end of the Xenbus, i.e. the current communication
-   status of initialisation across the bus.  States here imply nothing about
-   the state of the connection between the driver and the kernel's device
-   layers.  */
-typedef enum
-{
-  XenbusStateUnknown      = 0,
-  XenbusStateInitialising = 1,
-  XenbusStateInitWait     = 2,  /* Finished early initialisation, but waiting
-                                   for information from the peer or hotplug
-                                  scripts. */
-  XenbusStateInitialised  = 3,  /* Initialised and waiting for a connection
-                                  from the peer. */
-  XenbusStateConnected    = 4,
-  XenbusStateClosing      = 5,  /* The device is being closed due to an error
-                                  or an unplug event. */
-  XenbusStateClosed       = 6
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
 
-} XenbusState;
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,
+
+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed       = 6
+};
+typedef enum xenbus_state XenbusState;
 
 #endif /* _XEN_PUBLIC_IO_XENBUS_H */
-
-/*
- * Local variables:
- *  c-file-style: "linux"
- *  indent-tabs-mode: t
- *  c-indent-level: 8
- *  c-basic-offset: 8
- *  tab-width: 8
- * End:
- */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/public/sched_ctl.h
--- a/xen/include/public/sched_ctl.h    Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/public/sched_ctl.h    Fri May 26 13:41:49 2006 -0600
@@ -10,6 +10,7 @@
 /* Scheduler types. */
 #define SCHED_BVT      0
 #define SCHED_SEDF     4
+#define SCHED_CREDIT   5
 
 /* Set or get info? */
 #define SCHED_INFO_PUT 0
@@ -48,6 +49,10 @@ struct sched_adjdom_cmd {
             uint32_t extratime;
             uint32_t weight;
         } sedf;
+        struct csched_domain {
+            uint16_t weight;
+            uint16_t cap;
+        } credit;
     } u;
 };
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/xen/sched-if.h        Fri May 26 13:41:49 2006 -0600
@@ -58,6 +58,8 @@ struct scheduler {
     char *opt_name;         /* option name for this scheduler    */
     unsigned int sched_id;  /* ID for this scheduler             */
 
+    void         (*init)           (void);
+    void         (*tick)           (unsigned int cpu);
     int          (*alloc_task)     (struct vcpu *);
     void         (*add_task)       (struct vcpu *);
     void         (*free_task)      (struct domain *);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/xen/softirq.h Fri May 26 13:41:49 2006 -0600
@@ -26,6 +26,19 @@ asmlinkage void do_softirq(void);
 asmlinkage void do_softirq(void);
 extern void open_softirq(int nr, softirq_handler handler);
 
+static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr)
+{
+    int cpu;
+
+    for_each_cpu_mask(cpu, mask)
+    {
+        if ( test_and_set_bit(nr, &softirq_pending(cpu)) )
+            cpu_clear(cpu, mask);
+    }
+
+    smp_send_event_check_mask(mask);
+}
+
 static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
 {
     if ( !test_and_set_bit(nr, &softirq_pending(cpu)) )
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c       Fri May 26 
13:41:49 2006 -0600
@@ -0,0 +1,185 @@
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+
+/*
+ * Set of CPUs that remote admin software will allow us to bring online.
+ * Notified to us via xenbus.
+ */
+static cpumask_t xenbus_allowed_cpumask;
+
+/* Set of CPUs that local admin will allow us to bring online. */
+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
+
+static int local_cpu_hotplug_request(void)
+{
+       /*
+        * We assume a CPU hotplug request comes from local admin if it is made
+        * via a userspace process (i.e., one with a real mm_struct).
+        */
+       return (current->mm != NULL);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+       int err;
+       char dir[32], state[32];
+
+       if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
+               return;
+
+       sprintf(dir, "cpu/%d", cpu);
+       err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
+       if (err != 1) {
+               printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+               return;
+       }
+
+       if (strcmp(state, "online") == 0) {
+               cpu_set(cpu, xenbus_allowed_cpumask);
+               (void)cpu_up(cpu);
+       } else if (strcmp(state, "offline") == 0) {
+               cpu_clear(cpu, xenbus_allowed_cpumask);
+               (void)cpu_down(cpu);
+       } else {
+               printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+                      state, cpu);
+       }
+}
+
+static void handle_vcpu_hotplug_event(
+       struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+       int cpu;
+       char *cpustr;
+       const char *node = vec[XS_WATCH_PATH];
+
+       if ((cpustr = strstr(node, "cpu/")) != NULL) {
+               sscanf(cpustr, "cpu/%d", &cpu);
+               vcpu_hotplug(cpu);
+       }
+}
+
+static int smpboot_cpu_notify(struct notifier_block *notifier,
+                             unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+
+       /*
+        * We do this in a callback notifier rather than __cpu_disable()
+        * because local_cpu_hotplug_request() does not work in the latter
+        * as it's always executed from within a stopmachine kthread.
+        */
+       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
+               cpu_clear(cpu, local_allowed_cpumask);
+
+       return NOTIFY_OK;
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+                             unsigned long event, void *data)
+{
+       int i;
+
+       static struct xenbus_watch cpu_watch = {
+               .node = "cpu",
+               .callback = handle_vcpu_hotplug_event,
+               .flags = XBWF_new_thread };
+       (void)register_xenbus_watch(&cpu_watch);
+
+       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+               for_each_cpu(i)
+                       vcpu_hotplug(i);
+               printk(KERN_INFO "Brought up %ld CPUs\n",
+                      (long)num_online_cpus());
+       }
+
+       return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+       static struct notifier_block hotplug_cpu = {
+               .notifier_call = smpboot_cpu_notify };
+       static struct notifier_block xsn_cpu = {
+               .notifier_call = setup_cpu_watcher };
+
+       register_cpu_notifier(&hotplug_cpu);
+       register_xenstore_notifier(&xsn_cpu);
+
+       return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
+int smp_suspend(void)
+{
+       int i, err;
+
+       lock_cpu_hotplug();
+
+       /*
+        * Take all other CPUs offline. We hold the hotplug mutex to
+        * avoid other processes bringing up CPUs under our feet.
+        */
+       while (num_online_cpus() > 1) {
+               unlock_cpu_hotplug();
+               for_each_online_cpu(i) {
+                       if (i == 0)
+                               continue;
+                       err = cpu_down(i);
+                       if (err) {
+                               printk(KERN_CRIT "Failed to take all CPUs "
+                                      "down: %d.\n", err);
+                               for_each_cpu(i)
+                                       vcpu_hotplug(i);
+                               return err;
+                       }
+               }
+               lock_cpu_hotplug();
+       }
+
+       return 0;
+}
+
+void smp_resume(void)
+{
+       int cpu;
+
+       for_each_cpu(cpu)
+               cpu_initialize_context(cpu);
+
+       unlock_cpu_hotplug();
+
+       for_each_cpu(cpu)
+               vcpu_hotplug(cpu);
+}
+
+int cpu_up_is_allowed(unsigned int cpu)
+{
+       int rc = 0;
+
+       if (local_cpu_hotplug_request()) {
+               cpu_set(cpu, local_allowed_cpumask);
+               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
+                       printk("%s: attempt to bring up CPU %u disallowed by "
+                              "remote admin.\n", __FUNCTION__, cpu);
+                       rc = -EBUSY;
+               }
+       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
+                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
+               rc = -EBUSY;
+       }
+
+       return rc;
+}
+
+void init_xenbus_allowed_cpumask(void)
+{
+       xenbus_allowed_cpumask = cpu_present_map;
+}
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h       Fri May 
26 13:41:49 2006 -0600
@@ -0,0 +1,63 @@
+/*
+ * structures and definitions for the int 15, ax=e820 memory map
+ * scheme.
+ *
+ * In a nutshell, setup.S populates a scratch table in the
+ * empty_zero_block that contains a list of usable address/size
+ * duples.  setup.c, this information is transferred into the e820map,
+ * and in init.c/numa.c, that new information is used to mark pages
+ * reserved or not.
+ */
+#ifndef __E820_HEADER
+#define __E820_HEADER
+
+#include <linux/mmzone.h>
+
+#define E820MAP        0x2d0           /* our map */
+#define E820MAX        128             /* number of entries in E820MAP */
+#define E820NR 0x1e8           /* # entries in E820MAP */
+
+#define E820_RAM       1
+#define E820_RESERVED  2
+#define E820_ACPI      3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS       4
+
+#define HIGH_MEMORY    (1024*1024)
+
+#define LOWMEMSIZE()   (0x9f000)
+
+#ifndef __ASSEMBLY__
+struct e820entry {
+       u64 addr;       /* start of memory segment */
+       u64 size;       /* size of memory segment */
+       u32 type;       /* type of memory segment */
+} __attribute__((packed));
+
+struct e820map {
+    int nr_map;
+       struct e820entry map[E820MAX];
+};
+
+extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
+                                   unsigned size);
+extern void add_memory_region(unsigned long start, unsigned long size, 
+                             int type);
+extern void setup_memory_region(void);
+extern void contig_e820_setup(void); 
+extern unsigned long e820_end_of_ram(void);
+extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
+extern void e820_print_map(char *who);
+extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+
+extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned 
long end);
+extern void e820_setup_gap(struct e820entry *e820, int nr_map);
+extern unsigned long e820_hole_size(unsigned long start_pfn,
+                                   unsigned long end_pfn);
+
+extern void __init parse_memopt(char *p, char **end);
+extern void __init parse_memmapopt(char *p, char **end);
+
+extern struct e820map e820;
+#endif/*!__ASSEMBLY__*/
+
+#endif/*__E820_HEADER*/
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h    Fri May 26 13:41:49 
2006 -0600
@@ -0,0 +1,42 @@
+#ifndef __XEN_CPU_HOTPLUG_H__
+#define __XEN_CPU_HOTPLUG_H__
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+
+#if defined(CONFIG_HOTPLUG_CPU)
+
+#if defined(CONFIG_X86)
+void cpu_initialize_context(unsigned int cpu);
+#else
+#define cpu_initialize_context(cpu)    ((void)0)
+#endif
+
+int cpu_up_is_allowed(unsigned int cpu);
+void init_xenbus_allowed_cpumask(void);
+int smp_suspend(void);
+void smp_resume(void);
+
+#else /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#define cpu_up_is_allowed(cpu)         (1)
+#define init_xenbus_allowed_cpumask()  ((void)0)
+
+static inline int smp_suspend(void)
+{
+       if (num_online_cpus() > 1) {
+               printk(KERN_WARNING "Can't suspend SMP guests "
+                      "without CONFIG_HOTPLUG_CPU\n");
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static inline void smp_resume(void)
+{
+}
+
+#endif /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#endif /* __XEN_CPU_HOTPLUG_H__ */
diff -r 9d52a66c7499 -r c073ebdbde8c 
patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch Fri May 26 13:41:49 
2006 -0600
@@ -0,0 +1,18 @@
+diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c 
./drivers/ide/ide-lib.c
+--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c  2006-05-02 
22:38:44.000000000 +0100
++++ ./drivers/ide/ide-lib.c    2006-05-24 18:37:05.000000000 +0100
+@@ -410,10 +410,10 @@
+ {
+       u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
+ 
+-      if (!PCI_DMA_BUS_IS_PHYS) {
+-              addr = BLK_BOUNCE_ANY;
+-      } else if (on && drive->media == ide_disk) {
+-              if (HWIF(drive)->pci_dev)
++      if (on && drive->media == ide_disk) {
++              if (!PCI_DMA_BUS_IS_PHYS)
++                      addr = BLK_BOUNCE_ANY;
++              else if (HWIF(drive)->pci_dev)
+                       addr = HWIF(drive)->pci_dev->dma_mask;
+       }
+ 
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_csched.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_csched.c   Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * (C) 2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ *        File: xc_csched.c
+ *      Author: Emmanuel Ackaouy
+ *
+ * Description: XC Interface to the credit scheduler
+ *
+ */
+#include "xc_private.h"
+
+
+int
+xc_csched_domain_set(
+    int xc_handle,
+    uint32_t domid,
+    struct csched_domain *sdom)
+{
+    DECLARE_DOM0_OP;
+
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t) domid;
+    op.u.adjustdom.sched_id = SCHED_CREDIT;
+    op.u.adjustdom.direction = SCHED_INFO_PUT;
+    op.u.adjustdom.u.credit = *sdom;
+
+    return do_dom0_op(xc_handle, &op);
+}
+
+int
+xc_csched_domain_get(
+    int xc_handle,
+    uint32_t domid,
+    struct csched_domain *sdom)
+{
+    DECLARE_DOM0_OP;
+    int err;
+
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t) domid;
+    op.u.adjustdom.sched_id = SCHED_CREDIT;
+    op.u.adjustdom.direction = SCHED_INFO_GET;
+
+    err = do_dom0_op(xc_handle, &op);
+    if ( err == 0 )
+        *sdom = op.u.adjustdom.u.credit;
+
+    return err;
+}
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_linux.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenstore/xenstored_linux.c  Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "xenstored_core.h"
+
+#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
+#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
+
+evtchn_port_t xenbus_evtchn(void)
+{
+       int fd;
+       int rc;
+       evtchn_port_t port; 
+       char str[20]; 
+
+       fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
+       if (fd == -1)
+               return -1;
+
+       rc = read(fd, str, sizeof(str)); 
+       if (rc == -1)
+       {
+               int err = errno;
+               close(fd);
+               errno = err;
+               return -1;
+       }
+
+       str[rc] = '\0'; 
+       port = strtoul(str, NULL, 0); 
+
+       close(fd); 
+       return port;
+}
+
+void *xenbus_map(void)
+{
+       int fd;
+       void *addr;
+
+       fd = open(XENSTORED_PROC_KVA, O_RDWR);
+       if (fd == -1)
+               return NULL;
+
+       addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
+               MAP_SHARED, fd, 0);
+
+       if (addr == MAP_FAILED)
+               addr = NULL;
+
+       close(fd);
+
+       return addr;
+}
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/sched_credit.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/sched_credit.c Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,1233 @@
+/****************************************************************************
+ * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ *        File: common/csched_credit.c
+ *      Author: Emmanuel Ackaouy
+ *
+ * Description: Credit-based SMP CPU scheduler
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/time.h>
+#include <xen/perfc.h>
+#include <xen/sched-if.h>
+#include <xen/softirq.h>
+#include <asm/atomic.h>
+
+
+/*
+ * CSCHED_STATS
+ *
+ * Manage very basic counters and stats.
+ *
+ * Useful for debugging live systems. The stats are displayed
+ * with runq dumps ('r' on the Xen console).
+ */
+#define CSCHED_STATS
+
+
+/*
+ * Basic constants
+ */
+#define CSCHED_TICK             10      /* milliseconds */
+#define CSCHED_TSLICE           30      /* milliseconds */
+#define CSCHED_ACCT_NTICKS      3
+#define CSCHED_ACCT_PERIOD      (CSCHED_ACCT_NTICKS * CSCHED_TICK)
+#define CSCHED_DEFAULT_WEIGHT   256
+
+
+/*
+ * Priorities
+ */
+#define CSCHED_PRI_TS_UNDER     -1      /* time-share w/ credits */
+#define CSCHED_PRI_TS_OVER      -2      /* time-share w/o credits */
+#define CSCHED_PRI_IDLE         -64     /* idle */
+#define CSCHED_PRI_TS_PARKED    -65     /* time-share w/ capped credits */
+
+
+/*
+ * Useful macros
+ */
+#define CSCHED_PCPU(_c)     ((struct csched_pcpu 
*)schedule_data[_c].sched_priv)
+#define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
+#define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
+#define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
+
+
+/*
+ * Stats
+ */
+#ifdef CSCHED_STATS
+
+#define CSCHED_STAT(_X)         (csched_priv.stats._X)
+#define CSCHED_STAT_DEFINE(_X)  uint32_t _X;
+#define CSCHED_STAT_PRINTK(_X)                                  \
+    do                                                          \
+    {                                                           \
+        printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X));  \
+    } while ( 0 );
+
+#define CSCHED_STATS_EXPAND_SCHED(_MACRO)   \
+    _MACRO(vcpu_alloc)                      \
+    _MACRO(vcpu_add)                        \
+    _MACRO(vcpu_sleep)                      \
+    _MACRO(vcpu_wake_running)               \
+    _MACRO(vcpu_wake_onrunq)                \
+    _MACRO(vcpu_wake_runnable)              \
+    _MACRO(vcpu_wake_not_runnable)          \
+    _MACRO(dom_free)                        \
+    _MACRO(schedule)                        \
+    _MACRO(tickle_local_idler)              \
+    _MACRO(tickle_local_over)               \
+    _MACRO(tickle_local_under)              \
+    _MACRO(tickle_local_other)              \
+    _MACRO(acct_run)                        \
+    _MACRO(acct_no_work)                    \
+    _MACRO(acct_balance)                    \
+    _MACRO(acct_reorder)                    \
+    _MACRO(acct_min_credit)                 \
+    _MACRO(acct_vcpu_active)                \
+    _MACRO(acct_vcpu_idle)                  \
+    _MACRO(acct_vcpu_credit_min)
+
+#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
+    _MACRO(vcpu_migrate)                                \
+    _MACRO(load_balance_idle)                           \
+    _MACRO(load_balance_over)                           \
+    _MACRO(load_balance_other)                          \
+    _MACRO(steal_trylock_failed)                        \
+    _MACRO(steal_peer_down)                             \
+    _MACRO(steal_peer_idle)                             \
+    _MACRO(steal_peer_running)                          \
+    _MACRO(steal_peer_pinned)                           \
+    _MACRO(tickle_idlers_none)                          \
+    _MACRO(tickle_idlers_some)
+
+#ifndef NDEBUG
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)  \
+    _MACRO(vcpu_check)
+#else
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+#endif
+
+#define CSCHED_STATS_EXPAND(_MACRO)                 \
+    CSCHED_STATS_EXPAND_SCHED(_MACRO)               \
+    CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
+    CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+
+#define CSCHED_STATS_RESET()                                        \
+    do                                                              \
+    {                                                               \
+        memset(&csched_priv.stats, 0, sizeof(csched_priv.stats));   \
+    } while ( 0 )
+
+#define CSCHED_STATS_DEFINE()                   \
+    struct                                      \
+    {                                           \
+        CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
+    } stats
+
+#define CSCHED_STATS_PRINTK()                   \
+    do                                          \
+    {                                           \
+        printk("stats:\n");                     \
+        CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
+    } while ( 0 )
+
+#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
+
+#else /* CSCHED_STATS */
+
+#define CSCHED_STATS_RESET()    do {} while ( 0 )
+#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
+#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
+#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
+
+#endif /* CSCHED_STATS */
+
+
+/*
+ * Physical CPU
+ */
+struct csched_pcpu {
+    struct list_head runq;
+    uint32_t runq_sort_last;
+};
+
+/*
+ * Virtual CPU
+ */
+struct csched_vcpu {
+    struct list_head runq_elem;
+    struct list_head active_vcpu_elem;
+    struct csched_dom *sdom;
+    struct vcpu *vcpu;
+    atomic_t credit;
+    int credit_last;
+    uint32_t credit_incr;
+    uint32_t state_active;
+    uint32_t state_idle;
+    int16_t pri;
+};
+
+/*
+ * Domain
+ */
+struct csched_dom {
+    struct list_head active_vcpu;
+    struct list_head active_sdom_elem;
+    struct domain *dom;
+    uint16_t active_vcpu_count;
+    uint16_t weight;
+    uint16_t cap;
+};
+
+/*
+ * System-wide private data
+ */
+struct csched_private {
+    spinlock_t lock;
+    struct list_head active_sdom;
+    uint32_t ncpus;
+    unsigned int master;
+    cpumask_t idlers;
+    uint32_t weight;
+    uint32_t credit;
+    int credit_balance;
+    uint32_t runq_sort;
+    CSCHED_STATS_DEFINE();
+};
+
+
+/*
+ * Global variables
+ */
+static struct csched_private csched_priv;
+
+
+
+static inline int
+__vcpu_on_runq(struct csched_vcpu *svc)
+{
+    return !list_empty(&svc->runq_elem);
+}
+
+static inline struct csched_vcpu *
+__runq_elem(struct list_head *elem)
+{
+    return list_entry(elem, struct csched_vcpu, runq_elem);
+}
+
+static inline void
+__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
+{
+    const struct list_head * const runq = RUNQ(cpu);
+    struct list_head *iter;
+
+    BUG_ON( __vcpu_on_runq(svc) );
+    BUG_ON( cpu != svc->vcpu->processor );
+
+    list_for_each( iter, runq )
+    {
+        const struct csched_vcpu * const iter_svc = __runq_elem(iter);
+        if ( svc->pri > iter_svc->pri )
+            break;
+    }
+
+    list_add_tail(&svc->runq_elem, iter);
+}
+
+static inline void
+__runq_remove(struct csched_vcpu *svc)
+{
+    BUG_ON( !__vcpu_on_runq(svc) );
+    list_del_init(&svc->runq_elem);
+}
+
+static inline void
+__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
+{
+    struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr);
+    cpumask_t mask;
+
+    ASSERT(cur);
+    cpus_clear(mask);
+
+    /* If strictly higher priority than current VCPU, signal the CPU */
+    if ( new->pri > cur->pri )
+    {
+        if ( cur->pri == CSCHED_PRI_IDLE )
+            CSCHED_STAT_CRANK(tickle_local_idler);
+        else if ( cur->pri == CSCHED_PRI_TS_OVER )
+            CSCHED_STAT_CRANK(tickle_local_over);
+        else if ( cur->pri == CSCHED_PRI_TS_UNDER )
+            CSCHED_STAT_CRANK(tickle_local_under);
+        else
+            CSCHED_STAT_CRANK(tickle_local_other);
+
+        cpu_set(cpu, mask);
+    }
+
+    /*
+     * If this CPU has at least two runnable VCPUs, we tickle any idlers to
+     * let them know there is runnable work in the system...
+     */
+    if ( cur->pri > CSCHED_PRI_IDLE )
+    {
+        if ( cpus_empty(csched_priv.idlers) )
+        {
+            CSCHED_STAT_CRANK(tickle_idlers_none);
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(tickle_idlers_some);
+            cpus_or(mask, mask, csched_priv.idlers);
+        }
+    }
+
+    /* Send scheduler interrupts to designated CPUs */
+    if ( !cpus_empty(mask) )
+        cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
+}
+
+static void
+csched_pcpu_init(int cpu)
+{
+    struct csched_pcpu *spc;
+    unsigned long flags;
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    /* Initialize/update system-wide config */
+    csched_priv.credit += CSCHED_ACCT_PERIOD;
+    if ( csched_priv.ncpus <= cpu )
+        csched_priv.ncpus = cpu + 1;
+    if ( csched_priv.master >= csched_priv.ncpus )
+        csched_priv.master = cpu;
+
+    /* Allocate per-PCPU info */
+    spc = xmalloc(struct csched_pcpu);
+    BUG_ON( spc == NULL );
+    INIT_LIST_HEAD(&spc->runq);
+    spc->runq_sort_last = csched_priv.runq_sort;
+    schedule_data[cpu].sched_priv = spc;
+
+    /* Start off idling... */
+    BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) );
+    cpu_set(cpu, csched_priv.idlers);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+}
+
+#ifndef NDEBUG
+static inline void
+__csched_vcpu_check(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( svc->vcpu != vc );
+    BUG_ON( sdom != CSCHED_DOM(vc->domain) );
+    if ( sdom )
+    {
+        BUG_ON( is_idle_vcpu(vc) );
+        BUG_ON( sdom->dom != vc->domain );
+    }
+    else
+    {
+        BUG_ON( !is_idle_vcpu(vc) );
+    }
+
+    CSCHED_STAT_CRANK(vcpu_check);
+}
+#define CSCHED_VCPU_CHECK(_vc)  (__csched_vcpu_check(_vc))
+#else
+#define CSCHED_VCPU_CHECK(_vc)
+#endif
+
+static inline int
+__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+{
+    /*
+     * Don't pick up work that's in the peer's scheduling tail. Also only pick
+     * up work that's allowed to run on our CPU.
+     */
+    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_running);
+        return 0;
+    }
+
+    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_pinned);
+        return 0;
+    }
+
+    return 1;
+}
+
+static void
+csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
+{
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    /* Update credits */
+    atomic_sub(credit_dec, &svc->credit);
+
+    /* Put this VCPU and domain back on the active list if it was idling */
+    if ( list_empty(&svc->active_vcpu_elem) )
+    {
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( list_empty(&svc->active_vcpu_elem) )
+        {
+            CSCHED_STAT_CRANK(acct_vcpu_active);
+            svc->state_active++;
+
+            sdom->active_vcpu_count++;
+            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+            if ( list_empty(&sdom->active_sdom_elem) )
+            {
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+                csched_priv.weight += sdom->weight;
+            }
+        }
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+}
+
+static inline void
+__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+    CSCHED_STAT_CRANK(acct_vcpu_idle);
+    svc->state_idle++;
+
+    sdom->active_vcpu_count--;
+    list_del_init(&svc->active_vcpu_elem);
+    if ( list_empty(&sdom->active_vcpu) )
+    {
+        BUG_ON( csched_priv.weight < sdom->weight );
+        list_del_init(&sdom->active_sdom_elem);
+        csched_priv.weight -= sdom->weight;
+    }
+
+    atomic_set(&svc->credit, 0);
+}
+
+static int
+csched_vcpu_alloc(struct vcpu *vc)
+{
+    struct domain * const dom = vc->domain;
+    struct csched_dom *sdom;
+    struct csched_vcpu *svc;
+    int16_t pri;
+
+    CSCHED_STAT_CRANK(vcpu_alloc);
+
+    /* Allocate, if appropriate, per-domain info */
+    if ( is_idle_vcpu(vc) )
+    {
+        sdom = NULL;
+        pri = CSCHED_PRI_IDLE;
+    }
+    else if ( CSCHED_DOM(dom) )
+    {
+        sdom = CSCHED_DOM(dom);
+        pri = CSCHED_PRI_TS_UNDER;
+    }
+    else 
+    {
+        sdom = xmalloc(struct csched_dom);
+        if ( !sdom )
+            return -1;
+
+        /* Initialize credit and weight */
+        INIT_LIST_HEAD(&sdom->active_vcpu);
+        sdom->active_vcpu_count = 0;
+        INIT_LIST_HEAD(&sdom->active_sdom_elem);
+        sdom->dom = dom;
+        sdom->weight = CSCHED_DEFAULT_WEIGHT;
+        sdom->cap = 0U;
+        dom->sched_priv = sdom;
+        pri = CSCHED_PRI_TS_UNDER;
+    }
+
+    /* Allocate per-VCPU info */
+    svc = xmalloc(struct csched_vcpu);
+    if ( !svc )
+        return -1;
+
+    INIT_LIST_HEAD(&svc->runq_elem);
+    INIT_LIST_HEAD(&svc->active_vcpu_elem);
+    svc->sdom = sdom;
+    svc->vcpu = vc;
+    atomic_set(&svc->credit, 0);
+    svc->credit_last = 0;
+    svc->credit_incr = 0U;
+    svc->state_active = 0U;
+    svc->state_idle = 0U;
+    svc->pri = pri;
+    vc->sched_priv = svc;
+
+    CSCHED_VCPU_CHECK(vc);
+
+    /* Attach fair-share VCPUs to the accounting list */
+    if ( likely(sdom != NULL) )
+        csched_vcpu_acct(svc, 0);
+
+    return 0;
+}
+
+static void
+csched_vcpu_add(struct vcpu *vc) 
+{
+    CSCHED_STAT_CRANK(vcpu_add);
+
+    /* Allocate per-PCPU info */
+    if ( unlikely(!CSCHED_PCPU(vc->processor)) )
+        csched_pcpu_init(vc->processor);
+
+    CSCHED_VCPU_CHECK(vc);
+}
+
+static void
+csched_vcpu_free(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    BUG_ON( sdom == NULL );
+    BUG_ON( !list_empty(&svc->runq_elem) );
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    if ( !list_empty(&svc->active_vcpu_elem) )
+        __csched_vcpu_acct_idle_locked(svc);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    xfree(svc);
+}
+
+static void
+csched_vcpu_sleep(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+
+    CSCHED_STAT_CRANK(vcpu_sleep);
+
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( schedule_data[vc->processor].curr == vc )
+        cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
+    else if ( __vcpu_on_runq(svc) )
+        __runq_remove(svc);
+}
+
+static void
+csched_vcpu_wake(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    const unsigned int cpu = vc->processor;
+
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( unlikely(schedule_data[cpu].curr == vc) )
+    {
+        CSCHED_STAT_CRANK(vcpu_wake_running);
+        return;
+    }
+    if ( unlikely(__vcpu_on_runq(svc)) )
+    {
+        CSCHED_STAT_CRANK(vcpu_wake_onrunq);
+        return;
+    }
+
+    if ( likely(vcpu_runnable(vc)) )
+        CSCHED_STAT_CRANK(vcpu_wake_runnable);
+    else
+        CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
+
+    /* Put the VCPU on the runq and tickle CPUs */
+    __runq_insert(cpu, svc);
+    __runq_tickle(cpu, svc);
+}
+
+static int
+csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity)
+{
+    unsigned long flags;
+    int lcpu;
+
+    if ( vc == current )
+    {
+        /* No locking needed but also can't move on the spot... */
+        if ( !cpu_isset(vc->processor, *affinity) )
+            return -EBUSY;
+
+        vc->cpu_affinity = *affinity;
+    }
+    else
+    {
+        /* Pause, modify, and unpause. */
+        vcpu_pause(vc);
+
+        vc->cpu_affinity = *affinity;
+        if ( !cpu_isset(vc->processor, vc->cpu_affinity) )
+        {
+            /*
+             * We must grab the scheduler lock for the CPU currently owning
+             * this VCPU before changing its ownership.
+             */
+            vcpu_schedule_lock_irqsave(vc, flags);
+            lcpu = vc->processor;
+
+            vc->processor = first_cpu(vc->cpu_affinity);
+
+            spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags);
+        }
+
+        vcpu_unpause(vc);
+    }
+
+    return 0;
+}
+
+static int
+csched_dom_cntl(
+    struct domain *d,
+    struct sched_adjdom_cmd *cmd)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(d);
+    unsigned long flags;
+
+    if ( cmd->direction == SCHED_INFO_GET )
+    {
+        cmd->u.credit.weight = sdom->weight;
+        cmd->u.credit.cap = sdom->cap;
+    }
+    else
+    {
+        ASSERT( cmd->direction == SCHED_INFO_PUT );
+
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( cmd->u.credit.weight != 0 )
+        {
+            csched_priv.weight -= sdom->weight;
+            sdom->weight = cmd->u.credit.weight;
+            csched_priv.weight += sdom->weight;
+        }
+
+        if ( cmd->u.credit.cap != (uint16_t)~0U )
+            sdom->cap = cmd->u.credit.cap;
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+
+    return 0;
+}
+
+static void
+csched_dom_free(struct domain *dom)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(dom);
+    int i;
+
+    CSCHED_STAT_CRANK(dom_free);
+
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+    {
+        if ( dom->vcpu[i] )
+            csched_vcpu_free(dom->vcpu[i]);
+    }
+
+    xfree(sdom);
+}
+
+/*
+ * This is a O(n) optimized sort of the runq.
+ *
+ * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
+ * through the runq and move up any UNDERs that are preceded by OVERS. We
+ * remember the last UNDER to make the move up operation O(1).
+ */
+static void
+csched_runq_sort(unsigned int cpu)
+{
+    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
+    struct list_head *runq, *elem, *next, *last_under;
+    struct csched_vcpu *svc_elem;
+    unsigned long flags;
+    int sort_epoch;
+
+    sort_epoch = csched_priv.runq_sort;
+    if ( sort_epoch == spc->runq_sort_last )
+        return;
+
+    spc->runq_sort_last = sort_epoch;
+
+    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+
+    runq = &spc->runq;
+    elem = runq->next;
+    last_under = runq;
+
+    while ( elem != runq )
+    {
+        next = elem->next;
+        svc_elem = __runq_elem(elem);
+
+        if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
+        {
+            /* does elem need to move up the runq? */
+            if ( elem->prev != last_under )
+            {
+                list_del(elem);
+                list_add(elem, last_under);
+            }
+            last_under = elem;
+        }
+
+        elem = next;
+    }
+
+    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
+}
+
+static void
+csched_acct(void)
+{
+    unsigned long flags;
+    struct list_head *iter_vcpu, *next_vcpu;
+    struct list_head *iter_sdom, *next_sdom;
+    struct csched_vcpu *svc;
+    struct csched_dom *sdom;
+    uint32_t credit_total;
+    uint32_t weight_total;
+    uint32_t weight_left;
+    uint32_t credit_fair;
+    uint32_t credit_peak;
+    int credit_balance;
+    int credit_xtra;
+    int credit;
+
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    weight_total = csched_priv.weight;
+    credit_total = csched_priv.credit;
+
+    /* Converge balance towards 0 when it drops negative */
+    if ( csched_priv.credit_balance < 0 )
+    {
+        credit_total -= csched_priv.credit_balance;
+        CSCHED_STAT_CRANK(acct_balance);
+    }
+
+    if ( unlikely(weight_total == 0) )
+    {
+        csched_priv.credit_balance = 0;
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+        CSCHED_STAT_CRANK(acct_no_work);
+        return;
+    }
+
+    CSCHED_STAT_CRANK(acct_run);
+
+    weight_left = weight_total;
+    credit_balance = 0;
+    credit_xtra = 0;
+
+    list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+    {
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        BUG_ON( is_idle_domain(sdom->dom) );
+        BUG_ON( sdom->active_vcpu_count == 0 );
+        BUG_ON( sdom->weight == 0 );
+        BUG_ON( sdom->weight > weight_left );
+
+        weight_left -= sdom->weight;
+
+        /*
+         * A domain's fair share is computed using its weight in competition
+         * with that of all other active domains.
+         *
+         * At most, a domain can use credits to run all its active VCPUs
+         * for one full accounting period. We allow a domain to earn more
+         * only when the system-wide credit balance is negative.
+         */
+        credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD;
+        if ( csched_priv.credit_balance < 0 )
+        {
+            credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+                             (weight_total - 1)
+                           ) / weight_total;
+        }
+        if ( sdom->cap != 0U )
+        {
+            uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 
100;
+            if ( credit_cap < credit_peak )
+                credit_peak = credit_cap;
+        }
+
+        credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
+                      ) / weight_total;
+
+        if ( credit_fair < credit_peak )
+        {
+            credit_xtra = 1;
+        }
+        else
+        {
+            if ( weight_left != 0U )
+            {
+                /* Give other domains a chance at unused credits */
+                credit_total += ( ( ( credit_fair - credit_peak
+                                    ) * weight_total
+                                  ) + ( weight_left - 1 )
+                                ) / weight_left;
+            }
+
+            if ( credit_xtra )
+            {
+                /*
+                 * Lazily keep domains with extra credits at the head of
+                 * the queue to give others a chance at them in future
+                 * accounting periods.
+                 */
+                CSCHED_STAT_CRANK(acct_reorder);
+                list_del(&sdom->active_sdom_elem);
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+            }
+
+            credit_fair = credit_peak;
+        }
+
+        /* Compute fair share per VCPU */
+        credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
+                      ) / sdom->active_vcpu_count;
+
+
+        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+        {
+            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+            BUG_ON( sdom != svc->sdom );
+
+            /* Increment credit */
+            atomic_add(credit_fair, &svc->credit);
+            credit = atomic_read(&svc->credit);
+
+            /*
+             * Recompute priority or, if VCPU is idling, remove it from
+             * the active list.
+             */
+            if ( credit < 0 )
+            {
+                if ( sdom->cap == 0U )
+                    svc->pri = CSCHED_PRI_TS_OVER;
+                else
+                    svc->pri = CSCHED_PRI_TS_PARKED;
+
+                if ( credit < -CSCHED_TSLICE )
+                {
+                    CSCHED_STAT_CRANK(acct_min_credit);
+                    credit = -CSCHED_TSLICE;
+                    atomic_set(&svc->credit, credit);
+                }
+            }
+            else
+            {
+                svc->pri = CSCHED_PRI_TS_UNDER;
+
+                if ( credit > CSCHED_TSLICE )
+                    __csched_vcpu_acct_idle_locked(svc);
+            }
+
+            svc->credit_last = credit;
+            svc->credit_incr = credit_fair;
+            credit_balance += credit;
+        }
+    }
+
+    csched_priv.credit_balance = credit_balance;
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    /* Inform each CPU that its runq needs to be sorted */
+    csched_priv.runq_sort++;
+}
+
+static void
+csched_tick(unsigned int cpu)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(current);
+    struct csched_dom * const sdom = svc->sdom;
+
+    /*
+     * Accounting for running VCPU
+     *
+     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
+     */
+    if ( likely(sdom != NULL) )
+    {
+        csched_vcpu_acct(svc, CSCHED_TICK);
+    }
+
+    /*
+     * Accounting duty
+     *
+     * Note: Currently, this is always done by the master boot CPU. Eventually,
+     * we could distribute or at the very least cycle the duty.
+     */
+    if ( (csched_priv.master == cpu) &&
+         (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 )
+    {
+        csched_acct();
+    }
+
+    /*
+     * Check if runq needs to be sorted
+     *
+     * Every physical CPU resorts the runq after the accounting master has
+     * modified priorities. This is a special O(n) sort and runs at most
+     * once per accounting period (currently 30 milliseconds).
+     */
+    csched_runq_sort(cpu);
+}
+
+static struct csched_vcpu *
+csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
+{
+    struct list_head *iter;
+    struct csched_vcpu *speer;
+    struct vcpu *vc;
+
+    list_for_each( iter, &spc->runq )
+    {
+        speer = __runq_elem(iter);
+
+        /*
+         * If next available VCPU here is not of higher priority than ours,
+         * this PCPU is useless to us.
+         */
+        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
+        {
+            CSCHED_STAT_CRANK(steal_peer_idle);
+            break;
+        }
+
+        /* Is this VCPU is runnable on our PCPU? */
+        vc = speer->vcpu;
+        BUG_ON( is_idle_vcpu(vc) );
+
+        if ( __csched_vcpu_is_stealable(cpu, vc) )
+        {
+            /* We got a candidate. Grab it! */
+            __runq_remove(speer);
+            vc->processor = cpu;
+
+            return speer;
+        }
+    }
+
+    return NULL;
+}
+
+static struct csched_vcpu *
+csched_load_balance(int cpu, struct csched_vcpu *snext)
+{
+    struct csched_pcpu *spc;
+    struct csched_vcpu *speer;
+    int peer_cpu;
+
+    if ( snext->pri == CSCHED_PRI_IDLE )
+        CSCHED_STAT_CRANK(load_balance_idle);
+    else if ( snext->pri == CSCHED_PRI_TS_OVER )
+        CSCHED_STAT_CRANK(load_balance_over);
+    else
+        CSCHED_STAT_CRANK(load_balance_other);
+
+    peer_cpu = cpu;
+    BUG_ON( peer_cpu != snext->vcpu->processor );
+
+    while ( 1 )
+    {
+        /* For each PCPU in the system starting with our neighbour... */
+        peer_cpu = (peer_cpu + 1) % csched_priv.ncpus;
+        if ( peer_cpu == cpu )
+            break;
+
+        BUG_ON( peer_cpu >= csched_priv.ncpus );
+        BUG_ON( peer_cpu == cpu );
+
+        /*
+         * Get ahold of the scheduler lock for this peer CPU.
+         *
+         * Note: We don't spin on this lock but simply try it. Spinning could
+         * cause a deadlock if the peer CPU is also load balancing and trying
+         * to lock this CPU.
+         */
+        if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) )
+        {
+
+            spc = CSCHED_PCPU(peer_cpu);
+            if ( unlikely(spc == NULL) )
+            {
+                CSCHED_STAT_CRANK(steal_peer_down);
+                speer = NULL;
+            }
+            else
+            {
+                speer = csched_runq_steal(spc, cpu, snext->pri);
+            }
+
+            spin_unlock(&schedule_data[peer_cpu].schedule_lock);
+
+            /* Got one! */
+            if ( speer )
+            {
+                CSCHED_STAT_CRANK(vcpu_migrate);
+                return speer;
+            }
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(steal_trylock_failed);
+        }
+    }
+
+
+    /* Failed to find more important work */
+    __runq_remove(snext);
+    return snext;
+}
+
+/*
+ * This function is in the critical path. It is designed to be simple and
+ * fast for the common case.
+ */
+static struct task_slice
+csched_schedule(s_time_t now)
+{
+    const int cpu = smp_processor_id();
+    struct list_head * const runq = RUNQ(cpu);
+    struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+    struct csched_vcpu *snext;
+    struct task_slice ret;
+
+    CSCHED_STAT_CRANK(schedule);
+    CSCHED_VCPU_CHECK(current);
+
+    /*
+     * Select next runnable local VCPU (ie top of local runq)
+     */
+    if ( vcpu_runnable(current) )
+        __runq_insert(cpu, scurr);
+    else
+        BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
+
+    snext = __runq_elem(runq->next);
+
+    /*
+     * SMP Load balance:
+     *
+     * If the next highest priority local runnable VCPU has already eaten
+     * through its credits, look on other PCPUs to see if we have more
+     * urgent work... If not, csched_load_balance() will return snext, but
+     * already removed from the runq.
+     */
+    if ( snext->pri > CSCHED_PRI_TS_OVER )
+        __runq_remove(snext);
+    else
+        snext = csched_load_balance(cpu, snext);
+
+    /*
+     * Update idlers mask if necessary. When we're idling, other CPUs
+     * will tickle us when they get extra work.
+     */
+    if ( snext->pri == CSCHED_PRI_IDLE )
+    {
+        if ( !cpu_isset(cpu, csched_priv.idlers) )
+            cpu_set(cpu, csched_priv.idlers);
+    }
+    else if ( cpu_isset(cpu, csched_priv.idlers) )
+    {
+        cpu_clear(cpu, csched_priv.idlers);
+    }
+
+    /*
+     * Return task to run next...
+     */
+    ret.time = MILLISECS(CSCHED_TSLICE);
+    ret.task = snext->vcpu;
+
+    CSCHED_VCPU_CHECK(ret.task);
+    BUG_ON( !vcpu_runnable(ret.task) );
+
+    return ret;
+}
+
+static void
+csched_dump_vcpu(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    printk("[%i.%i] pri=%i cpu=%i",
+            svc->vcpu->domain->domain_id,
+            svc->vcpu->vcpu_id,
+            svc->pri,
+            svc->vcpu->processor);
+
+    if ( sdom )
+    {
+        printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}",
+            atomic_read(&svc->credit),
+            svc->credit_last,
+            svc->credit_incr,
+            svc->state_active,
+            svc->state_idle,
+            sdom->weight);
+    }
+
+    printk("\n");
+}
+
+static void
+csched_dump_pcpu(int cpu)
+{
+    struct list_head *runq, *iter;
+    struct csched_pcpu *spc;
+    struct csched_vcpu *svc;
+    int loop;
+
+    spc = CSCHED_PCPU(cpu);
+    runq = &spc->runq;
+
+    printk(" tick=%lu, sort=%d\n",
+            schedule_data[cpu].tick,
+            spc->runq_sort_last);
+
+    /* current VCPU */
+    svc = CSCHED_VCPU(schedule_data[cpu].curr);
+    if ( svc )
+    {
+        printk("\trun: ");
+        csched_dump_vcpu(svc);
+    }
+
+    loop = 0;
+    list_for_each( iter, runq )
+    {
+        svc = __runq_elem(iter);
+        if ( svc )
+        {
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+static void
+csched_dump(void)
+{
+    struct list_head *iter_sdom, *iter_svc;
+    int loop;
+
+    printk("info:\n"
+           "\tncpus              = %u\n"
+           "\tmaster             = %u\n"
+           "\tcredit             = %u\n"
+           "\tcredit balance     = %d\n"
+           "\tweight             = %u\n"
+           "\trunq_sort          = %u\n"
+           "\ttick               = %dms\n"
+           "\ttslice             = %dms\n"
+           "\taccounting period  = %dms\n"
+           "\tdefault-weight     = %d\n",
+           csched_priv.ncpus,
+           csched_priv.master,
+           csched_priv.credit,
+           csched_priv.credit_balance,
+           csched_priv.weight,
+           csched_priv.runq_sort,
+           CSCHED_TICK,
+           CSCHED_TSLICE,
+           CSCHED_ACCT_PERIOD,
+           CSCHED_DEFAULT_WEIGHT);
+
+    printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
+
+    CSCHED_STATS_PRINTK();
+
+    printk("active vcpus:\n");
+    loop = 0;
+    list_for_each( iter_sdom, &csched_priv.active_sdom )
+    {
+        struct csched_dom *sdom;
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        list_for_each( iter_svc, &sdom->active_vcpu )
+        {
+            struct csched_vcpu *svc;
+            svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
+
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+static void
+csched_init(void)
+{
+    spin_lock_init(&csched_priv.lock);
+    INIT_LIST_HEAD(&csched_priv.active_sdom);
+    csched_priv.ncpus = 0;
+    csched_priv.master = UINT_MAX;
+    cpus_clear(csched_priv.idlers);
+    csched_priv.weight = 0U;
+    csched_priv.credit = 0U;
+    csched_priv.credit_balance = 0;
+    csched_priv.runq_sort = 0U;
+    CSCHED_STATS_RESET();
+}
+
+
+struct scheduler sched_credit_def = {
+    .name           = "SMP Credit Scheduler",
+    .opt_name       = "credit",
+    .sched_id       = SCHED_CREDIT,
+
+    .alloc_task     = csched_vcpu_alloc,
+    .add_task       = csched_vcpu_add,
+    .sleep          = csched_vcpu_sleep,
+    .wake           = csched_vcpu_wake,
+    .set_affinity   = csched_vcpu_set_affinity,
+
+    .adjdom         = csched_dom_cntl,
+    .free_task      = csched_dom_free,
+
+    .tick           = csched_tick,
+    .do_schedule    = csched_schedule,
+
+    .dump_cpu_state = csched_dump_pcpu,
+    .dump_settings  = csched_dump,
+    .init           = csched_init,
+};
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
--- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c        Thu May 25 
15:59:18 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/if_ether.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <xen/net_driver_util.h>
-
-
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-{
-       char *s;
-       int i;
-       char *e;
-       char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
-       if (IS_ERR(macstr))
-               return PTR_ERR(macstr);
-       s = macstr;
-       for (i = 0; i < ETH_ALEN; i++) {
-               mac[i] = simple_strtoul(s, &e, 16);
-               if (s == e || (e[0] != ':' && e[0] != 0)) {
-                       kfree(macstr);
-                       return -ENOENT;
-               }
-               s = &e[1];
-       }
-       kfree(macstr);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(xen_net_read_mac);
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/include/asm-x86_64/e820.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/e820.h    Thu May 25 15:59:18 
2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-/*
- * structures and definitions for the int 15, ax=e820 memory map
- * scheme.
- *
- * In a nutshell, setup.S populates a scratch table in the
- * empty_zero_block that contains a list of usable address/size
- * duples.  setup.c, this information is transferred into the e820map,
- * and in init.c/numa.c, that new information is used to mark pages
- * reserved or not.
- */
-#ifndef __E820_HEADER
-#define __E820_HEADER
-
-#include <linux/mmzone.h>
-
-#define E820MAP        0x2d0           /* our map */
-#define E820MAX        128             /* number of entries in E820MAP */
-#define E820NR 0x1e8           /* # entries in E820MAP */
-
-#define E820_RAM       1
-#define E820_RESERVED  2
-#define E820_ACPI      3 /* usable as RAM once ACPI tables have been read */
-#define E820_NVS       4
-
-#define HIGH_MEMORY    (1024*1024)
-
-#define LOWMEMSIZE()   (0x9f000)
-
-#ifndef __ASSEMBLY__
-struct e820entry {
-       u64 addr;       /* start of memory segment */
-       u64 size;       /* size of memory segment */
-       u32 type;       /* type of memory segment */
-} __attribute__((packed));
-
-struct e820map {
-    int nr_map;
-       struct e820entry map[E820MAX];
-};
-
-extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
-                                   unsigned size);
-extern void add_memory_region(unsigned long start, unsigned long size, 
-                             int type);
-extern void setup_memory_region(void);
-extern void contig_e820_setup(void); 
-extern unsigned long e820_end_of_ram(void);
-extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
-extern void e820_print_map(char *who);
-extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
-
-extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned 
long end);
-extern void e820_setup_gap(struct e820entry *e820, int nr_map);
-extern unsigned long e820_hole_size(unsigned long start_pfn,
-                                   unsigned long end_pfn);
-
-extern void __init parse_memopt(char *p, char **end);
-extern void __init parse_memmapopt(char *p, char **end);
-
-extern struct e820map e820;
-#endif/*!__ASSEMBLY__*/
-
-#endif/*__E820_HEADER*/
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/include/xen/net_driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h        Thu May 25 
15:59:18 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _ASM_XEN_NET_DRIVER_UTIL_H
-#define _ASM_XEN_NET_DRIVER_UTIL_H
-
-
-#include <xen/xenbus.h>
-
-
-/**
- * Read the 'mac' node at the given device's node in the store, and parse that
- * as colon-separated octets, placing result the given mac array.  mac must be
- * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h).
- * Return 0 on success, or -errno on error.
- */
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
-
-
-#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_proc.h
--- a/tools/xenstore/xenstored_proc.h   Thu May 25 15:59:18 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/* 
-    Copyright (C) 2005 XenSource Ltd
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-*/
-
-#ifndef _XENSTORED_PROC_H
-#define _XENSTORED_PROC_H
-
-#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
-#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
-
-
-#endif /* _XENSTORED_PROC_H */
_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel
 |