# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID fbc0e953732ef78292d9e87ff6dd7f3432ddd014
# Parent 7f67c15e2c917dc52a3f8acc0fdb79a63b894b15
# Parent 73c73fb8875c331b8c0e6ed0317c8d71b83cdda2
merge with xen-unstable.hg
---
tools/security/python/xensec_tools/acm_getdecision | 55
extras/mini-os/events.c | 12
extras/mini-os/include/xenbus.h | 28
extras/mini-os/kernel.c | 23
extras/mini-os/xenbus/xenbus.c | 202 +
linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c | 5
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 2
linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 142
linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c | 6
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 27
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 15
linux-2.6-xen-sparse/drivers/xen/core/skbuff.c | 11
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 68
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h | 4
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h | 11
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 4
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h | 4
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h | 12
linux-2.6-xen-sparse/include/xen/gnttab.h | 1
 linux-2.6-xen-sparse/kernel/fork.c | 1619 ++++++++++
tools/console/daemon/io.c | 66
tools/console/daemon/utils.c | 26
tools/console/daemon/utils.h | 3
tools/ioemu/sdl.c | 9
tools/ioemu/target-i386-dm/helper2.c | 32
tools/libxc/xc_elf.h | 3
tools/libxc/xc_linux.c | 118
tools/libxc/xc_linux_restore.c | 22
tools/libxc/xc_load_elf.c | 32
tools/libxc/xenctrl.h | 54
tools/python/xen/util/security.py | 41
tools/python/xen/xm/addlabel.py | 2
tools/python/xen/xm/create.py | 2
tools/python/xen/xm/main.py | 3
tools/security/Makefile | 2
tools/security/python/xensec_gen/cgi-bin/policy.cgi | 2
tools/security/secpol_xml2bin.c | 6
tools/xenmon/xenbaked.c | 55
tools/xenstat/libxenstat/src/xenstat.c | 23
tools/xenstore/fake_libxc.c | 4
tools/xenstore/xenstored_core.c | 13
tools/xenstore/xenstored_domain.c | 79
tools/xm-test/tests/block-integrity/01_block_device_read_verify.py | 4
tools/xm-test/tests/block-integrity/02_block_device_write_verify.py | 4
xen/arch/x86/traps.c | 2
xen/common/event_channel.c | 14
xen/include/asm-ia64/event.h | 8
xen/include/asm-x86/event.h | 8
xen/include/xen/elf.h | 2
49 files changed, 2417 insertions(+), 473 deletions(-)
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/events.c
--- a/extras/mini-os/events.c Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/events.c Thu Jun 15 10:23:57 2006 -0600
@@ -35,24 +35,29 @@ int do_event(u32 port, struct pt_regs *r
ev_action_t *action;
if (port >= NR_EVS) {
printk("Port number too large: %d\n", port);
- return 0;
+ goto out;
}
action = &ev_actions[port];
action->count++;
if (!action->handler)
+ {
+ printk("Spurious event on port %d\n", port);
goto out;
+ }
if (action->status & EVS_DISABLED)
+ {
+ printk("Event on port %d disabled\n", port);
goto out;
+ }
/* call the handler */
action->handler(port, regs);
-
- clear_evtchn(port);
out:
+ clear_evtchn(port);
return 1;
}
@@ -135,6 +140,7 @@ void init_events(void)
{
ev_actions[i].status = EVS_DISABLED;
ev_actions[i].handler = default_handler;
+ mask_evtchn(i);
}
}
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/include/xenbus.h
--- a/extras/mini-os/include/xenbus.h Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/include/xenbus.h Thu Jun 15 10:23:57 2006 -0600
@@ -1,6 +1,34 @@
#ifndef XENBUS_H__
#define XENBUS_H__
+/* Initialize the XenBus system. */
void init_xenbus(void);
+/* Read the value associated with a path. Returns a malloc'd error
+ string on failure and sets *value to NULL. On success, *value is
+ set to a malloc'd copy of the value. */
+char *xenbus_read(const char *path, char **value);
+
+/* Associates a value with a path. Returns a malloc'd error string on
+ failure. */
+char *xenbus_write(const char *path, const char *value);
+
+/* Removes the value associated with a path. Returns a malloc'd error
+ string on failure. */
+char *xenbus_rm(const char *path);
+
+/* List the contents of a directory. Returns a malloc'd error string
+ on failure and sets *contents to NULL. On success, *contents is
+ set to a malloc'd array of pointers to malloc'd strings. The array
+ is NULL terminated. May block. */
+char *xenbus_ls(const char *prefix, char ***contents);
+
+/* Reads permissions associated with a path. Returns a malloc'd error
+ string on failure and sets *value to NULL. On success, *value is
+ set to a malloc'd copy of the value. */
+char *xenbus_get_perms(const char *path, char **value);
+
+/* Sets the permissions associated with a path. Returns a malloc'd
+ error string on failure. */
+char *xenbus_set_perms(const char *path, domid_t dom, char perm);
#endif /* XENBUS_H__ */
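A minimal usage sketch, not part of this changeset, showing the calling convention declared above: each call returns NULL on success or a malloc'd error string the caller must free, and xenbus_read() hands back a malloc'd copy of the value. The key and value names below are illustrative only.

/* Illustrative Mini-OS caller of the new xenbus interface (assumed names). */
static void example_xenbus_use(void)
{
    char *err, *value;

    err = xenbus_write("device/example/key", "example-value");
    if (err) {
        printk("xenbus_write failed: %s\n", err);
        free(err);
        return;
    }

    err = xenbus_read("device/example/key", &value);
    if (err) {
        printk("xenbus_read failed: %s\n", err);
        free(err);
        return;
    }
    printk("device/example/key = %s\n", value);
    free(value);

    free(xenbus_rm("device/example/key"));   /* free(NULL) is harmless */
}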
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/kernel.c Thu Jun 15 10:23:57 2006 -0600
@@ -82,17 +82,6 @@ static shared_info_t *map_shared_info(un
}
-void test_xenbus(void);
-
-/* Do initialisation from a thread once the scheduler's available */
-static void init_xs(void *ign)
-{
- init_xenbus();
-
- test_xenbus();
-}
-
-
u8 xen_features[XENFEAT_NR_SUBMAPS * 32];
void setup_xen_features(void)
@@ -111,10 +100,18 @@ void setup_xen_features(void)
}
}
+void test_xenbus(void);
+
+void xenbus_tester(void *p)
+{
+ test_xenbus();
+}
+
/* This should be overridden by the application we are linked against. */
__attribute__((weak)) int app_main(start_info_t *si)
{
printk("Dummy main: start_info=%p\n", si);
+ create_thread("xenbus_tester", xenbus_tester, si);
return 0;
}
@@ -183,8 +180,8 @@ void start_kernel(start_info_t *si)
/* Init scheduler. */
init_sched();
- /* Init XenBus from a separate thread */
- create_thread("init_xs", init_xs, NULL);
+ /* Init XenBus */
+ init_xenbus();
/* Call (possibly overridden) app_main() */
app_main(&start_info);
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/xenbus/xenbus.c
--- a/extras/mini-os/xenbus/xenbus.c Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/xenbus/xenbus.c Thu Jun 15 10:23:57 2006 -0600
@@ -3,11 +3,12 @@
* (C) 2006 - Cambridge University
****************************************************************************
*
- * File: mm.c
+ * File: xenbus.c
* Author: Steven Smith (sos22@xxxxxxxxx)
* Changes: Grzegorz Milos (gm281@xxxxxxxxx)
+ * Changes: John D. Ramsdell
*
- * Date: Mar 2006, chages Aug 2005
+ * Date: Jun 2006, changes Aug 2005
*
* Environment: Xen Minimal OS
* Description: Minimal implementation of xenbus
@@ -167,6 +168,7 @@ void init_xenbus(void)
void init_xenbus(void)
{
int err;
+ printk("Initialising xenbus\n");
DEBUG("init_xenbus called.\n");
xenstore_buf = mfn_to_virt(start_info.store_mfn);
create_thread("xenstore", xenbus_thread_func, NULL);
@@ -262,15 +264,15 @@ static void xb_write(int type, int req_i
/* Send a message to xenbus, in the same fashion as xb_write, and
block waiting for a reply. The reply is malloced and should be
freed by the caller. */
-static void *xenbus_msg_reply(int type,
+static struct xsd_sockmsg *
+xenbus_msg_reply(int type,
int trans,
struct write_req *io,
int nr_reqs)
{
int id;
DEFINE_WAIT(w);
- void *rep;
- struct xsd_sockmsg *repmsg;
+ struct xsd_sockmsg *rep;
id = allocate_xenbus_id();
add_waiter(w, req_info[id].waitq);
@@ -281,12 +283,26 @@ static void *xenbus_msg_reply(int type,
wake(current);
rep = req_info[id].reply;
- repmsg = rep;
- BUG_ON(repmsg->req_id != id);
+ BUG_ON(rep->req_id != id);
release_xenbus_id(id);
-
return rep;
}
+
+static char *errmsg(struct xsd_sockmsg *rep)
+{
+ if (!rep) {
+ char msg[] = "No reply";
+ size_t len = strlen(msg) + 1;
+ return memcpy(malloc(len), msg, len);
+ }
+ if (rep->type != XS_ERROR)
+ return NULL;
+ char *res = malloc(rep->len + 1);
+ memcpy(res, rep + 1, rep->len);
+ res[rep->len] = 0;
+ free(rep);
+ return res;
+}
/* Send a debug message to xenbus. Can block. */
static void xenbus_debug_msg(const char *msg)
@@ -296,27 +312,29 @@ static void xenbus_debug_msg(const char
{ "print", sizeof("print") },
{ msg, len },
{ "", 1 }};
- void *reply;
- struct xsd_sockmsg *repmsg;
-
- reply = xenbus_msg_reply(XS_DEBUG, 0, req, 3);
- repmsg = reply;
+ struct xsd_sockmsg *reply;
+
+ reply = xenbus_msg_reply(XS_DEBUG, 0, req, ARRAY_SIZE(req));
DEBUG("Got a reply, type %d, id %d, len %d.\n",
- repmsg->type, repmsg->req_id, repmsg->len);
+ reply->type, reply->req_id, reply->len);
}
/* List the contents of a directory. Returns a malloc()ed array of
pointers to malloc()ed strings. The array is NULL terminated. May
block. */
-static char **xenbus_ls(const char *pre)
-{
- void *reply;
- struct xsd_sockmsg *repmsg;
+char *xenbus_ls(const char *pre, char ***contents)
+{
+ struct xsd_sockmsg *reply, *repmsg;
struct write_req req[] = { { pre, strlen(pre)+1 } };
int nr_elems, x, i;
char **res;
- repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, 1);
+ repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(repmsg);
+ if (msg) {
+ *contents = NULL;
+ return msg;
+ }
reply = repmsg + 1;
for (x = nr_elems = 0; x < repmsg->len; x++)
nr_elems += (((char *)reply)[x] == 0);
@@ -329,20 +347,91 @@ static char **xenbus_ls(const char *pre)
}
res[i] = NULL;
free(repmsg);
- return res;
-}
-
-static char *xenbus_read(const char *path)
-{
- struct write_req req[] = { {path, strlen(path) + 1}};
+ *contents = res;
+ return NULL;
+}
+
+char *xenbus_read(const char *path, char **value)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
struct xsd_sockmsg *rep;
char *res;
- rep = xenbus_msg_reply(XS_READ, 0, req, 1);
+ rep = xenbus_msg_reply(XS_READ, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg) {
+ *value = NULL;
+ return msg;
+ }
res = malloc(rep->len + 1);
memcpy(res, rep + 1, rep->len);
res[rep->len] = 0;
free(rep);
- return res;
+ *value = res;
+ return NULL;
+}
+
+char *xenbus_write(const char *path, const char *value)
+{
+ struct write_req req[] = {
+ {path, strlen(path) + 1},
+ {value, strlen(value) + 1},
+ };
+ struct xsd_sockmsg *rep;
+ rep = xenbus_msg_reply(XS_WRITE, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
+}
+
+char *xenbus_rm(const char *path)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
+ struct xsd_sockmsg *rep;
+ rep = xenbus_msg_reply(XS_RM, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
+}
+
+char *xenbus_get_perms(const char *path, char **value)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
+ struct xsd_sockmsg *rep;
+ char *res;
+ rep = xenbus_msg_reply(XS_GET_PERMS, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg) {
+ *value = NULL;
+ return msg;
+ }
+ res = malloc(rep->len + 1);
+ memcpy(res, rep + 1, rep->len);
+ res[rep->len] = 0;
+ free(rep);
+ *value = res;
+ return NULL;
+}
+
+#define PERM_MAX_SIZE 32
+char *xenbus_set_perms(const char *path, domid_t dom, char perm)
+{
+ char value[PERM_MAX_SIZE];
+ snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom);
+ struct write_req req[] = {
+ {path, strlen(path) + 1},
+ {value, strlen(value) + 1},
+ };
+ struct xsd_sockmsg *rep;
+ rep = xenbus_msg_reply(XS_SET_PERMS, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
}
static void do_ls_test(const char *pre)
@@ -351,7 +440,12 @@ static void do_ls_test(const char *pre)
int x;
DEBUG("ls %s...\n", pre);
- dirs = xenbus_ls(pre);
+ char *msg = xenbus_ls(pre, &dirs);
+ if (msg) {
+ DEBUG("Error in xenbus ls: %s\n", msg);
+ free(msg);
+ return;
+ }
for (x = 0; dirs[x]; x++)
{
DEBUG("ls %s[%d] -> %s\n", pre, x, dirs[x]);
@@ -364,9 +458,38 @@ static void do_read_test(const char *pat
{
char *res;
DEBUG("Read %s...\n", path);
- res = xenbus_read(path);
+ char *msg = xenbus_read(path, &res);
+ if (msg) {
+ DEBUG("Error in xenbus read: %s\n", msg);
+ free(msg);
+ return;
+ }
DEBUG("Read %s -> %s.\n", path, res);
free(res);
+}
+
+static void do_write_test(const char *path, const char *val)
+{
+ DEBUG("Write %s to %s...\n", val, path);
+ char *msg = xenbus_write(path, val);
+ if (msg) {
+ DEBUG("Result %s\n", msg);
+ free(msg);
+ } else {
+ DEBUG("Success.\n");
+ }
+}
+
+static void do_rm_test(const char *path)
+{
+ DEBUG("rm %s...\n", path);
+ char *msg = xenbus_rm(path);
+ if (msg) {
+ DEBUG("Result %s\n", msg);
+ free(msg);
+ } else {
+ DEBUG("Success.\n");
+ }
}
/* Simple testing thing */
@@ -383,5 +506,22 @@ void test_xenbus(void)
DEBUG("Doing read test.\n");
do_read_test("device/vif/0/mac");
do_read_test("device/vif/0/backend");
- printk("Xenbus initialised.\n");
-}
+
+ DEBUG("Doing write test.\n");
+ do_write_test("device/vif/0/flibble", "flobble");
+ do_read_test("device/vif/0/flibble");
+ do_write_test("device/vif/0/flibble", "widget");
+ do_read_test("device/vif/0/flibble");
+
+ DEBUG("Doing rm test.\n");
+ do_rm_test("device/vif/0/flibble");
+ do_read_test("device/vif/0/flibble");
+ DEBUG("(Should have said ENOENT)\n");
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-basic-offset: 4
+ * End:
+ */
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Thu Jun 15 10:23:57 2006 -0600
@@ -133,6 +133,7 @@ void xen_tlb_flush(void)
op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
+EXPORT_SYMBOL(xen_tlb_flush);
void xen_invlpg(unsigned long ptr)
{
@@ -141,6 +142,7 @@ void xen_invlpg(unsigned long ptr)
op.arg1.linear_addr = ptr & PAGE_MASK;
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
+EXPORT_SYMBOL(xen_invlpg);
#ifdef CONFIG_SMP
@@ -363,7 +365,8 @@ void xen_destroy_contiguous_region(unsig
};
set_xen_guest_handle(reservation.extent_start, &frame);
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (xen_feature(XENFEAT_auto_translated_physmap) ||
+ !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
return;
scrub_pages(vstart, 1 << order);
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Thu Jun 15 10:23:57 2006 -0600
@@ -763,7 +763,7 @@ void __init pgtable_cache_init(void)
#endif
0,
pgd_ctor,
- pgd_dtor);
+ PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Jun 15 10:23:57 2006 -0600
@@ -300,11 +300,6 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
unsigned long flags;
if (PTRS_PER_PMD > 1) {
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
- int rc = xen_create_contiguous_region(
- (unsigned long)pgd, 0, 32);
- BUG_ON(rc);
- }
if (HAVE_SHARED_KERNEL_PMD)
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
@@ -320,69 +315,105 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
}
}
+/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
- if (PTRS_PER_PMD > 1) {
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
- xen_destroy_contiguous_region((unsigned long)pgd, 0);
- } else {
- spin_lock_irqsave(&pgd_lock, flags);
- pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-
- pgd_test_and_unpin(pgd);
- }
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ pgd_test_and_unpin(pgd);
}
pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+ pmd_t **pmd;
+ unsigned long flags;
pgd_test_and_unpin(pgd);
if (PTRS_PER_PMD == 1 || !pgd)
return pgd;
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
- if (!pmd)
- goto out_oom;
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
- }
-
- if (!HAVE_SHARED_KERNEL_PMD) {
- unsigned long flags;
-
- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
+ if (HAVE_SHARED_KERNEL_PMD) {
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
if (!pmd)
goto out_oom;
set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
}
-
- spin_lock_irqsave(&pgd_lock, flags);
- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
- unsigned long v = (unsigned long)i << PGDIR_SHIFT;
- pgd_t *kpgd = pgd_offset_k(v);
- pud_t *kpud = pud_offset(kpgd, v);
- pmd_t *kpmd = pmd_offset(kpud, v);
- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
- memcpy(pmd, kpmd, PAGE_SIZE);
- make_lowmem_page_readonly(
- pmd, XENFEAT_writable_page_tables);
+ return pgd;
+ }
+
+ /*
+ * We can race save/restore (if we sleep during a GFP_KERNEL memory
+ * allocation). We therefore store virtual addresses of pmds as they
+ * do not change across save/restore, and poke the machine addresses
+ * into the pgdir under the pgd_lock.
+ */
+ pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
+ if (!pmd) {
+ kmem_cache_free(pgd_cache, pgd);
+ return NULL;
+ }
+
+ /* Allocate pmds, remember virtual addresses. */
+ for (i = 0; i < PTRS_PER_PGD; ++i) {
+ pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+ if (!pmd[i])
+ goto out_oom;
+ }
+
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ /* Protect against save/restore: move below 4GB under pgd_lock. */
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
+ int rc = xen_create_contiguous_region(
+ (unsigned long)pgd, 0, 32);
+ if (rc) {
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ goto out_oom;
}
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
+ }
+
+ /* Copy kernel pmd contents and write-protect the new pmds. */
+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
+ unsigned long v = (unsigned long)i << PGDIR_SHIFT;
+ pgd_t *kpgd = pgd_offset_k(v);
+ pud_t *kpud = pud_offset(kpgd, v);
+ pmd_t *kpmd = pmd_offset(kpud, v);
+ memcpy(pmd[i], kpmd, PAGE_SIZE);
+ make_lowmem_page_readonly(
+ pmd[i], XENFEAT_writable_page_tables);
+ }
+
+ /* It is safe to poke machine addresses of pmds under the pmd_lock. */
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
+
+ /* Ensure this pgd gets picked up and pinned on save/restore. */
+ pgd_list_add(pgd);
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ kfree(pmd);
return pgd;
out_oom:
- for (i--; i >= 0; i--)
- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+ if (HAVE_SHARED_KERNEL_PMD) {
+ for (i--; i >= 0; i--)
+ kmem_cache_free(pmd_cache,
+ (void *)__va(pgd_val(pgd[i])-1));
+ } else {
+ for (i--; i >= 0; i--)
+ kmem_cache_free(pmd_cache, pmd[i]);
+ kfree(pmd);
+ }
kmem_cache_free(pgd_cache, pgd);
return NULL;
}
@@ -391,6 +422,14 @@ void pgd_free(pgd_t *pgd)
{
int i;
+ /*
+ * After this the pgd should not be pinned for the duration of this
+ * function's execution. We should never sleep and thus never race:
+ * 1. User pmds will not become write-protected under our feet due
+ * to a concurrent mm_pin_all().
+ * 2. The machine addresses in PGD entries will not become invalid
+ * due to a concurrent save/restore.
+ */
pgd_test_and_unpin(pgd);
/* in the PAE case user pgd entries are overwritten before usage */
@@ -399,11 +438,13 @@ void pgd_free(pgd_t *pgd)
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
kmem_cache_free(pmd_cache, pmd);
}
+
if (!HAVE_SHARED_KERNEL_PMD) {
unsigned long flags;
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
+
for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
make_lowmem_page_writable(
@@ -411,8 +452,13 @@ void pgd_free(pgd_t *pgd)
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
kmem_cache_free(pmd_cache, pmd);
}
+
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
+ xen_destroy_contiguous_region(
+ (unsigned long)pgd, 0);
}
}
+
/* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);
}
@@ -588,7 +634,7 @@ void mm_pin(struct mm_struct *mm)
void mm_pin(struct mm_struct *mm)
{
if (xen_feature(XENFEAT_writable_page_tables))
- return;
+ return;
spin_lock(&mm->page_table_lock);
__pgd_pin(mm->pgd);
spin_unlock(&mm->page_table_lock);
@@ -597,7 +643,7 @@ void mm_unpin(struct mm_struct *mm)
void mm_unpin(struct mm_struct *mm)
{
if (xen_feature(XENFEAT_writable_page_tables))
- return;
+ return;
spin_lock(&mm->page_table_lock);
__pgd_unpin(mm->pgd);
spin_unlock(&mm->page_table_lock);
@@ -607,11 +653,17 @@ void mm_pin_all(void)
{
struct page *page;
if (xen_feature(XENFEAT_writable_page_tables))
- return;
+ return;
for (page = pgd_list; page; page = (struct page *)page->index) {
if (!test_bit(PG_pinned, &page->flags))
__pgd_pin((pgd_t *)page_address(page));
}
+}
+
+void _arch_dup_mmap(struct mm_struct *mm)
+{
+ if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
+ mm_pin(mm);
}
void _arch_exit_mmap(struct mm_struct *mm)
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Jun 15 10:23:57 2006 -0600
@@ -130,6 +130,12 @@ void mm_pin_all(void)
context.unpinned));
}
+void _arch_dup_mmap(struct mm_struct *mm)
+{
+ if (!mm->context.pinned)
+ mm_pin(mm);
+}
+
void _arch_exit_mmap(struct mm_struct *mm)
{
struct task_struct *tsk = current;
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Jun 15 10:23:57 2006 -0600
@@ -342,8 +342,20 @@ static void blkfront_closing(struct xenb
static void blkfront_closing(struct xenbus_device *dev)
{
struct blkfront_info *info = dev->dev.driver_data;
+ unsigned long flags;
DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
+
+ if (info->rq == NULL)
+ return;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+ /* No more blkif_request(). */
+ blk_stop_queue(info->rq);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ flush_scheduled_work();
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
xlvbd_del(info);
@@ -407,7 +419,8 @@ static void blkif_restart_queue(void *ar
{
struct blkfront_info *info = (struct blkfront_info *)arg;
spin_lock_irq(&blkif_io_lock);
- kick_pending_request_queues(info);
+ if (info->connected == BLKIF_STATE_CONNECTED)
+ kick_pending_request_queues(info);
spin_unlock_irq(&blkif_io_lock);
}
@@ -695,6 +708,12 @@ static void blkif_free(struct blkfront_i
spin_lock_irq(&blkif_io_lock);
info->connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+ /* No more blkif_request(). */
+ if (info->rq)
+ blk_stop_queue(info->rq);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ flush_scheduled_work();
spin_unlock_irq(&blkif_io_lock);
/* Free resources associated with old device channel. */
@@ -768,17 +787,17 @@ static void blkif_recover(struct blkfron
(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
+ spin_lock_irq(&blkif_io_lock);
+
/* Now safe for us to use the shared ring */
- spin_lock_irq(&blkif_io_lock);
info->connected = BLKIF_STATE_CONNECTED;
- spin_unlock_irq(&blkif_io_lock);
/* Send off requeued requests */
flush_requests(info);
/* Kick any other new requests queued since we resumed */
- spin_lock_irq(&blkif_io_lock);
kick_pending_request_queues(info);
+
spin_unlock_irq(&blkif_io_lock);
}
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Thu Jun 15 10:23:57 2006 -0600
@@ -334,6 +334,21 @@ out:
}
EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
+{
+ struct gnttab_free_callback **pcb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
+ if (*pcb == callback) {
+ *pcb = callback->next;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
#ifndef __ia64__
static int map_pte_fn(pte_t *pte, struct page *pmd_page,
unsigned long addr, void *data)
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c Thu Jun 15 10:23:57 2006 -0600
@@ -121,8 +121,15 @@ static int __init skbuff_init(void)
for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
size = PAGE_SIZE << order;
sprintf(name[order], "xen-skb-%lu", size);
- skbuff_order_cachep[order] = kmem_cache_create(
- name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+ if (is_running_on_xen() &&
+ (xen_start_info->flags & SIF_PRIVILEGED))
+ skbuff_order_cachep[order] = kmem_cache_create(
+ name[order], size, size, 0,
+ skbuff_ctor, skbuff_dtor);
+ else
+ skbuff_order_cachep[order] = kmem_cache_create(
+ name[order], size, size, 0, NULL, NULL);
+
}
skbuff_cachep = skbuff_order_cachep[0];
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jun 15 10:23:57 2006 -0600
@@ -1072,68 +1072,39 @@ static void xennet_set_features(struct n
static void network_connect(struct net_device *dev)
{
- struct netfront_info *np;
+ struct netfront_info *np = netdev_priv(dev);
int i, requeue_idx;
- struct netif_tx_request *tx;
struct sk_buff *skb;
xennet_set_features(dev);
- np = netdev_priv(dev);
spin_lock_irq(&np->tx_lock);
spin_lock(&np->rx_lock);
- /* Recovery procedure: */
-
/*
- * Step 1: Rebuild the RX and TX ring contents.
- * NB. We could just free the queued TX packets now but we hope
- * that sending them out might do some good. We have to rebuild
- * the RX ring because some of our pages are currently flipped out
- * so we can't just free the RX skbs.
- * NB2. Freelist index entries are always going to be less than
+ * Recovery procedure:
+ * NB. Freelist index entries are always going to be less than
* PAGE_OFFSET, whereas pointers to skbs will always be equal or
- * greater than PAGE_OFFSET: we use this property to distinguish
- * them.
- */
-
- /*
- * Rebuild the TX buffer freelist and the TX ring itself.
- * NB. This reorders packets. We could keep more private state
- * to avoid this but maybe it doesn't matter so much given the
- * interface has been down.
- */
+ * greater than PAGE_OFFSET: we use this property to distinguish
+ * them.
+ */
+
+ /* Step 1: Discard all pending TX packet fragments. */
for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
continue;
skb = np->tx_skbs[i];
-
- tx = RING_GET_REQUEST(&np->tx, requeue_idx);
- requeue_idx++;
-
- tx->id = i;
- gnttab_grant_foreign_access_ref(
- np->grant_tx_ref[i], np->xbdev->otherend_id,
- virt_to_mfn(np->tx_skbs[i]->data),
- GNTMAP_readonly);
- tx->gref = np->grant_tx_ref[i];
- tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
- tx->size = skb->len;
- tx->flags = 0;
- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
- tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
- if (skb->proto_data_valid) /* remote but checksummed? */
- tx->flags |= NETTXF_data_validated;
-
- np->stats.tx_bytes += skb->len;
- np->stats.tx_packets++;
- }
-
- np->tx.req_prod_pvt = requeue_idx;
- RING_PUSH_REQUESTS(&np->tx);
-
- /* Rebuild the RX buffer freelist and the RX ring itself. */
+ gnttab_end_foreign_access_ref(
+ np->grant_tx_ref[i], GNTMAP_readonly);
+ gnttab_release_grant_reference(
+ &np->gref_tx_head, np->grant_tx_ref[i]);
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
+ add_id_to_freelist(np->tx_skbs, i);
+ dev_kfree_skb_irq(skb);
+ }
+
+ /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) {
if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET)
continue;
@@ -1150,7 +1121,7 @@ static void network_connect(struct net_d
RING_PUSH_REQUESTS(&np->rx);
/*
- * Step 2: All public and private state should now be sane. Get
+ * Step 3: All public and private state should now be sane. Get
* ready to start sending and receiving packets and give the driver
* domain a kick because we've probably just requeued some
* packets.
@@ -1158,6 +1129,7 @@ static void network_connect(struct net_d
netif_carrier_on(dev);
notify_remote_via_irq(np->irq);
network_tx_buf_gc(dev);
+ network_alloc_rx_buffers(dev);
spin_unlock(&np->rx_lock);
spin_unlock_irq(&np->tx_lock);
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h Thu Jun 15 10:23:57 2006 -0600
@@ -18,4 +18,8 @@ extern void _arch_exit_mmap(struct mm_st
extern void _arch_exit_mmap(struct mm_struct *mm);
#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+/* kernel/fork.c:dup_mmap hook */
+extern void _arch_dup_mmap(struct mm_struct *mm);
+#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
+
#endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Thu Jun 15 10:23:57 2006 -0600
@@ -51,8 +51,7 @@ static inline void switch_mm(struct mm_s
struct mmuext_op _op[2], *op = _op;
if (likely(prev != next)) {
- if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
- mm_pin(next);
+ BUG_ON(!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags));
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
@@ -99,7 +98,11 @@ static inline void switch_mm(struct mm_s
#define deactivate_mm(tsk, mm) \
asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
-#define activate_mm(prev, next) \
- switch_mm((prev),(next),NULL)
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+ if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
+ mm_pin(next);
+ switch_mm(prev, next, NULL);
+}
#endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Thu Jun 15 10:23:57 2006 -0600
@@ -25,9 +25,9 @@ static char * __init machine_specific_me
if ( rc == -ENOSYS ) {
memmap.nr_entries = 1;
map[0].addr = 0ULL;
- map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+ map[0].size = PFN_PHYS(xen_start_info->nr_pages);
/* 8MB slack (to balance backend allocations). */
- map[0].size += 8 << 20;
+ map[0].size += 8ULL << 20;
map[0].type = E820_RAM;
rc = 0;
}
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h Thu Jun 15 10:23:57 2006 -0600
@@ -28,6 +28,10 @@ extern spinlock_t mm_unpinned_lock;
/* mm/memory.c:exit_mmap hook */
extern void _arch_exit_mmap(struct mm_struct *mm);
#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+
+/* kernel/fork.c:dup_mmap hook */
+extern void _arch_dup_mmap(struct mm_struct *mm);
+#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
#endif
#endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h Thu Jun 15 10:23:57 2006 -0600
@@ -73,8 +73,7 @@ static inline void switch_mm(struct mm_s
struct mmuext_op _op[3], *op = _op;
if (likely(prev != next)) {
- if (!next->context.pinned)
- mm_pin(next);
+ BUG_ON(!next->context.pinned);
/* stop flush ipis for the previous mm */
clear_bit(cpu, &prev->cpu_vm_mask);
@@ -127,8 +126,11 @@ static inline void switch_mm(struct mm_s
asm volatile("movl %0,%%fs"::"r"(0)); \
} while(0)
-#define activate_mm(prev, next) do { \
- switch_mm((prev),(next),NULL); \
-} while (0)
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+ if (!next->context.pinned)
+ mm_pin(next);
+ switch_mm(prev, next, NULL);
+}
#endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/xen/gnttab.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Thu Jun 15 10:23:57 2006 -0600
@@ -100,6 +100,7 @@ void gnttab_release_grant_reference(gran
void gnttab_request_free_callback(struct gnttab_free_callback *callback,
void (*fn)(void *), void *arg, u16 count);
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly);
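A minimal sketch of how the new cancel call is meant to pair with gnttab_request_free_callback(), mirroring the blkfront teardown earlier in this changeset; the structure and function names below are illustrative, not part of the patch.

#include <linux/workqueue.h>
#include <xen/gnttab.h>

/* Illustrative frontend teardown (assumed names): once a free callback has
 * been registered, cancel it before the state it references is torn down,
 * then flush any work it may already have scheduled. */
struct example_front_info {
	struct gnttab_free_callback callback;
	/* ... other per-device state ... */
};

static void example_front_disconnect(struct example_front_info *info)
{
	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&info->callback);
	/* Wait for any callback-scheduled work to finish. */
	flush_scheduled_work();
}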
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/io.c Thu Jun 15 10:23:57 2006 -0600
@@ -24,8 +24,8 @@
#include "io.h"
#include <xenctrl.h>
#include <xs.h>
-#include <xen/linux/evtchn.h>
#include <xen/io/console.h>
+#include <xenctrl.h>
#include <malloc.h>
#include <stdlib.h>
@@ -36,7 +36,6 @@
#include <unistd.h>
#include <termios.h>
#include <stdarg.h>
-#include <sys/ioctl.h>
#include <sys/mman.h>
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
@@ -64,18 +63,11 @@ struct domain
char *conspath;
int ring_ref;
evtchn_port_t local_port;
- int evtchn_fd;
+ int xce_handle;
struct xencons_interface *interface;
};
static struct domain *dom_head;
-
-static void evtchn_notify(struct domain *dom)
-{
- struct ioctl_evtchn_notify notify;
- notify.port = dom->local_port;
- (void)ioctl(dom->evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
-}
static void buffer_append(struct domain *dom)
{
@@ -106,7 +98,7 @@ static void buffer_append(struct domain
mb();
intf->out_cons = cons;
- evtchn_notify(dom);
+ xc_evtchn_notify(dom->xce_handle, dom->local_port);
if (buffer->max_capacity &&
buffer->size > buffer->max_capacity) {
@@ -234,7 +226,6 @@ static int domain_create_ring(struct dom
static int domain_create_ring(struct domain *dom)
{
int err, remote_port, ring_ref, rc;
- struct ioctl_evtchn_bind_interdomain bind;
err = xs_gather(xs, dom->conspath,
"ring-ref", "%u", &ring_ref,
@@ -258,24 +249,24 @@ static int domain_create_ring(struct dom
}
dom->local_port = -1;
- if (dom->evtchn_fd != -1)
- close(dom->evtchn_fd);
+ if (dom->xce_handle != -1)
+ xc_evtchn_close(dom->xce_handle);
/* Opening evtchn independently for each console is a bit
* wasteful, but that's how the code is structured... */
- dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR);
- if (dom->evtchn_fd == -1) {
+ dom->xce_handle = xc_evtchn_open();
+ if (dom->xce_handle == -1) {
err = errno;
goto out;
}
- bind.remote_domain = dom->domid;
- bind.remote_port = remote_port;
- rc = ioctl(dom->evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+ rc = xc_evtchn_bind_interdomain(dom->xce_handle,
+ dom->domid, remote_port);
+
if (rc == -1) {
err = errno;
- close(dom->evtchn_fd);
- dom->evtchn_fd = -1;
+ xc_evtchn_close(dom->xce_handle);
+ dom->xce_handle = -1;
goto out;
}
dom->local_port = rc;
@@ -285,8 +276,8 @@ static int domain_create_ring(struct dom
if (dom->tty_fd == -1) {
err = errno;
- close(dom->evtchn_fd);
- dom->evtchn_fd = -1;
+ xc_evtchn_close(dom->xce_handle);
+ dom->xce_handle = -1;
dom->local_port = -1;
goto out;
}
@@ -344,7 +335,7 @@ static struct domain *create_domain(int
dom->ring_ref = -1;
dom->local_port = -1;
dom->interface = NULL;
- dom->evtchn_fd = -1;
+ dom->xce_handle = -1;
if (!watch_domain(dom, true))
goto out;
@@ -409,9 +400,9 @@ static void shutdown_domain(struct domai
if (d->interface != NULL)
munmap(d->interface, getpagesize());
d->interface = NULL;
- if (d->evtchn_fd != -1)
- close(d->evtchn_fd);
- d->evtchn_fd = -1;
+ if (d->xce_handle != -1)
+ xc_evtchn_close(d->xce_handle);
+ d->xce_handle = -1;
cleanup_domain(d);
}
@@ -483,7 +474,7 @@ static void handle_tty_read(struct domai
}
wmb();
intf->in_prod = prod;
- evtchn_notify(dom);
+ xc_evtchn_notify(dom->xce_handle, dom->local_port);
} else {
close(dom->tty_fd);
dom->tty_fd = -1;
@@ -516,14 +507,14 @@ static void handle_tty_write(struct doma
static void handle_ring_read(struct domain *dom)
{
- evtchn_port_t v;
-
- if (!read_sync(dom->evtchn_fd, &v, sizeof(v)))
+ evtchn_port_t port;
+
+ if ((port = xc_evtchn_pending(dom->xce_handle)) == -1)
return;
buffer_append(dom);
- (void)write_sync(dom->evtchn_fd, &v, sizeof(v));
+ (void)xc_evtchn_unmask(dom->xce_handle, port);
}
static void handle_xs(void)
@@ -566,9 +557,10 @@ void handle_io(void)
max_fd = MAX(xs_fileno(xs), max_fd);
for (d = dom_head; d; d = d->next) {
- if (d->evtchn_fd != -1) {
- FD_SET(d->evtchn_fd, &readfds);
- max_fd = MAX(d->evtchn_fd, max_fd);
+ if (d->xce_handle != -1) {
+ int evtchn_fd = xc_evtchn_fd(d->xce_handle);
+ FD_SET(evtchn_fd, &readfds);
+ max_fd = MAX(evtchn_fd, max_fd);
}
if (d->tty_fd != -1) {
@@ -588,8 +580,8 @@ void handle_io(void)
for (d = dom_head; d; d = n) {
n = d->next;
- if (d->evtchn_fd != -1 &&
- FD_ISSET(d->evtchn_fd, &readfds))
+ if (d->xce_handle != -1 &&
+ FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
handle_ring_read(d);
if (d->tty_fd != -1) {
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/utils.c
--- a/tools/console/daemon/utils.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/utils.c Thu Jun 15 10:23:57 2006 -0600
@@ -38,32 +38,6 @@
struct xs_handle *xs;
int xc;
-
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
-{
- size_t offset = 0;
- ssize_t len;
-
- while (offset < size) {
- if (do_read) {
- len = read(fd, data + offset, size - offset);
- } else {
- len = write(fd, data + offset, size - offset);
- }
-
- if (len < 1) {
- if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
- continue;
- } else {
- return false;
- }
- } else {
- offset += len;
- }
- }
-
- return true;
-}
static void child_exit(int sig)
{
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/utils.h
--- a/tools/console/daemon/utils.h Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/utils.h Thu Jun 15 10:23:57 2006 -0600
@@ -29,9 +29,6 @@
void daemonize(const char *pidfile);
bool xen_setup(void);
-#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
-#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
extern struct xs_handle *xs;
extern int xc;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/ioemu/sdl.c
--- a/tools/ioemu/sdl.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/ioemu/sdl.c Thu Jun 15 10:23:57 2006 -0600
@@ -376,13 +376,18 @@ static void sdl_update_caption(void)
static void sdl_hide_cursor(void)
{
- SDL_SetCursor(sdl_cursor_hidden);
+ if (kbd_mouse_is_absolute()) {
+ SDL_ShowCursor(1);
+ SDL_SetCursor(sdl_cursor_hidden);
+ } else {
+ SDL_ShowCursor(0);
+ }
}
static void sdl_show_cursor(void)
{
if (!kbd_mouse_is_absolute()) {
- SDL_SetCursor(sdl_cursor_normal);
+ SDL_ShowCursor(1);
}
}
diff -r 7f67c15e2c91 -r fbc0e953732e tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/ioemu/target-i386-dm/helper2.c Thu Jun 15 10:23:57 2006 -0600
@@ -47,11 +47,9 @@
#include <limits.h>
#include <fcntl.h>
-#include <sys/ioctl.h>
#include <xenctrl.h>
#include <xen/hvm/ioreq.h>
-#include <xen/linux/evtchn.h>
#include "cpu.h"
#include "exec-all.h"
@@ -123,7 +121,7 @@ target_ulong cpu_get_phys_page_debug(CPU
}
//the evtchn fd for polling
-int evtchn_fd = -1;
+int xce_handle = -1;
//which vcpu we are serving
int send_vcpu = 0;
@@ -170,11 +168,10 @@ static ioreq_t* __cpu_get_ioreq(int vcpu
//retval--the number of ioreq packet
static ioreq_t* cpu_get_ioreq(void)
{
- int i, rc;
+ int i;
evtchn_port_t port;
- rc = read(evtchn_fd, &port, sizeof(port));
- if ( rc == sizeof(port) ) {
+ if ( (port = xc_evtchn_pending(xce_handle)) != -1 ) {
for ( i = 0; i < vcpus; i++ )
if ( shared_page->vcpu_iodata[i].dm_eport == port )
break;
@@ -184,8 +181,7 @@ static ioreq_t* cpu_get_ioreq(void)
exit(1);
}
- // unmask the wanted port again
- write(evtchn_fd, &port, sizeof(port));
+ xc_evtchn_unmask(xce_handle, port);
//get the io packet from shared memory
send_vcpu = i;
@@ -436,6 +432,7 @@ int main_loop(void)
extern int shutdown_requested;
CPUState *env = global_env;
int retval;
+ int evtchn_fd = xc_evtchn_fd(xce_handle);
extern void main_loop_wait(int);
/* Watch stdin (fd 0) to see when it has input. */
@@ -475,11 +472,9 @@ int main_loop(void)
main_loop_wait(0);
if (env->send_event) {
- struct ioctl_evtchn_notify notify;
-
env->send_event = 0;
- notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport;
- (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+ (void)xc_evtchn_notify(xce_handle,
+ shared_page->vcpu_iodata[send_vcpu].dm_eport);
}
}
destroy_hvm_domain();
@@ -511,7 +506,6 @@ CPUState * cpu_init()
CPUState * cpu_init()
{
CPUX86State *env;
- struct ioctl_evtchn_bind_interdomain bind;
int i, rc;
cpu_exec_init();
@@ -523,21 +517,19 @@ CPUState * cpu_init()
cpu_single_env = env;
- if (evtchn_fd != -1)//the evtchn has been opened by another cpu object
+ if (xce_handle != -1)//the evtchn has been opened by another cpu object
return NULL;
- //use nonblock reading not polling, may change in future.
- evtchn_fd = open("/dev/xen/evtchn", O_RDWR|O_NONBLOCK);
- if (evtchn_fd == -1) {
+ xce_handle = xc_evtchn_open();
+ if (xce_handle == -1) {
fprintf(logfile, "open evtchn device error %d\n", errno);
return NULL;
}
/* FIXME: how about if we overflow the page here? */
- bind.remote_domain = domid;
for ( i = 0; i < vcpus; i++ ) {
- bind.remote_port = shared_page->vcpu_iodata[i].vp_eport;
- rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+ rc = xc_evtchn_bind_interdomain(xce_handle, domid,
+ shared_page->vcpu_iodata[i].vp_eport);
if ( rc == -1 ) {
fprintf(logfile, "bind interdomain ioctl error %d\n", errno);
return NULL;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_elf.h
--- a/tools/libxc/xc_elf.h Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_elf.h Thu Jun 15 10:23:57 2006 -0600
@@ -170,13 +170,14 @@ typedef struct {
#define EM_PARISC 15 /* HPPA */
#define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */
#define EM_PPC 20 /* PowerPC */
+#define EM_PPC64 21 /* PowerPC 64-bit */
#define EM_ARM 40 /* Advanced RISC Machines ARM */
#define EM_ALPHA 41 /* DEC ALPHA */
#define EM_SPARCV9 43 /* SPARC version 9 */
#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */
+#define EM_IA_64 50 /* Intel Merced */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_VAX 75 /* DEC VAX */
-#define EM_NUM 15 /* number of machine types */
/* Version */
#define EV_NONE 0 /* Invalid */
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_linux.c Thu Jun 15 10:23:57 2006 -0600
@@ -103,6 +103,124 @@ int do_xen_hypercall(int xc_handle, priv
(unsigned long)hypercall);
}
+#define EVTCHN_DEV_NAME "/dev/xen/evtchn"
+#define EVTCHN_DEV_MAJOR 10
+#define EVTCHN_DEV_MINOR 201
+
+int xc_evtchn_open(void)
+{
+ struct stat st;
+ int fd;
+
+ /* Make sure any existing device file links to correct device. */
+ if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
+ (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
+ (void)unlink(EVTCHN_DEV_NAME);
+
+reopen:
+ if ( (fd = open(EVTCHN_DEV_NAME, O_RDWR)) == -1 )
+ {
+ if ( (errno == ENOENT) &&
+ ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+ (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
+ makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0) )
+ goto reopen;
+
+ PERROR("Could not open event channel interface");
+ return -1;
+ }
+
+ return fd;
+}
+
+int xc_evtchn_close(int xce_handle)
+{
+ return close(xce_handle);
+}
+
+int xc_evtchn_fd(int xce_handle)
+{
+ return xce_handle;
+}
+
+int xc_evtchn_notify(int xce_handle, evtchn_port_t port)
+{
+ struct ioctl_evtchn_notify notify;
+
+ notify.port = port;
+
+ return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, &notify);
+}
+
+evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
+ evtchn_port_t remote_port)
+{
+ struct ioctl_evtchn_bind_interdomain bind;
+
+ bind.remote_domain = domid;
+ bind.remote_port = remote_port;
+
+ return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+}
+
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
+{
+ struct ioctl_evtchn_unbind unbind;
+
+ unbind.port = port;
+
+ return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind);
+}
+
+evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq)
+{
+ struct ioctl_evtchn_bind_virq bind;
+
+ bind.virq = virq;
+
+ return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind);
+}
+
+static int dorw(int fd, char *data, size_t size, int do_write)
+{
+ size_t offset = 0;
+ ssize_t len;
+
+ while ( offset < size )
+ {
+ if (do_write)
+ len = write(fd, data + offset, size - offset);
+ else
+ len = read(fd, data + offset, size - offset);
+
+ if ( len == -1 )
+ {
+ if ( errno == EINTR )
+ continue;
+ return -1;
+ }
+
+ offset += len;
+ }
+
+ return 0;
+}
+
+evtchn_port_t xc_evtchn_pending(int xce_handle)
+{
+ evtchn_port_t port;
+
+ if ( dorw(xce_handle, (char *)&port, sizeof(port), 0) == -1 )
+ return -1;
+
+ return port;
+}
+
+int xc_evtchn_unmask(int xce_handle, evtchn_port_t port)
+{
+ return dorw(xce_handle, (char *)&port, sizeof(port), 1);
+}
+
/*
* Local variables:
* mode: C
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c Thu Jun 15 10:23:57 2006 -0600
@@ -456,6 +456,15 @@ int xc_linux_restore(int xc_handle, int
n+= j; /* crude stats */
}
+ /*
+ * Ensure we flush all machphys updates before potential PAE-specific
+ * reallocations below.
+ */
+ if (xc_finish_mmu_updates(xc_handle, mmu)) {
+ ERR("Error doing finish_mmu_updates()");
+ goto out;
+ }
+
DPRINTF("Received all pages (%d races)\n", nraces);
if ((pt_levels == 3) && !pae_extended_cr3) {
@@ -550,14 +559,11 @@ int xc_linux_restore(int xc_handle, int
}
}
- }
-
-
- if (xc_finish_mmu_updates(xc_handle, mmu)) {
- ERR("Error doing finish_mmu_updates()");
- goto out;
- }
-
+ if (xc_finish_mmu_updates(xc_handle, mmu)) {
+ ERR("Error doing finish_mmu_updates()");
+ goto out;
+ }
+ }
/*
* Pin page tables. Do this after writing to them as otherwise Xen
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_load_elf.c Thu Jun 15 10:23:57 2006 -0600
@@ -21,6 +21,24 @@ loadelfsymtab(
loadelfsymtab(
const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
struct domain_setup_info *dsi);
+
+/*
+ * Elf header attributes we require for each supported host platform.
+ * These are checked in parseelfimage().
+ */
+#if defined(__ia64__)
+#define ELFCLASS ELFCLASS64
+#define ELFDATA ELFDATA2LSB
+#define ELFMACHINE EM_IA_64
+#elif defined(__i386__)
+#define ELFCLASS ELFCLASS32
+#define ELFDATA ELFDATA2LSB
+#define ELFMACHINE EM_386
+#elif defined(__x86_64__)
+#define ELFCLASS ELFCLASS64
+#define ELFDATA ELFDATA2LSB
+#define ELFMACHINE EM_X86_64
+#endif
int probe_elf(const char *image,
unsigned long image_size,
@@ -61,16 +79,10 @@ static int parseelfimage(const char *ima
return -EINVAL;
}
- if (
-#if defined(__i386__)
- (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
- (ehdr->e_machine != EM_386) ||
-#elif defined(__x86_64__)
- (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
- (ehdr->e_machine != EM_X86_64) ||
-#endif
- (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
- (ehdr->e_type != ET_EXEC) )
+ if ( (ehdr->e_ident[EI_CLASS] != ELFCLASS) ||
+ (ehdr->e_machine != ELFMACHINE) ||
+ (ehdr->e_ident[EI_DATA] != ELFDATA) ||
+ (ehdr->e_type != ET_EXEC) )
{
ERROR("Kernel not a Xen-compatible Elf image.");
return -EINVAL;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xenctrl.h Thu Jun 15 10:23:57 2006 -0600
@@ -604,4 +604,58 @@ int xc_finish_mmu_updates(int xc_handle,
int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size);
+/*
+ * Return a handle to the event channel driver, or -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_evtchn_open(void);
+
+/*
+ * Close a handle previously allocated with xc_evtchn_open().
+ */
+int xc_evtchn_close(int xce_handle);
+
+/*
+ * Return an fd that can be select()ed on for further calls to
+ * xc_evtchn_pending().
+ */
+int xc_evtchn_fd(int xce_handle);
+
+/*
+ * Notify the given event channel. Returns -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_evtchn_notify(int xce_handle, evtchn_port_t port);
+
+/*
+ * Returns a new event port bound to the remote port for the given domain ID,
+ * or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
+ evtchn_port_t remote_port);
+
+/*
+ * Unbind the given event channel. Returns -1 on failure, in which case errno
+ * will be set appropriately.
+ */
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port);
+
+/*
+ * Bind an event channel to the given VIRQ. Returns the event channel bound to
+ * the VIRQ, or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq);
+
+/*
+ * Return the next event channel to become pending, or -1 on failure, in which
+ * case errno will be set appropriately.
+ */
+evtchn_port_t xc_evtchn_pending(int xce_handle);
+
+/*
+ * Unmask the given event channel. Returns -1 on failure, in which case errno
+ * will be set appropriately.
+ */
+int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
+
#endif
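A minimal sketch of the intended calling pattern for the event-channel interface declared above, modelled on the xenconsoled and xenbaked conversions in this changeset; the VIRQ choice and the minimal error handling are illustrative only.

#include <xenctrl.h>

/* Illustrative: open the evtchn driver, bind a VIRQ, wait for one event. */
static int example_wait_one_event(void)
{
    int xce = xc_evtchn_open();
    evtchn_port_t port, pending;

    if (xce == -1)
        return -1;

    port = xc_evtchn_bind_virq(xce, VIRQ_TBUF);   /* as xenbaked does */
    if (port == -1) {
        xc_evtchn_close(xce);
        return -1;
    }

    /* xc_evtchn_fd(xce) could be select()ed on; here we simply block. */
    pending = xc_evtchn_pending(xce);
    if (pending == port)
        (void)xc_evtchn_unmask(xce, pending);     /* re-arm the port */

    (void)xc_evtchn_unbind(xce, port);
    return xc_evtchn_close(xce);
}

The select()-based variant used by xenconsoled adds xc_evtchn_fd() to its fd set and calls xc_evtchn_pending()/xc_evtchn_unmask() only when that fd becomes readable.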
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/util/security.py
--- a/tools/python/xen/util/security.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/util/security.py Thu Jun 15 10:23:57 2006 -0600
@@ -52,7 +52,8 @@ binary_name_re = re.compile(".*[chwall|s
binary_name_re = re.compile(".*[chwall|ste|chwall_ste].*\.bin", re.IGNORECASE)
policy_name_re = re.compile(".*[chwall|ste|chwall_ste].*", re.IGNORECASE)
-
+#other global variables
+NULL_SSIDREF = 0
log = logging.getLogger("xend.util.security")
@@ -255,6 +256,8 @@ def ssidref2label(ssidref_var):
#2. get labelnames for both ssidref parts
pri_ssid = ssidref & 0xffff
sec_ssid = ssidref >> 16
+ pri_null_ssid = NULL_SSIDREF & 0xffff
+ sec_null_ssid = NULL_SSIDREF >> 16
pri_labels = []
sec_labels = []
labels = []
@@ -270,7 +273,11 @@ def ssidref2label(ssidref_var):
f.close()
#3. get the label that is in both lists (combination must be a single label)
- if secondary == "NULL":
+ if (primary == "CHWALL") and (pri_ssid == pri_null_ssid) and (sec_ssid != sec_null_ssid):
+ labels = sec_labels
+ elif (secondary == "CHWALL") and (pri_ssid != pri_null_ssid) and (sec_ssid == sec_null_ssid):
+ labels = pri_labels
+ elif secondary == "NULL":
labels = pri_labels
else:
for i in pri_labels:
@@ -285,7 +292,7 @@ def ssidref2label(ssidref_var):
-def label2ssidref(labelname, policyname):
+def label2ssidref(labelname, policyname, type):
"""
returns ssidref corresponding to labelname;
maps current policy to default directory
@@ -293,6 +300,14 @@ def label2ssidref(labelname, policyname)
if policyname in ['NULL', 'INACTIVE', 'DEFAULT']:
err("Cannot translate labels for \'" + policyname + "\' policy.")
+
+ allowed_types = ['ANY']
+ if type == 'dom':
+ allowed_types.append('VM')
+ elif type == 'res':
+ allowed_types.append('RES')
+ else:
+ err("Invalid type. Must specify 'dom' or 'res'.")
(primary, secondary, f, pol_exists) = getmapfile(policyname)
@@ -303,11 +318,15 @@ def label2ssidref(labelname, policyname)
l = line.split()
if (len(l) < 5) or (l[0] != "LABEL->SSID"):
continue
- if primary and (l[2] == primary) and (l[3] == labelname):
+ if primary and (l[1] in allowed_types) and (l[2] == primary) and (l[3] == labelname):
pri_ssid.append(int(l[4], 16))
- if secondary and (l[2] == secondary) and (l[3] == labelname):
+ if secondary and (l[1] in allowed_types) and (l[2] == secondary) and (l[3] == labelname):
sec_ssid.append(int(l[4], 16))
f.close()
+ if (type == 'res') and (primary == "CHWALL") and (len(pri_ssid) == 0):
+ pri_ssid.append(NULL_SSIDREF)
+ elif (type == 'res') and (secondary == "CHWALL") and (len(sec_ssid) == 0):
+ sec_ssid.append(NULL_SSIDREF)
#3. sanity check and composition of ssidref
if (len(pri_ssid) == 0) or ((len(sec_ssid) == 0) and (secondary != "NULL")):
@@ -360,7 +379,7 @@ def refresh_ssidref(config):
err("Policy \'" + policyname + "\' in label does not match active
policy \'"
+ active_policy +"\'!")
- new_ssidref = label2ssidref(labelname, policyname)
+ new_ssidref = label2ssidref(labelname, policyname, 'dom')
if not new_ssidref:
err("SSIDREF refresh failed!")
@@ -409,7 +428,7 @@ def get_decision(arg1, arg2):
enables domains to retrieve access control decisions from
the hypervisor Access Control Module.
IN: args format = ['domid', id] or ['ssidref', ssidref]
- or ['access_control', ['policy', policy], ['label', label]]
+ or ['access_control', ['policy', policy], ['label', label], ['type', type]]
"""
if not on():
@@ -417,14 +436,14 @@ def get_decision(arg1, arg2):
#translate labels before calling low-level function
if arg1[0] == 'access_control':
- if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') :
+ if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') or (arg1[3][0] != 'type'):
err("Argument type not supported.")
- ssidref = label2ssidref(arg1[2][1], arg1[1][1])
+ ssidref = label2ssidref(arg1[2][1], arg1[1][1], arg1[3][1])
arg1 = ['ssidref', str(ssidref)]
if arg2[0] == 'access_control':
- if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') :
+ if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') or (arg2[3][0] != 'type'):
err("Argument type not supported.")
- ssidref = label2ssidref(arg2[2][1], arg2[1][1])
+ ssidref = label2ssidref(arg2[2][1], arg2[1][1], arg2[3][1])
arg2 = ['ssidref', str(ssidref)]
# accept only int or string types for domid and ssidref
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/addlabel.py
--- a/tools/python/xen/xm/addlabel.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/addlabel.py Thu Jun 15 10:23:57 2006 -0600
@@ -50,7 +50,7 @@ def main(argv):
err("No active policy. Policy must be specified in command line.")
#sanity checks: make sure this label can be instantiated later on
- ssidref = label2ssidref(label, policyref)
+ ssidref = label2ssidref(label, policyref, 'dom')
+ new_label = "access_control = ['policy=%s,label=%s']\n" % (policyref, label)
if not os.path.isfile(configfile):
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/create.py Thu Jun 15 10:23:57 2006 -0600
@@ -541,7 +541,7 @@ def configure_security(config, vals):
if sxp.child_value(config, 'ssidref'):
err("ERROR: SSIDREF and access_control are mutually exclusive but
both specified!")
#else calculate ssidre from label
- ssidref = security.label2ssidref(label, policy)
+ ssidref = security.label2ssidref(label, policy, 'dom')
if not ssidref :
err("ERROR calculating ssidref from access_control.")
security_label = ['security', [ config_access_control, ['ssidref' , ssidref ] ] ]
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/main.py Thu Jun 15 10:23:57 2006 -0600
@@ -1193,6 +1193,9 @@ def main(argv=sys.argv):
else:
print >>sys.stderr, "Error: %s" % ex.faultString
sys.exit(1)
+ except (ValueError, OverflowError):
+ err("Invalid argument.")
+ usage(argv[1])
except:
print "Unexpected error:", sys.exc_info()[0]
print
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/Makefile
--- a/tools/security/Makefile Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/Makefile Thu Jun 15 10:23:57 2006 -0600
@@ -33,7 +33,7 @@ OBJS_XML2BIN := $(patsubst %.c,%.o,$(fil
ACM_INST_TOOLS = xensec_tool xensec_xml2bin xensec_gen
ACM_OBJS = $(OBJS_TOOL) $(OBJS_XML2BIN) $(OBJS_GETD)
-ACM_SCRIPTS = python/xensec_tools/acm_getlabel python/xensec_tools/acm_getdecision
+ACM_SCRIPTS = python/xensec_tools/acm_getlabel
ACM_CONFIG_DIR = /etc/xen/acm-security
ACM_POLICY_DIR = $(ACM_CONFIG_DIR)/policies
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/python/xensec_gen/cgi-bin/policy.cgi
--- a/tools/security/python/xensec_gen/cgi-bin/policy.cgi Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/python/xensec_gen/cgi-bin/policy.cgi Thu Jun 15 10:23:57 2006 -0600
@@ -406,7 +406,7 @@ def parsePolicyXml( ):
msg = msg + 'Please validate the Policy file used.'
formatXmlError( msg )
- allCSMTypes[csName][1] = csMemberList
+ allCSMTypes[csName][1] = csMemberList
if pOrder != '':
formPolicyOrder[1] = pOrder
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/secpol_xml2bin.c
--- a/tools/security/secpol_xml2bin.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/secpol_xml2bin.c Thu Jun 15 10:23:57 2006 -0600
@@ -44,6 +44,8 @@
#define DEBUG 0
+#define NULL_LABEL_NAME "__NULL_LABEL__"
+
/* primary / secondary policy component setting */
enum policycomponent { CHWALL, STE, NULLPOLICY }
primary = NULLPOLICY, secondary = NULLPOLICY;
@@ -467,7 +469,7 @@ int init_ssid_queues(void)
return -ENOMEM;
/* default chwall ssid */
- default_ssid_chwall->name = "DEFAULT";
+ default_ssid_chwall->name = NULL_LABEL_NAME;
default_ssid_chwall->num = max_chwall_ssids++;
default_ssid_chwall->is_ref = 0;
default_ssid_chwall->type = ANY;
@@ -484,7 +486,7 @@ int init_ssid_queues(void)
max_chwall_labels++;
/* default ste ssid */
- default_ssid_ste->name = "DEFAULT";
+ default_ssid_ste->name = NULL_LABEL_NAME;
default_ssid_ste->num = max_ste_ssids++;
default_ssid_ste->is_ref = 0;
default_ssid_ste->type = ANY;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenmon/xenbaked.c Thu Jun 15 10:23:57 2006 -0600
@@ -33,9 +33,6 @@
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
@@ -45,7 +42,6 @@
#include <xen/xen.h>
#include <string.h>
#include <sys/select.h>
-#include <xen/linux/evtchn.h>
#define PERROR(_m, _a...) \
do { \
@@ -256,51 +252,29 @@ void log_event(int event_id)
stat_map[0].event_count++; // other
}
-#define EVTCHN_DEV_NAME "/dev/xen/evtchn"
-#define EVTCHN_DEV_MAJOR 10
-#define EVTCHN_DEV_MINOR 201
-
int virq_port;
-int eventchn_fd = -1;
+int xce_handle = -1;
/* Returns the event channel handle. */
/* Stolen from xenstore code */
int eventchn_init(void)
{
- struct stat st;
- struct ioctl_evtchn_bind_virq bind;
int rc;
// to revert to old way:
if (0)
return -1;
- /* Make sure any existing device file links to correct device. */
- if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
- (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
- (void)unlink(EVTCHN_DEV_NAME);
-
- reopen:
- eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
- if (eventchn_fd == -1) {
- if ((errno == ENOENT) &&
- ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
- (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
- makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
- goto reopen;
- return -errno;
- }
-
- if (eventchn_fd < 0)
+ xce_handle = xc_evtchn_open();
+
+ if (xce_handle < 0)
perror("Failed to open evtchn device");
- bind.virq = VIRQ_TBUF;
- rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
- if (rc == -1)
+ if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
perror("Failed to bind to domain exception virq port");
virq_port = rc;
- return eventchn_fd;
+ return xce_handle;
}
void wait_for_event(void)
@@ -309,27 +283,30 @@ void wait_for_event(void)
fd_set inset;
evtchn_port_t port;
struct timeval tv;
+ int evtchn_fd;
- if (eventchn_fd < 0) {
+ if (xce_handle < 0) {
nanosleep(&opts.poll_sleep, NULL);
return;
}
+ evtchn_fd = xc_evtchn_fd(xce_handle);
+
FD_ZERO(&inset);
- FD_SET(eventchn_fd, &inset);
+ FD_SET(evtchn_fd, &inset);
tv.tv_sec = 1;
tv.tv_usec = 0;
// tv = millis_to_timespec(&opts.poll_sleep);
- ret = select(eventchn_fd+1, &inset, NULL, NULL, &tv);
+ ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);
- if ( (ret == 1) && FD_ISSET(eventchn_fd, &inset)) {
- if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+ if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
+ if ((port = xc_evtchn_pending(xce_handle)) == -1)
perror("Failed to read from event fd");
// if (port == virq_port)
// printf("got the event I was looking for\r\n");
-
- if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+
+ if (xc_evtchn_unmask(xce_handle, port) == -1)
perror("Failed to write to event fd");
}
}
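
The two xenbaked hunks above drop the hand-rolled /dev/xen/evtchn device handling (lstat/mknod/open plus raw ioctl/read/write) in favour of the libxc event-channel interface. A minimal sketch of the resulting pattern, using only the xc_evtchn_* calls that appear in this changeset; the helper name wait_one_event is illustrative and not part of the patch:

#include <xenctrl.h>
#include <sys/select.h>

/* Open a handle, bind to a VIRQ, wait via select(), then consume and unmask. */
static int wait_one_event(unsigned int virq)
{
    int xce = xc_evtchn_open();               /* replaces open("/dev/xen/evtchn") */
    evtchn_port_t port;
    fd_set inset;
    int fd;

    if (xce < 0 || xc_evtchn_bind_virq(xce, virq) == -1)
        return -1;

    fd = xc_evtchn_fd(xce);                   /* selectable fd behind the handle */
    FD_ZERO(&inset);
    FD_SET(fd, &inset);
    if (select(fd + 1, &inset, NULL, NULL, NULL) == 1) {
        port = xc_evtchn_pending(xce);        /* replaces read() of the port */
        if (port != -1)
            xc_evtchn_unmask(xce, port);      /* replaces write() of the port */
    }
    return 0;
}

xenstored_domain.c below makes the same substitution for its interdomain channels via xc_evtchn_bind_interdomain, xc_evtchn_notify and xc_evtchn_unbind.
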
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstat/libxenstat/src/xenstat.c Thu Jun 15 10:23:57 2006 -0600
@@ -223,18 +223,20 @@ xenstat_node *xenstat_get_node(xenstat_h
num_domains = 0;
do {
- xenstat_domain *domain;
+ xenstat_domain *domain, *tmp;
new_domains = xc_domain_getinfolist(handle->xc_handle,
num_domains, DOMAIN_CHUNK_SIZE, domaininfo);
- node->domains = realloc(node->domains,
- (num_domains + new_domains)
- * sizeof(xenstat_domain));
- if (node->domains == NULL) {
+ tmp = realloc(node->domains,
+ (num_domains + new_domains)
+ * sizeof(xenstat_domain));
+ if (tmp == NULL) {
+ free(node->domains);
free(node);
return NULL;
}
+ node->domains = tmp;
domain = node->domains + num_domains;
@@ -582,11 +584,14 @@ static int xenstat_collect_networks(xens
domain->num_networks = 1;
domain->networks = malloc(sizeof(xenstat_network));
} else {
+ struct xenstat_network *tmp;
domain->num_networks++;
- domain->networks =
- realloc(domain->networks,
- domain->num_networks *
- sizeof(xenstat_network));
+ tmp = realloc(domain->networks,
+ domain->num_networks *
+ sizeof(xenstat_network));
+ if (tmp == NULL)
+ free(domain->networks);
+ domain->networks = tmp;
}
if (domain->networks == NULL)
return 0;
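
Both xenstat hunks above replace realloc() calls that overwrote the only pointer to the buffer, which leaked the original allocation whenever realloc() failed. A standalone sketch of the keep-the-old-pointer idiom they adopt; the helper name grow_array is illustrative, not part of the patch:

#include <stdlib.h>

/* Resize an array to n elements; on failure the old block is freed, not leaked. */
static int grow_array(xenstat_domain **arr, size_t n)
{
    xenstat_domain *tmp = realloc(*arr, n * sizeof(**arr));
    if (tmp == NULL) {
        free(*arr);            /* the old pointer is still valid here */
        *arr = NULL;
        return -1;
    }
    *arr = tmp;
    return 0;
}
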
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/fake_libxc.c
--- a/tools/xenstore/fake_libxc.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/fake_libxc.c Thu Jun 15 10:23:57 2006 -0600
@@ -37,7 +37,7 @@ static evtchn_port_t port;
static evtchn_port_t port;
/* The event channel maps to a signal, shared page to an mmapped file. */
-void evtchn_notify(int local_port)
+void xc_evtchn_notify(int xce_handle, int local_port)
{
assert(local_port == port);
if (kill(xs_test_pid, SIGUSR2) != 0)
@@ -124,7 +124,7 @@ void fake_ack_event(void)
signal(SIGUSR2, send_to_fd);
}
-int fake_open_eventchn(void)
+int xc_evtchn_open(void)
{
int fds[2];
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/xenstored_core.c Thu Jun 15 10:23:57 2006 -0600
@@ -54,7 +54,7 @@
#include "hashtable.h"
-extern int eventchn_fd; /* in xenstored_domain.c */
+extern int xce_handle; /* in xenstored_domain.c */
static bool verbose = false;
LIST_HEAD(connections);
@@ -353,8 +353,11 @@ static int initialize_set(fd_set *inset,
set_fd(sock, inset, &max);
set_fd(ro_sock, inset, &max);
- set_fd(eventchn_fd, inset, &max);
set_fd(reopen_log_pipe[0], inset, &max);
+
+ if (xce_handle != -1)
+ set_fd(xc_evtchn_fd(xce_handle), inset, &max);
+
list_for_each_entry(i, &connections, list) {
if (i->domain)
continue;
@@ -1769,6 +1772,7 @@ int main(int argc, char *argv[])
bool outputpid = false;
bool no_domain_init = false;
const char *pidfile = NULL;
+ int evtchn_fd = -1;
while ((opt = getopt_long(argc, argv, "DE:F:HNPS:T:RLVW:", options,
NULL)) != -1) {
@@ -1907,6 +1911,9 @@ int main(int argc, char *argv[])
signal(SIGUSR1, stop_failtest);
#endif
+ if (xce_handle != -1)
+ evtchn_fd = xc_evtchn_fd(xce_handle);
+
/* Get ready to listen to the tools. */
max = initialize_set(&inset, &outset, *sock, *ro_sock);
@@ -1934,7 +1941,7 @@ int main(int argc, char *argv[])
if (FD_ISSET(*ro_sock, &inset))
accept_connection(*ro_sock, false);
- if (eventchn_fd > 0 && FD_ISSET(eventchn_fd, &inset))
+ if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
handle_event();
list_for_each_entry(i, &connections, list) {
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/xenstored_domain.c Thu Jun 15 10:23:57 2006 -0600
@@ -18,15 +18,10 @@
*/
#include <stdio.h>
-#include <linux/ioctl.h>
-#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdarg.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
//#define DEBUG
#include "utils.h"
@@ -37,12 +32,11 @@
#include "xenstored_test.h"
#include <xenctrl.h>
-#include <xen/sys/evtchn.h>
static int *xc_handle;
static evtchn_port_t virq_port;
-int eventchn_fd = -1;
+int xce_handle = -1;
struct domain
{
@@ -82,19 +76,6 @@ struct domain
};
static LIST_HEAD(domains);
-
-#ifndef TESTING
-static void evtchn_notify(int port)
-{
- int rc;
-
- struct ioctl_evtchn_notify notify;
- notify.port = port;
- rc = ioctl(eventchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
-}
-#else
-extern void evtchn_notify(int port);
-#endif
/* FIXME: Mark connection as broken (close it?) when this happens. */
static bool check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
@@ -146,7 +127,7 @@ static int writechn(struct connection *c
mb();
intf->rsp_prod += len;
- evtchn_notify(conn->domain->port);
+ xc_evtchn_notify(xce_handle, conn->domain->port);
return len;
}
@@ -176,7 +157,7 @@ static int readchn(struct connection *co
mb();
intf->req_cons += len;
- evtchn_notify(conn->domain->port);
+ xc_evtchn_notify(xce_handle, conn->domain->port);
return len;
}
@@ -184,13 +165,11 @@ static int destroy_domain(void *_domain)
static int destroy_domain(void *_domain)
{
struct domain *domain = _domain;
- struct ioctl_evtchn_unbind unbind;
list_del(&domain->list);
if (domain->port) {
- unbind.port = domain->port;
- if (ioctl(eventchn_fd, IOCTL_EVTCHN_UNBIND, &unbind) == -1)
+ if (xc_evtchn_unbind(xce_handle, domain->port) == -1)
eprintf("> Unbinding port %i failed!\n", domain->port);
}
@@ -231,14 +210,14 @@ void handle_event(void)
{
evtchn_port_t port;
- if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+ if ((port = xc_evtchn_pending(xce_handle)) == -1)
barf_perror("Failed to read from event fd");
if (port == virq_port)
domain_cleanup();
#ifndef TESTING
- if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+ if (xc_evtchn_unmask(xce_handle, port) == -1)
barf_perror("Failed to write to event fd");
#endif
}
@@ -269,7 +248,6 @@ static struct domain *new_domain(void *c
int port)
{
struct domain *domain;
- struct ioctl_evtchn_bind_interdomain bind;
int rc;
@@ -283,9 +261,7 @@ static struct domain *new_domain(void *c
talloc_set_destructor(domain, destroy_domain);
/* Tell kernel we're interested in this event. */
- bind.remote_domain = domid;
- bind.remote_port = port;
- rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+ rc = xc_evtchn_bind_interdomain(xce_handle, domid, port);
if (rc == -1)
return NULL;
domain->port = rc;
@@ -490,23 +466,14 @@ static int dom0_init(void)
talloc_steal(dom0->conn, dom0);
- evtchn_notify(dom0->port);
+ xc_evtchn_notify(xce_handle, dom0->port);
return 0;
}
-
-
-
-#define EVTCHN_DEV_NAME "/dev/xen/evtchn"
-#define EVTCHN_DEV_MAJOR 10
-#define EVTCHN_DEV_MINOR 201
-
/* Returns the event channel handle. */
int domain_init(void)
{
- struct stat st;
- struct ioctl_evtchn_bind_virq bind;
int rc;
xc_handle = talloc(talloc_autofree_context(), int);
@@ -519,39 +486,19 @@ int domain_init(void)
talloc_set_destructor(xc_handle, close_xc_handle);
-#ifdef TESTING
- eventchn_fd = fake_open_eventchn();
- (void)&st;
-#else
- /* Make sure any existing device file links to correct device. */
- if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
- (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
- (void)unlink(EVTCHN_DEV_NAME);
-
- reopen:
- eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
- if (eventchn_fd == -1) {
- if ((errno == ENOENT) &&
- ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
- (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
- makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
- goto reopen;
- return -errno;
- }
-#endif
- if (eventchn_fd < 0)
+ xce_handle = xc_evtchn_open();
+
+ if (xce_handle < 0)
barf_perror("Failed to open evtchn device");
if (dom0_init() != 0)
barf_perror("Failed to initialize dom0 state");
- bind.virq = VIRQ_DOM_EXC;
- rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
- if (rc == -1)
+ if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_DOM_EXC)) == -1)
barf_perror("Failed to bind to domain exception virq port");
virq_port = rc;
- return eventchn_fd;
+ return xce_handle;
}
void domain_entry_inc(struct connection *conn)
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xm-test/tests/block-integrity/01_block_device_read_verify.py
--- a/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py Thu Jun 15 10:23:57 2006 -0600
@@ -31,7 +31,7 @@ traceCommand("cat /dev/urandom > /dev/ra
s, o = traceCommand("md5sum /dev/ram1")
-dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M)
block_attach(domain, "phy:ram1", "hda1")
@@ -40,7 +40,7 @@ except ConsoleError, e:
except ConsoleError, e:
FAIL(str(e))
-domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M)
domain.closeConsole()
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xm-test/tests/block-integrity/02_block_device_write_verify.py
--- a/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py Thu Jun 15 10:23:57 2006 -0600
@@ -37,7 +37,7 @@ except ConsoleError, e:
except ConsoleError, e:
FAIL(str(e))
-domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M)
domain.closeConsole()
@@ -45,7 +45,7 @@ domain.stop()
s, o = traceCommand("md5sum /dev/ram1")
-dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M)
if domU_md5sum_match == None:
FAIL("Failed to get md5sum of data written in domU.")
diff -r 7f67c15e2c91 -r fbc0e953732e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/arch/x86/traps.c Thu Jun 15 10:23:57 2006 -0600
@@ -1279,7 +1279,7 @@ static void nmi_softirq(void)
static void nmi_softirq(void)
{
/* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
- evtchn_notify(dom0->vcpu[0]);
+ vcpu_kick(dom0->vcpu[0]);
}
static void nmi_dom0_report(unsigned int reason_idx)
diff -r 7f67c15e2c91 -r fbc0e953732e xen/common/event_channel.c
--- a/xen/common/event_channel.c Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/common/event_channel.c Thu Jun 15 10:23:57 2006 -0600
@@ -493,10 +493,9 @@ void evtchn_set_pending(struct vcpu *v,
if ( !test_bit (port, s->evtchn_mask) &&
!test_and_set_bit(port / BITS_PER_LONG,
- &v->vcpu_info->evtchn_pending_sel) &&
- !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
- {
- evtchn_notify(v);
+ &v->vcpu_info->evtchn_pending_sel) )
+ {
+ vcpu_mark_events_pending(v);
}
/* Check if some VCPU might be polling for this event. */
@@ -682,10 +681,9 @@ static long evtchn_unmask(evtchn_unmask_
if ( test_and_clear_bit(port, s->evtchn_mask) &&
test_bit (port, s->evtchn_pending) &&
!test_and_set_bit (port / BITS_PER_LONG,
- &v->vcpu_info->evtchn_pending_sel) &&
- !test_and_set_bit (0, &v->vcpu_info->evtchn_upcall_pending) )
- {
- evtchn_notify(v);
+ &v->vcpu_info->evtchn_pending_sel) )
+ {
+ vcpu_mark_events_pending(v);
}
spin_unlock(&d->evtchn_lock);
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/asm-ia64/event.h
--- a/xen/include/asm-ia64/event.h Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/asm-ia64/event.h Thu Jun 15 10:23:57 2006 -0600
@@ -12,7 +12,7 @@
#include <public/arch-ia64.h>
#include <asm/vcpu.h>
-static inline void evtchn_notify(struct vcpu *v)
+static inline void vcpu_kick(struct vcpu *v)
{
/*
* NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
@@ -30,6 +30,12 @@ static inline void evtchn_notify(struct
if(!VMX_DOMAIN(v) && !v->arch.event_callback_ip)
vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector);
+}
+
+static inline void vcpu_mark_events_pending(struct vcpu *v)
+{
+ if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
+ vcpu_kick(v);
}
/* Note: Bitwise operations result in fast code with no branches. */
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/asm-x86/event.h Thu Jun 15 10:23:57 2006 -0600
@@ -9,7 +9,7 @@
#ifndef __ASM_EVENT_H__
#define __ASM_EVENT_H__
-static inline void evtchn_notify(struct vcpu *v)
+static inline void vcpu_kick(struct vcpu *v)
{
/*
* NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
@@ -24,6 +24,12 @@ static inline void evtchn_notify(struct
vcpu_unblock(v);
if ( running )
smp_send_event_check_cpu(v->processor);
+}
+
+static inline void vcpu_mark_events_pending(struct vcpu *v)
+{
+ if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
+ vcpu_kick(v);
}
static inline int local_events_need_delivery(void)
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/xen/elf.h
--- a/xen/include/xen/elf.h Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/xen/elf.h Thu Jun 15 10:23:57 2006 -0600
@@ -178,9 +178,9 @@ typedef struct {
#define EM_ALPHA 41 /* DEC ALPHA */
#define EM_SPARCV9 43 /* SPARC version 9 */
#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */
+#define EM_IA_64 50 /* Intel Merced */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_VAX 75 /* DEC VAX */
-#define EM_NUM 15 /* number of machine types */
/* Version */
#define EV_NONE 0 /* Invalid */
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/kernel/fork.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/kernel/fork.c Thu Jun 15 10:23:57 2006 -0600
@@ -0,0 +1,1619 @@
+/*
+ * linux/kernel/fork.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'fork.c' contains the help-routines for the 'fork' system call
+ * (see also entry.S and others).
+ * Fork is rather simple, once you get the hang of it, but the memory
+ * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/unistd.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/completion.h>
+#include <linux/namespace.h>
+#include <linux/personality.h>
+#include <linux/mempolicy.h>
+#include <linux/sem.h>
+#include <linux/file.h>
+#include <linux/key.h>
+#include <linux/binfmts.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/security.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/jiffies.h>
+#include <linux/futex.h>
+#include <linux/rcupdate.h>
+#include <linux/ptrace.h>
+#include <linux/mount.h>
+#include <linux/audit.h>
+#include <linux/profile.h>
+#include <linux/rmap.h>
+#include <linux/acct.h>
+#include <linux/cn_proc.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Protected counters by write_lock_irq(&tasklist_lock)
+ */
+unsigned long total_forks; /* Handle normal Linux uptimes. */
+int nr_threads; /* The idle threads do not count.. */
+
+int max_threads; /* tunable limit on nr_threads */
+
+DEFINE_PER_CPU(unsigned long, process_counts) = 0;
+
+ __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
+
+EXPORT_SYMBOL(tasklist_lock);
+
+int nr_processes(void)
+{
+ int cpu;
+ int total = 0;
+
+ for_each_online_cpu(cpu)
+ total += per_cpu(process_counts, cpu);
+
+ return total;
+}
+
+#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+# define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
+# define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk))
+static kmem_cache_t *task_struct_cachep;
+#endif
+
+/* SLAB cache for signal_struct structures (tsk->signal) */
+kmem_cache_t *signal_cachep;
+
+/* SLAB cache for sighand_struct structures (tsk->sighand) */
+kmem_cache_t *sighand_cachep;
+
+/* SLAB cache for files_struct structures (tsk->files) */
+kmem_cache_t *files_cachep;
+
+/* SLAB cache for fs_struct structures (tsk->fs) */
+kmem_cache_t *fs_cachep;
+
+/* SLAB cache for vm_area_struct structures */
+kmem_cache_t *vm_area_cachep;
+
+/* SLAB cache for mm_struct structures (tsk->mm) */
+static kmem_cache_t *mm_cachep;
+
+void free_task(struct task_struct *tsk)
+{
+ free_thread_info(tsk->thread_info);
+ free_task_struct(tsk);
+}
+EXPORT_SYMBOL(free_task);
+
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+ WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+ WARN_ON(atomic_read(&tsk->usage));
+ WARN_ON(tsk == current);
+
+ if (unlikely(tsk->audit_context))
+ audit_free(tsk);
+ security_task_free(tsk);
+ free_uid(tsk->user);
+ put_group_info(tsk->group_info);
+
+ if (!profile_handoff_task(tsk))
+ free_task(tsk);
+}
+
+void __init fork_init(unsigned long mempages)
+{
+#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef ARCH_MIN_TASKALIGN
+#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
+#endif
+ /* create a slab on which task_structs can be allocated */
+ task_struct_cachep =
+ kmem_cache_create("task_struct", sizeof(struct task_struct),
+ ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+#endif
+
+ /*
+ * The default maximum number of threads is set to a safe
+ * value: the thread structures can take up at most half
+ * of memory.
+ */
+ max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
+
+ /*
+ * we need to allow at least 20 threads to boot a system
+ */
+ if(max_threads < 20)
+ max_threads = 20;
+
+ init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
+ init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
+ init_task.signal->rlim[RLIMIT_SIGPENDING] =
+ init_task.signal->rlim[RLIMIT_NPROC];
+}
+
+static struct task_struct *dup_task_struct(struct task_struct *orig)
+{
+ struct task_struct *tsk;
+ struct thread_info *ti;
+
+ prepare_to_copy(orig);
+
+ tsk = alloc_task_struct();
+ if (!tsk)
+ return NULL;
+
+ ti = alloc_thread_info(tsk);
+ if (!ti) {
+ free_task_struct(tsk);
+ return NULL;
+ }
+
+ *tsk = *orig;
+ tsk->thread_info = ti;
+ setup_thread_stack(tsk, orig);
+
+ /* One for us, one for whoever does the "release_task()" (usually parent) */
+ atomic_set(&tsk->usage,2);
+ atomic_set(&tsk->fs_excl, 0);
+ return tsk;
+}
+
+#ifdef CONFIG_MMU
+static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ struct vm_area_struct *mpnt, *tmp, **pprev;
+ struct rb_node **rb_link, *rb_parent;
+ int retval;
+ unsigned long charge;
+ struct mempolicy *pol;
+
+ down_write(&oldmm->mmap_sem);
+ flush_cache_mm(oldmm);
+ down_write(&mm->mmap_sem);
+
+ mm->locked_vm = 0;
+ mm->mmap = NULL;
+ mm->mmap_cache = NULL;
+ mm->free_area_cache = oldmm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+ mm->map_count = 0;
+ cpus_clear(mm->cpu_vm_mask);
+ mm->mm_rb = RB_ROOT;
+ rb_link = &mm->mm_rb.rb_node;
+ rb_parent = NULL;
+ pprev = &mm->mmap;
+
+ for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
+ struct file *file;
+
+ if (mpnt->vm_flags & VM_DONTCOPY) {
+ long pages = vma_pages(mpnt);
+ mm->total_vm -= pages;
+ vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
+ -pages);
+ continue;
+ }
+ charge = 0;
+ if (mpnt->vm_flags & VM_ACCOUNT) {
+ unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ if (security_vm_enough_memory(len))
+ goto fail_nomem;
+ charge = len;
+ }
+ tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!tmp)
+ goto fail_nomem;
+ *tmp = *mpnt;
+ pol = mpol_copy(vma_policy(mpnt));
+ retval = PTR_ERR(pol);
+ if (IS_ERR(pol))
+ goto fail_nomem_policy;
+ vma_set_policy(tmp, pol);
+ tmp->vm_flags &= ~VM_LOCKED;
+ tmp->vm_mm = mm;
+ tmp->vm_next = NULL;
+ anon_vma_link(tmp);
+ file = tmp->vm_file;
+ if (file) {
+ struct inode *inode = file->f_dentry->d_inode;
+ get_file(file);
+ if (tmp->vm_flags & VM_DENYWRITE)
+ atomic_dec(&inode->i_writecount);
+
+ /* insert tmp into the share list, just after mpnt */
+ spin_lock(&file->f_mapping->i_mmap_lock);
+ tmp->vm_truncate_count = mpnt->vm_truncate_count;
+ flush_dcache_mmap_lock(file->f_mapping);
+ vma_prio_tree_add(tmp, mpnt);
+ flush_dcache_mmap_unlock(file->f_mapping);
+ spin_unlock(&file->f_mapping->i_mmap_lock);
+ }
+
+ /*
+ * Link in the new vma and copy the page table entries.
+ */
+ *pprev = tmp;
+ pprev = &tmp->vm_next;
+
+ __vma_link_rb(mm, tmp, rb_link, rb_parent);
+ rb_link = &tmp->vm_rb.rb_right;
+ rb_parent = &tmp->vm_rb;
+
+ mm->map_count++;
+ retval = copy_page_range(mm, oldmm, mpnt);
+
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+
+ if (retval)
+ goto out;
+ }
+#ifdef arch_dup_mmap
+ arch_dup_mmap(mm, oldmm);
+#endif
+ retval = 0;
+out:
+ up_write(&mm->mmap_sem);
+ flush_tlb_mm(oldmm);
+ up_write(&oldmm->mmap_sem);
+ return retval;
+fail_nomem_policy:
+ kmem_cache_free(vm_area_cachep, tmp);
+fail_nomem:
+ retval = -ENOMEM;
+ vm_unacct_memory(charge);
+ goto out;
+}
+
+static inline int mm_alloc_pgd(struct mm_struct * mm)
+{
+ mm->pgd = pgd_alloc(mm);
+ if (unlikely(!mm->pgd))
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void mm_free_pgd(struct mm_struct * mm)
+{
+ pgd_free(mm->pgd);
+}
+#else
+#define dup_mmap(mm, oldmm) (0)
+#define mm_alloc_pgd(mm) (0)
+#define mm_free_pgd(mm)
+#endif /* CONFIG_MMU */
+
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
+
+#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
+#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+
+#include <linux/init_task.h>
+
+static struct mm_struct * mm_init(struct mm_struct * mm)
+{
+ atomic_set(&mm->mm_users, 1);
+ atomic_set(&mm->mm_count, 1);
+ init_rwsem(&mm->mmap_sem);
+ INIT_LIST_HEAD(&mm->mmlist);
+ mm->core_waiters = 0;
+ mm->nr_ptes = 0;
+ set_mm_counter(mm, file_rss, 0);
+ set_mm_counter(mm, anon_rss, 0);
+ spin_lock_init(&mm->page_table_lock);
+ rwlock_init(&mm->ioctx_list_lock);
+ mm->ioctx_list = NULL;
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = ~0UL;
+
+ if (likely(!mm_alloc_pgd(mm))) {
+ mm->def_flags = 0;
+ return mm;
+ }
+ free_mm(mm);
+ return NULL;
+}
+
+/*
+ * Allocate and initialize an mm_struct.
+ */
+struct mm_struct * mm_alloc(void)
+{
+ struct mm_struct * mm;
+
+ mm = allocate_mm();
+ if (mm) {
+ memset(mm, 0, sizeof(*mm));
+ mm = mm_init(mm);
+ }
+ return mm;
+}
+
+/*
+ * Called when the last reference to the mm
+ * is dropped: either by a lazy thread or by
+ * mmput. Free the page directory and the mm.
+ */
+void fastcall __mmdrop(struct mm_struct *mm)
+{
+ BUG_ON(mm == &init_mm);
+ mm_free_pgd(mm);
+ destroy_context(mm);
+ free_mm(mm);
+}
+
+/*
+ * Decrement the use count and release all resources for an mm.
+ */
+void mmput(struct mm_struct *mm)
+{
+ if (atomic_dec_and_test(&mm->mm_users)) {
+ exit_aio(mm);
+ exit_mmap(mm);
+ if (!list_empty(&mm->mmlist)) {
+ spin_lock(&mmlist_lock);
+ list_del(&mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ put_swap_token(mm);
+ mmdrop(mm);
+ }
+}
+EXPORT_SYMBOL_GPL(mmput);
+
+/**
+ * get_task_mm - acquire a reference to the task's mm
+ *
+ * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning
+ * this kernel workthread has transiently adopted a user mm with use_mm,
+ * to do its AIO) is not set and if so returns a reference to it, after
+ * bumping up the use count. User must release the mm via mmput()
+ * after use. Typically used by /proc and ptrace.
+ */
+struct mm_struct *get_task_mm(struct task_struct *task)
+{
+ struct mm_struct *mm;
+
+ task_lock(task);
+ mm = task->mm;
+ if (mm) {
+ if (task->flags & PF_BORROWED_MM)
+ mm = NULL;
+ else
+ atomic_inc(&mm->mm_users);
+ }
+ task_unlock(task);
+ return mm;
+}
+EXPORT_SYMBOL_GPL(get_task_mm);
+
+/* Please note the differences between mmput and mm_release.
+ * mmput is called whenever we stop holding onto a mm_struct,
+ * error success whatever.
+ *
+ * mm_release is called after a mm_struct has been removed
+ * from the current process.
+ *
+ * This difference is important for error handling, when we
+ * only half set up a mm_struct for a new process and need to restore
+ * the old one. Because we mmput the new mm_struct before
+ * restoring the old one. . .
+ * Eric Biederman 10 January 1998
+ */
+void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+ struct completion *vfork_done = tsk->vfork_done;
+
+ /* Get rid of any cached register state */
+ deactivate_mm(tsk, mm);
+
+ /* notify parent sleeping on vfork() */
+ if (vfork_done) {
+ tsk->vfork_done = NULL;
+ complete(vfork_done);
+ }
+ if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
+ u32 __user * tidptr = tsk->clear_child_tid;
+ tsk->clear_child_tid = NULL;
+
+ /*
+ * We don't check the error code - if userspace has
+ * not set up a proper pointer then tough luck.
+ */
+ put_user(0, tidptr);
+ sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
+ }
+}
+
+/*
+ * Allocate a new mm structure and copy contents from the
+ * mm structure of the passed in task structure.
+ */
+static struct mm_struct *dup_mm(struct task_struct *tsk)
+{
+ struct mm_struct *mm, *oldmm = current->mm;
+ int err;
+
+ if (!oldmm)
+ return NULL;
+
+ mm = allocate_mm();
+ if (!mm)
+ goto fail_nomem;
+
+ memcpy(mm, oldmm, sizeof(*mm));
+
+ if (!mm_init(mm))
+ goto fail_nomem;
+
+ if (init_new_context(tsk, mm))
+ goto fail_nocontext;
+
+ err = dup_mmap(mm, oldmm);
+ if (err)
+ goto free_pt;
+
+ mm->hiwater_rss = get_mm_rss(mm);
+ mm->hiwater_vm = mm->total_vm;
+
+ return mm;
+
+free_pt:
+ mmput(mm);
+
+fail_nomem:
+ return NULL;
+
+fail_nocontext:
+ /*
+ * If init_new_context() failed, we cannot use mmput() to free the mm
+ * because it calls destroy_context()
+ */
+ mm_free_pgd(mm);
+ free_mm(mm);
+ return NULL;
+}
+
+static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct mm_struct * mm, *oldmm;
+ int retval;
+
+ tsk->min_flt = tsk->maj_flt = 0;
+ tsk->nvcsw = tsk->nivcsw = 0;
+
+ tsk->mm = NULL;
+ tsk->active_mm = NULL;
+
+ /*
+ * Are we cloning a kernel thread?
+ *
+ * We need to steal a active VM for that..
+ */
+ oldmm = current->mm;
+ if (!oldmm)
+ return 0;
+
+ if (clone_flags & CLONE_VM) {
+ atomic_inc(&oldmm->mm_users);
+ mm = oldmm;
+ goto good_mm;
+ }
+
+ retval = -ENOMEM;
+ mm = dup_mm(tsk);
+ if (!mm)
+ goto fail_nomem;
+
+good_mm:
+ tsk->mm = mm;
+ tsk->active_mm = mm;
+ return 0;
+
+fail_nomem:
+ return retval;
+}
+
+static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+{
+ struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+ /* We don't need to lock fs - think why ;-) */
+ if (fs) {
+ atomic_set(&fs->count, 1);
+ rwlock_init(&fs->lock);
+ fs->umask = old->umask;
+ read_lock(&old->lock);
+ fs->rootmnt = mntget(old->rootmnt);
+ fs->root = dget(old->root);
+ fs->pwdmnt = mntget(old->pwdmnt);
+ fs->pwd = dget(old->pwd);
+ if (old->altroot) {
+ fs->altrootmnt = mntget(old->altrootmnt);
+ fs->altroot = dget(old->altroot);
+ } else {
+ fs->altrootmnt = NULL;
+ fs->altroot = NULL;
+ }
+ read_unlock(&old->lock);
+ }
+ return fs;
+}
+
+struct fs_struct *copy_fs_struct(struct fs_struct *old)
+{
+ return __copy_fs_struct(old);
+}
+
+EXPORT_SYMBOL_GPL(copy_fs_struct);
+
+static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+{
+ if (clone_flags & CLONE_FS) {
+ atomic_inc(&current->fs->count);
+ return 0;
+ }
+ tsk->fs = __copy_fs_struct(current->fs);
+ if (!tsk->fs)
+ return -ENOMEM;
+ return 0;
+}
+
+static int count_open_files(struct fdtable *fdt)
+{
+ int size = fdt->max_fdset;
+ int i;
+
+ /* Find the last open fd */
+ for (i = size/(8*sizeof(long)); i > 0; ) {
+ if (fdt->open_fds->fds_bits[--i])
+ break;
+ }
+ i = (i+1) * 8 * sizeof(long);
+ return i;
+}
+
+static struct files_struct *alloc_files(void)
+{
+ struct files_struct *newf;
+ struct fdtable *fdt;
+
+ newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+ if (!newf)
+ goto out;
+
+ atomic_set(&newf->count, 1);
+
+ spin_lock_init(&newf->file_lock);
+ fdt = &newf->fdtab;
+ fdt->next_fd = 0;
+ fdt->max_fds = NR_OPEN_DEFAULT;
+ fdt->max_fdset = __FD_SETSIZE;
+ fdt->close_on_exec = &newf->close_on_exec_init;
+ fdt->open_fds = &newf->open_fds_init;
+ fdt->fd = &newf->fd_array[0];
+ INIT_RCU_HEAD(&fdt->rcu);
+ fdt->free_files = NULL;
+ fdt->next = NULL;
+ rcu_assign_pointer(newf->fdt, fdt);
+out:
+ return newf;
+}
+
+/*
+ * Allocate a new files structure and copy contents from the
+ * passed in files structure.
+ */
+static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
+{
+ struct files_struct *newf;
+ struct file **old_fds, **new_fds;
+ int open_files, size, i, expand;
+ struct fdtable *old_fdt, *new_fdt;
+
+ newf = alloc_files();
+ if (!newf)
+ goto out;
+
+ spin_lock(&oldf->file_lock);
+ old_fdt = files_fdtable(oldf);
+ new_fdt = files_fdtable(newf);
+ size = old_fdt->max_fdset;
+ open_files = count_open_files(old_fdt);
+ expand = 0;
+
+ /*
+ * Check whether we need to allocate a larger fd array or fd set.
+ * Note: we're not a clone task, so the open count won't change.
+ */
+ if (open_files > new_fdt->max_fdset) {
+ new_fdt->max_fdset = 0;
+ expand = 1;
+ }
+ if (open_files > new_fdt->max_fds) {
+ new_fdt->max_fds = 0;
+ expand = 1;
+ }
+
+ /* if the old fdset gets grown now, we'll only copy up to "size" fds */
+ if (expand) {
+ spin_unlock(&oldf->file_lock);
+ spin_lock(&newf->file_lock);
+ *errorp = expand_files(newf, open_files-1);
+ spin_unlock(&newf->file_lock);
+ if (*errorp < 0)
+ goto out_release;
+ new_fdt = files_fdtable(newf);
+ /*
+ * Reacquire the oldf lock and a pointer to its fd table
+ * who knows it may have a new bigger fd table. We need
+ * the latest pointer.
+ */
+ spin_lock(&oldf->file_lock);
+ old_fdt = files_fdtable(oldf);
+ }
+
+ old_fds = old_fdt->fd;
+ new_fds = new_fdt->fd;
+
+ memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
+ memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
+
+ for (i = open_files; i != 0; i--) {
+ struct file *f = *old_fds++;
+ if (f) {
+ get_file(f);
+ } else {
+ /*
+ * The fd may be claimed in the fd bitmap but not yet
+ * instantiated in the files array if a sibling thread
+ * is partway through open(). So make sure that this
+ * fd is available to the new process.
+ */
+ FD_CLR(open_files - i, new_fdt->open_fds);
+ }
+ rcu_assign_pointer(*new_fds++, f);
+ }
+ spin_unlock(&oldf->file_lock);
+
+ /* compute the remainder to be cleared */
+ size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
+
+ /* This is long word aligned thus could use a optimized version */
+ memset(new_fds, 0, size);
+
+ if (new_fdt->max_fdset > open_files) {
+ int left = (new_fdt->max_fdset-open_files)/8;
+ int start = open_files / (8 * sizeof(unsigned long));
+
+ memset(&new_fdt->open_fds->fds_bits[start], 0, left);
+ memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
+ }
+
+out:
+ return newf;
+
+out_release:
+ free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
+ free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
+ free_fd_array(new_fdt->fd, new_fdt->max_fds);
+ kmem_cache_free(files_cachep, newf);
+ return NULL;
+}
+
+static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct files_struct *oldf, *newf;
+ int error = 0;
+
+ /*
+ * A background process may not have any files ...
+ */
+ oldf = current->files;
+ if (!oldf)
+ goto out;
+
+ if (clone_flags & CLONE_FILES) {
+ atomic_inc(&oldf->count);
+ goto out;
+ }
+
+ /*
+ * Note: we may be using current for both targets (See exec.c)
+ * This works because we cache current->files (old) as oldf. Don't
+ * break this.
+ */
+ tsk->files = NULL;
+ error = -ENOMEM;
+ newf = dup_fd(oldf, &error);
+ if (!newf)
+ goto out;
+
+ tsk->files = newf;
+ error = 0;
+out:
+ return error;
+}
+
+/*
+ * Helper to unshare the files of the current task.
+ * We don't want to expose copy_files internals to
+ * the exec layer of the kernel.
+ */
+
+int unshare_files(void)
+{
+ struct files_struct *files = current->files;
+ int rc;
+
+ if(!files)
+ BUG();
+
+ /* This can race but the race causes us to copy when we don't
+ need to and drop the copy */
+ if(atomic_read(&files->count) == 1)
+ {
+ atomic_inc(&files->count);
+ return 0;
+ }
+ rc = copy_files(0, current);
+ if(rc)
+ current->files = files;
+ return rc;
+}
+
+EXPORT_SYMBOL(unshare_files);
+
+void sighand_free_cb(struct rcu_head *rhp)
+{
+ struct sighand_struct *sp;
+
+ sp = container_of(rhp, struct sighand_struct, rcu);
+ kmem_cache_free(sighand_cachep, sp);
+}
+
+static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct sighand_struct *sig;
+
+ if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
+ atomic_inc(&current->sighand->count);
+ return 0;
+ }
+ sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+ rcu_assign_pointer(tsk->sighand, sig);
+ if (!sig)
+ return -ENOMEM;
+ spin_lock_init(&sig->siglock);
+ atomic_set(&sig->count, 1);
+ memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ return 0;
+}
+
+static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct signal_struct *sig;
+ int ret;
+
+ if (clone_flags & CLONE_THREAD) {
+ atomic_inc(&current->signal->count);
+ atomic_inc(&current->signal->live);
+ return 0;
+ }
+ sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+ tsk->signal = sig;
+ if (!sig)
+ return -ENOMEM;
+
+ ret = copy_thread_group_keys(tsk);
+ if (ret < 0) {
+ kmem_cache_free(signal_cachep, sig);
+ return ret;
+ }
+
+ atomic_set(&sig->count, 1);
+ atomic_set(&sig->live, 1);
+ init_waitqueue_head(&sig->wait_chldexit);
+ sig->flags = 0;
+ sig->group_exit_code = 0;
+ sig->group_exit_task = NULL;
+ sig->group_stop_count = 0;
+ sig->curr_target = NULL;
+ init_sigpending(&sig->shared_pending);
+ INIT_LIST_HEAD(&sig->posix_timers);
+
+ hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
+ sig->it_real_incr.tv64 = 0;
+ sig->real_timer.function = it_real_fn;
+ sig->real_timer.data = tsk;
+
+ sig->it_virt_expires = cputime_zero;
+ sig->it_virt_incr = cputime_zero;
+ sig->it_prof_expires = cputime_zero;
+ sig->it_prof_incr = cputime_zero;
+
+ sig->leader = 0; /* session leadership doesn't inherit */
+ sig->tty_old_pgrp = 0;
+
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+ sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+ sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+ sig->sched_time = 0;
+ INIT_LIST_HEAD(&sig->cpu_timers[0]);
+ INIT_LIST_HEAD(&sig->cpu_timers[1]);
+ INIT_LIST_HEAD(&sig->cpu_timers[2]);
+
+ task_lock(current->group_leader);
+ memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
+ task_unlock(current->group_leader);
+
+ if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+ /*
+ * New sole thread in the process gets an expiry time
+ * of the whole CPU time limit.
+ */
+ tsk->it_prof_expires =
+ secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+ }
+
+ return 0;
+}
+
+static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+{
+ unsigned long new_flags = p->flags;
+
+ new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
+ new_flags |= PF_FORKNOEXEC;
+ if (!(clone_flags & CLONE_PTRACE))
+ p->ptrace = 0;
+ p->flags = new_flags;
+}
+
+asmlinkage long sys_set_tid_address(int __user *tidptr)
+{
+ current->clear_child_tid = tidptr;
+
+ return current->pid;
+}
+
+/*
+ * This creates a new process as a copy of the old one,
+ * but does not actually start it yet.
+ *
+ * It copies the registers, and all the appropriate
+ * parts of the process environment (as per the clone
+ * flags). The actual kick-off is left to the caller.
+ */
+static task_t *copy_process(unsigned long clone_flags,
+ unsigned long stack_start,
+ struct pt_regs *regs,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+ int __user *child_tidptr,
+ int pid)
+{
+ int retval;
+ struct task_struct *p = NULL;
+
+ if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Thread groups must share signals as well, and detached threads
+ * can only be started up within the thread group.
+ */
+ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Shared signal handlers imply shared VM. By way of the above,
+ * thread groups also imply shared VM. Blocking this case allows
+ * for various simplifications in other code.
+ */
+ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
+ return ERR_PTR(-EINVAL);
+
+ retval = security_task_create(clone_flags);
+ if (retval)
+ goto fork_out;
+
+ retval = -ENOMEM;
+ p = dup_task_struct(current);
+ if (!p)
+ goto fork_out;
+
+ retval = -EAGAIN;
+ if (atomic_read(&p->user->processes) >=
+ p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
+ p->user != &root_user)
+ goto bad_fork_free;
+ }
+
+ atomic_inc(&p->user->__count);
+ atomic_inc(&p->user->processes);
+ get_group_info(p->group_info);
+
+ /*
+ * If multiple threads are within copy_process(), then this check
+ * triggers too late. This doesn't hurt, the check is only there
+ * to stop root fork bombs.
+ */
+ if (nr_threads >= max_threads)
+ goto bad_fork_cleanup_count;
+
+ if (!try_module_get(task_thread_info(p)->exec_domain->module))
+ goto bad_fork_cleanup_count;
+
+ if (p->binfmt && !try_module_get(p->binfmt->module))
+ goto bad_fork_cleanup_put_domain;
+
+ p->did_exec = 0;
+ copy_flags(clone_flags, p);
+ p->pid = pid;
+ retval = -EFAULT;
+ if (clone_flags & CLONE_PARENT_SETTID)
+ if (put_user(p->pid, parent_tidptr))
+ goto bad_fork_cleanup;
+
+ p->proc_dentry = NULL;
+
+ INIT_LIST_HEAD(&p->children);
+ INIT_LIST_HEAD(&p->sibling);
+ p->vfork_done = NULL;
+ spin_lock_init(&p->alloc_lock);
+ spin_lock_init(&p->proc_lock);
+
+ clear_tsk_thread_flag(p, TIF_SIGPENDING);
+ init_sigpending(&p->pending);
+
+ p->utime = cputime_zero;
+ p->stime = cputime_zero;
+ p->sched_time = 0;
+ p->rchar = 0; /* I/O counter: bytes read */
+ p->wchar = 0; /* I/O counter: bytes written */
+ p->syscr = 0; /* I/O counter: read syscalls */
+ p->syscw = 0; /* I/O counter: write syscalls */
+ acct_clear_integrals(p);
+
+ p->it_virt_expires = cputime_zero;
+ p->it_prof_expires = cputime_zero;
+ p->it_sched_expires = 0;
+ INIT_LIST_HEAD(&p->cpu_timers[0]);
+ INIT_LIST_HEAD(&p->cpu_timers[1]);
+ INIT_LIST_HEAD(&p->cpu_timers[2]);
+
+ p->lock_depth = -1; /* -1 = no lock */
+ do_posix_clock_monotonic_gettime(&p->start_time);
+ p->security = NULL;
+ p->io_context = NULL;
+ p->io_wait = NULL;
+ p->audit_context = NULL;
+ cpuset_fork(p);
+#ifdef CONFIG_NUMA
+ p->mempolicy = mpol_copy(p->mempolicy);
+ if (IS_ERR(p->mempolicy)) {
+ retval = PTR_ERR(p->mempolicy);
+ p->mempolicy = NULL;
+ goto bad_fork_cleanup_cpuset;
+ }
+#endif
+
+#ifdef CONFIG_DEBUG_MUTEXES
+ p->blocked_on = NULL; /* not blocked yet */
+#endif
+
+ p->tgid = p->pid;
+ if (clone_flags & CLONE_THREAD)
+ p->tgid = current->tgid;
+
+ if ((retval = security_task_alloc(p)))
+ goto bad_fork_cleanup_policy;
+ if ((retval = audit_alloc(p)))
+ goto bad_fork_cleanup_security;
+ /* copy all the process information */
+ if ((retval = copy_semundo(clone_flags, p)))
+ goto bad_fork_cleanup_audit;
+ if ((retval = copy_files(clone_flags, p)))
+ goto bad_fork_cleanup_semundo;
+ if ((retval = copy_fs(clone_flags, p)))
+ goto bad_fork_cleanup_files;
+ if ((retval = copy_sighand(clone_flags, p)))
+ goto bad_fork_cleanup_fs;
+ if ((retval = copy_signal(clone_flags, p)))
+ goto bad_fork_cleanup_sighand;
+ if ((retval = copy_mm(clone_flags, p)))
+ goto bad_fork_cleanup_signal;
+ if ((retval = copy_keys(clone_flags, p)))
+ goto bad_fork_cleanup_mm;
+ if ((retval = copy_namespace(clone_flags, p)))
+ goto bad_fork_cleanup_keys;
+ retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
+ if (retval)
+ goto bad_fork_cleanup_namespace;
+
+ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+ /*
+ * Clear TID on mm_release()?
+ */
+ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+
+ /*
+ * sigaltstack should be cleared when sharing the same VM
+ */
+ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
+ p->sas_ss_sp = p->sas_ss_size = 0;
+
+ /*
+ * Syscall tracing should be turned off in the child regardless
+ * of CLONE_PTRACE.
+ */
+ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+ clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
+#endif
+
+ /* Our parent execution domain becomes current domain
+ These must match for thread signalling to apply */
+
+ p->parent_exec_id = p->self_exec_id;
+
+ /* ok, now we should be set up.. */
+ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
+ p->pdeath_signal = 0;
+ p->exit_state = 0;
+
+ /*
+ * Ok, make it visible to the rest of the system.
+ * We dont wake it up yet.
+ */
+ p->group_leader = p;
+ INIT_LIST_HEAD(&p->ptrace_children);
+ INIT_LIST_HEAD(&p->ptrace_list);
+
+ /* Perform scheduler related setup. Assign this task to a CPU. */
+ sched_fork(p, clone_flags);
+
+ /* Need tasklist lock for parent etc handling! */
+ write_lock_irq(&tasklist_lock);
+
+ /*
+ * The task hasn't been attached yet, so its cpus_allowed mask will
+ * not be changed, nor will its assigned CPU.
+ *
+ * The cpus_allowed mask of the parent may have changed after it was
+ * copied first time - so re-copy it here, then check the child's CPU
+ * to ensure it is on a valid CPU (and if not, just force it back to
+ * parent's CPU). This avoids alot of nasty races.
+ */
+ p->cpus_allowed = current->cpus_allowed;
+ if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
+ !cpu_online(task_cpu(p))))
+ set_task_cpu(p, smp_processor_id());
+
+ /*
+ * Check for pending SIGKILL! The new thread should not be allowed
+ * to slip out of an OOM kill. (or normal SIGKILL.)
+ */
+ if (sigismember(&current->pending.signal, SIGKILL)) {
+ write_unlock_irq(&tasklist_lock);
+ retval = -EINTR;
+ goto bad_fork_cleanup_namespace;
+ }
+
+ /* CLONE_PARENT re-uses the old parent */
+ if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+ p->real_parent = current->real_parent;
+ else
+ p->real_parent = current;
+ p->parent = p->real_parent;
+
+ spin_lock(&current->sighand->siglock);
+ if (clone_flags & CLONE_THREAD) {
+ /*
+ * Important: if an exit-all has been started then
+ * do not create this new thread - the whole thread
+ * group is supposed to exit anyway.
+ */
+ if (current->signal->flags & SIGNAL_GROUP_EXIT) {
+ spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+ retval = -EAGAIN;
+ goto bad_fork_cleanup_namespace;
+ }
+ p->group_leader = current->group_leader;
+
+ if (current->signal->group_stop_count > 0) {
+ /*
+ * There is an all-stop in progress for the group.
+ * We ourselves will stop as soon as we check signals.
+ * Make the new thread part of that group stop too.
+ */
+ current->signal->group_stop_count++;
+ set_tsk_thread_flag(p, TIF_SIGPENDING);
+ }
+
+ if (!cputime_eq(current->signal->it_virt_expires,
+ cputime_zero) ||
+ !cputime_eq(current->signal->it_prof_expires,
+ cputime_zero) ||
+ current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
+ !list_empty(&current->signal->cpu_timers[0]) ||
+ !list_empty(&current->signal->cpu_timers[1]) ||
+ !list_empty(&current->signal->cpu_timers[2])) {
+ /*
+ * Have child wake up on its first tick to check
+ * for process CPU timers.
+ */
+ p->it_prof_expires = jiffies_to_cputime(1);
+ }
+ }
+
+ /*
+ * inherit ioprio
+ */
+ p->ioprio = current->ioprio;
+
+ SET_LINKS(p);
+ if (unlikely(p->ptrace & PT_PTRACED))
+ __ptrace_link(p, current->parent);
+
+ if (thread_group_leader(p)) {
+ p->signal->tty = current->signal->tty;
+ p->signal->pgrp = process_group(current);
+ p->signal->session = current->signal->session;
+ attach_pid(p, PIDTYPE_PGID, process_group(p));
+ attach_pid(p, PIDTYPE_SID, p->signal->session);
+ if (p->pid)
+ __get_cpu_var(process_counts)++;
+ }
+ attach_pid(p, PIDTYPE_TGID, p->tgid);
+ attach_pid(p, PIDTYPE_PID, p->pid);
+
+ nr_threads++;
+ total_forks++;
+ spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+ proc_fork_connector(p);
+ return p;
+
+bad_fork_cleanup_namespace:
+ exit_namespace(p);
+bad_fork_cleanup_keys:
+ exit_keys(p);
+bad_fork_cleanup_mm:
+ if (p->mm)
+ mmput(p->mm);
+bad_fork_cleanup_signal:
+ exit_signal(p);
+bad_fork_cleanup_sighand:
+ exit_sighand(p);
+bad_fork_cleanup_fs:
+ exit_fs(p); /* blocking */
+bad_fork_cleanup_files:
+ exit_files(p); /* blocking */
+bad_fork_cleanup_semundo:
+ exit_sem(p);
+bad_fork_cleanup_audit:
+ audit_free(p);
+bad_fork_cleanup_security:
+ security_task_free(p);
+bad_fork_cleanup_policy:
+#ifdef CONFIG_NUMA
+ mpol_free(p->mempolicy);
+bad_fork_cleanup_cpuset:
+#endif
+ cpuset_exit(p);
+bad_fork_cleanup:
+ if (p->binfmt)
+ module_put(p->binfmt->module);
+bad_fork_cleanup_put_domain:
+ module_put(task_thread_info(p)->exec_domain->module);
+bad_fork_cleanup_count:
+ put_group_info(p->group_info);
+ atomic_dec(&p->user->processes);
+ free_uid(p->user);
+bad_fork_free:
+ free_task(p);
+fork_out:
+ return ERR_PTR(retval);
+}
+
+struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+{
+ memset(regs, 0, sizeof(struct pt_regs));
+ return regs;
+}
+
+task_t * __devinit fork_idle(int cpu)
+{
+ task_t *task;
+ struct pt_regs regs;
+
+ task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+ if (!task)
+ return ERR_PTR(-ENOMEM);
+ init_idle(task, cpu);
+ unhash_process(task);
+ return task;
+}
+
+static inline int fork_traceflag (unsigned clone_flags)
+{
+ if (clone_flags & CLONE_UNTRACED)
+ return 0;
+ else if (clone_flags & CLONE_VFORK) {
+ if (current->ptrace & PT_TRACE_VFORK)
+ return PTRACE_EVENT_VFORK;
+ } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
+ if (current->ptrace & PT_TRACE_CLONE)
+ return PTRACE_EVENT_CLONE;
+ } else if (current->ptrace & PT_TRACE_FORK)
+ return PTRACE_EVENT_FORK;
+
+ return 0;
+}
+
+/*
+ * Ok, this is the main fork-routine.
+ *
+ * It copies the process, and if successful kick-starts
+ * it and waits for it to finish using the VM if required.
+ */
+long do_fork(unsigned long clone_flags,
+ unsigned long stack_start,
+ struct pt_regs *regs,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+ int __user *child_tidptr)
+{
+ struct task_struct *p;
+ int trace = 0;
+ long pid = alloc_pidmap();
+
+ if (pid < 0)
+ return -EAGAIN;
+ if (unlikely(current->ptrace)) {
+ trace = fork_traceflag (clone_flags);
+ if (trace)
+ clone_flags |= CLONE_PTRACE;
+ }
+
+ p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+ /*
+ * Do this prior waking up the new thread - the thread pointer
+ * might get invalid after that point, if the thread exits quickly.
+ */
+ if (!IS_ERR(p)) {
+ struct completion vfork;
+
+ if (clone_flags & CLONE_VFORK) {
+ p->vfork_done = &vfork;
+ init_completion(&vfork);
+ }
+
+ if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
+ /*
+ * We'll start up with an immediate SIGSTOP.
+ */
+ sigaddset(&p->pending.signal, SIGSTOP);
+ set_tsk_thread_flag(p, TIF_SIGPENDING);
+ }
+
+ if (!(clone_flags & CLONE_STOPPED))
+ wake_up_new_task(p, clone_flags);
+ else
+ p->state = TASK_STOPPED;
+
+ if (unlikely (trace)) {
+ current->ptrace_message = pid;
+ ptrace_notify ((trace << 8) | SIGTRAP);
+ }
+
+ if (clone_flags & CLONE_VFORK) {
+ wait_for_completion(&vfork);
+ if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+ ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+ }
+ } else {
+ free_pidmap(pid);
+ pid = PTR_ERR(p);
+ }
+ return pid;
+}
+
+#ifndef ARCH_MIN_MMSTRUCT_ALIGN
+#define ARCH_MIN_MMSTRUCT_ALIGN 0
+#endif
+
+void __init proc_caches_init(void)
+{
+ sighand_cachep = kmem_cache_create("sighand_cache",
+ sizeof(struct sighand_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ signal_cachep = kmem_cache_create("signal_cache",
+ sizeof(struct signal_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ files_cachep = kmem_cache_create("files_cache",
+ sizeof(struct files_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ fs_cachep = kmem_cache_create("fs_cache",
+ sizeof(struct fs_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), 0,
+ SLAB_PANIC, NULL, NULL);
+ mm_cachep = kmem_cache_create("mm_struct",
+ sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+}
+
+
+/*
+ * Check constraints on flags passed to the unshare system call and
+ * force unsharing of additional process context as appropriate.
+ */
+static inline void check_unshare_flags(unsigned long *flags_ptr)
+{
+ /*
+ * If unsharing a thread from a thread group, must also
+ * unshare vm.
+ */
+ if (*flags_ptr & CLONE_THREAD)
+ *flags_ptr |= CLONE_VM;
+
+ /*
+ * If unsharing vm, must also unshare signal handlers.
+ */
+ if (*flags_ptr & CLONE_VM)
+ *flags_ptr |= CLONE_SIGHAND;
+
+ /*
+ * If unsharing signal handlers and the task was created
+ * using CLONE_THREAD, then must unshare the thread
+ */
+ if ((*flags_ptr & CLONE_SIGHAND) &&
+ (atomic_read(&current->signal->count) > 1))
+ *flags_ptr |= CLONE_THREAD;
+
+ /*
+ * If unsharing namespace, must also unshare filesystem information.
+ */
+ if (*flags_ptr & CLONE_NEWNS)
+ *flags_ptr |= CLONE_FS;
+}
+
+/*
+ * Unsharing of tasks created with CLONE_THREAD is not supported yet
+ */
+static int unshare_thread(unsigned long unshare_flags)
+{
+ if (unshare_flags & CLONE_THREAD)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Unshare the filesystem structure if it is being shared
+ */
+static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
+{
+ struct fs_struct *fs = current->fs;
+
+ if ((unshare_flags & CLONE_FS) &&
+ (fs && atomic_read(&fs->count) > 1)) {
+ *new_fsp = __copy_fs_struct(current->fs);
+ if (!*new_fsp)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Unshare the namespace structure if it is being shared
+ */
+static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
+{
+ struct namespace *ns = current->namespace;
+
+ if ((unshare_flags & CLONE_NEWNS) &&
+ (ns && atomic_read(&ns->count) > 1)) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
+ if (!*new_nsp)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
+ * supported yet
+ */
+static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
+{
+ struct sighand_struct *sigh = current->sighand;
+
+ if ((unshare_flags & CLONE_SIGHAND) &&
+ (sigh && atomic_read(&sigh->count) > 1))
+ return -EINVAL;
+ else
+ return 0;
+}
+
+/*
+ * Unshare vm if it is being shared
+ */
+static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
+{
+ struct mm_struct *mm = current->mm;
+
+ if ((unshare_flags & CLONE_VM) &&
+ (mm && atomic_read(&mm->mm_users) > 1)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Unshare file descriptor table if it is being shared
+ */
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
+{
+ struct files_struct *fd = current->files;
+ int error = 0;
+
+ if ((unshare_flags & CLONE_FILES) &&
+ (fd && atomic_read(&fd->count) > 1)) {
+ *new_fdp = dup_fd(fd, &error);
+ if (!*new_fdp)
+ return error;
+ }
+
+ return 0;
+}
+
+/*
+ * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
+ * supported yet
+ */
+static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
+{
+ if (unshare_flags & CLONE_SYSVSEM)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * unshare allows a process to 'unshare' part of the process
+ * context which was originally shared using clone. copy_*
+ * functions used by do_fork() cannot be used here directly
+ * because they modify an inactive task_struct that is being
+ * constructed. Here we are modifying the current, active,
+ * task_struct.
+ */
+asmlinkage long sys_unshare(unsigned long unshare_flags)
+{
+ int err = 0;
+ struct fs_struct *fs, *new_fs = NULL;
+ struct namespace *ns, *new_ns = NULL;
+ struct sighand_struct *sigh, *new_sigh = NULL;
+ struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
+ struct files_struct *fd, *new_fd = NULL;
+ struct sem_undo_list *new_ulist = NULL;
+
+ check_unshare_flags(&unshare_flags);
+
+ if ((err = unshare_thread(unshare_flags)))
+ goto bad_unshare_out;
+ if ((err = unshare_fs(unshare_flags, &new_fs)))
+ goto bad_unshare_cleanup_thread;
+ if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
+ goto bad_unshare_cleanup_fs;
+ if ((err = unshare_sighand(unshare_flags, &new_sigh)))
+ goto bad_unshare_cleanup_ns;
+ if ((err = unshare_vm(unshare_flags, &new_mm)))
+ goto bad_unshare_cleanup_sigh;
+ if ((err = unshare_fd(unshare_flags, &new_fd)))
+ goto bad_unshare_cleanup_vm;
+ if ((err = unshare_semundo(unshare_flags, &new_ulist)))
+ goto bad_unshare_cleanup_fd;
+
+ if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+
+ task_lock(current);
+
+ if (new_fs) {
+ fs = current->fs;
+ current->fs = new_fs;
+ new_fs = fs;
+ }
+
+ if (new_ns) {
+ ns = current->namespace;
+ current->namespace = new_ns;
+ new_ns = ns;
+ }
+
+ if (new_sigh) {
+ sigh = current->sighand;
+ rcu_assign_pointer(current->sighand, new_sigh);
+ new_sigh = sigh;
+ }
+
+ if (new_mm) {
+ mm = current->mm;
+ active_mm = current->active_mm;
+ current->mm = new_mm;
+ current->active_mm = new_mm;
+ activate_mm(active_mm, new_mm);
+ new_mm = mm;
+ }
+
+ if (new_fd) {
+ fd = current->files;
+ current->files = new_fd;
+ new_fd = fd;
+ }
+
+ task_unlock(current);
+ }
+
+bad_unshare_cleanup_fd:
+ if (new_fd)
+ put_files_struct(new_fd);
+
+bad_unshare_cleanup_vm:
+ if (new_mm)
+ mmput(new_mm);
+
+bad_unshare_cleanup_sigh:
+ if (new_sigh)
+ if (atomic_dec_and_test(&new_sigh->count))
+ kmem_cache_free(sighand_cachep, new_sigh);
+
+bad_unshare_cleanup_ns:
+ if (new_ns)
+ put_namespace(new_ns);
+
+bad_unshare_cleanup_fs:
+ if (new_fs)
+ put_fs_struct(new_fs);
+
+bad_unshare_cleanup_thread:
+bad_unshare_out:
+ return err;
+}
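
For reference (not part of the patch hunk above): the new sys_unshare() entry point is reached from userspace through the unshare() wrapper. A minimal, illustrative sketch, assuming a glibc that declares unshare() in <sched.h> under _GNU_SOURCE (otherwise syscall(__NR_unshare, flags) can be used); the flag choice mirrors the CLONE_NEWNS -> CLONE_FS coupling that check_unshare_flags() enforces:

#define _GNU_SOURCE
#include <sched.h>      /* unshare(), CLONE_* flags */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    /* Detach from the shared mount namespace.  CLONE_NEWNS implies
     * CLONE_FS (check_unshare_flags() would add it anyway) and needs
     * CAP_SYS_ADMIN, as enforced by unshare_namespace() above. */
    if (unshare(CLONE_NEWNS | CLONE_FS) == -1) {
        perror("unshare");  /* e.g. EPERM without CAP_SYS_ADMIN */
        exit(EXIT_FAILURE);
    }
    printf("running in a private mount namespace\n");
    return 0;
}

Mount operations performed after this call are then visible only within the caller's private namespace.
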
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/python/xensec_tools/acm_getdecision
--- a/tools/security/python/xensec_tools/acm_getdecision Thu Jun 15 10:02:53 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-# -*- mode: python; -*-
-import sys
-import traceback
-import getopt
-
-# add fallback path for non-native python path installs if needed
-sys.path.insert(-1, '/usr/lib/python')
-sys.path.insert(-1, '/usr/lib64/python')
-
-from xen.util.security import ACMError, err, get_decision, active_policy
-
-def usage():
- print "Usage: acm_getdecision -i domainid --label labelname"
- print " Test program illustrating the retrieval of"
- print " access control decisions from Xen. At this time,"
- print " only sharing (STE) policy decisions are supported."
- print " Arguments are two paramters in any combination:"
- print "\t -i domain_id or --domid domain_id"
- print "\t -l labelname or --label labelname"
- print " Return value:"
- print "\t PERMITTED if access is permitted"
- print "\t DENIED if access is denied"
- print "\t ACMError -- e.g., unknown label or domain id"
- err("Usage")
-
-try:
-
- if len(sys.argv) != 5:
- usage()
-
- decision_args = []
-
- for idx in range(1, len(sys.argv), 2):
- if sys.argv[idx] in ['-i', '--domid']:
- decision_args.append(['domid', sys.argv[idx+1]])
- elif sys.argv[idx] in ['-l', '--label']:
- decision_args.append(['access_control',
- ['policy', active_policy],
- ['label', sys.argv[idx+1]]
- ])
- else:
- print "unknown argument %s" % sys.argv[idx]
- usage()
-
- if len(decision_args) != 2:
- print "too many arguments"
- usage()
-
- print get_decision(decision_args[0], decision_args[1])
-
-except ACMError:
- pass
-except:
- traceback.print_exc(limit=1)
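
As its usage() text above describes, the deleted helper took a domain id and a security label; a typical invocation (the id and label values here are purely illustrative) looked like:

    acm_getdecision -i 1 --label dom_HomeBanking

and printed PERMITTED, DENIED, or an ACMError for the given pair; internally it simply wrapped get_decision() from xen.util.security.
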