# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID fbc0e953732ef78292d9e87ff6dd7f3432ddd014
# Parent 7f67c15e2c917dc52a3f8acc0fdb79a63b894b15
# Parent 73c73fb8875c331b8c0e6ed0317c8d71b83cdda2
merge with xen-unstable.hg
---
tools/security/python/xensec_tools/acm_getdecision | 55
extras/mini-os/events.c | 12
extras/mini-os/include/xenbus.h | 28
extras/mini-os/kernel.c | 23
extras/mini-os/xenbus/xenbus.c | 202 +
linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c | 5
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 2
linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 142
linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c | 6
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 27
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 15
linux-2.6-xen-sparse/drivers/xen/core/skbuff.c | 11
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 68
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h | 4
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h | 11
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 4
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h | 4
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h | 12
linux-2.6-xen-sparse/include/xen/gnttab.h | 1
 linux-2.6-xen-sparse/kernel/fork.c | 1619 ++++++++++
tools/console/daemon/io.c | 66
tools/console/daemon/utils.c | 26
tools/console/daemon/utils.h | 3
tools/ioemu/sdl.c | 9
tools/ioemu/target-i386-dm/helper2.c | 32
tools/libxc/xc_elf.h | 3
tools/libxc/xc_linux.c | 118
tools/libxc/xc_linux_restore.c | 22
tools/libxc/xc_load_elf.c | 32
tools/libxc/xenctrl.h | 54
tools/python/xen/util/security.py | 41
tools/python/xen/xm/addlabel.py | 2
tools/python/xen/xm/create.py | 2
tools/python/xen/xm/main.py | 3
tools/security/Makefile | 2
tools/security/python/xensec_gen/cgi-bin/policy.cgi | 2
tools/security/secpol_xml2bin.c | 6
tools/xenmon/xenbaked.c | 55
tools/xenstat/libxenstat/src/xenstat.c | 23
tools/xenstore/fake_libxc.c | 4
tools/xenstore/xenstored_core.c | 13
tools/xenstore/xenstored_domain.c | 79
tools/xm-test/tests/block-integrity/01_block_device_read_verify.py | 4
tools/xm-test/tests/block-integrity/02_block_device_write_verify.py | 4
xen/arch/x86/traps.c | 2
xen/common/event_channel.c | 14
xen/include/asm-ia64/event.h | 8
xen/include/asm-x86/event.h | 8
xen/include/xen/elf.h | 2
49 files changed, 2417 insertions(+), 473 deletions(-)
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/events.c
--- a/extras/mini-os/events.c Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/events.c Thu Jun 15 10:23:57 2006 -0600
@@ -35,24 +35,29 @@ int do_event(u32 port, struct pt_regs *r
ev_action_t *action;
if (port >= NR_EVS) {
printk("Port number too large: %d\n", port);
- return 0;
+ goto out;
}
action = &ev_actions[port];
action->count++;
if (!action->handler)
+ {
+ printk("Spurious event on port %d\n", port);
goto out;
+ }
if (action->status & EVS_DISABLED)
+ {
+ printk("Event on port %d disabled\n", port);
goto out;
+ }
/* call the handler */
action->handler(port, regs);
-
- clear_evtchn(port);
out:
+ clear_evtchn(port);
return 1;
}
@@ -135,6 +140,7 @@ void init_events(void)
{
ev_actions[i].status = EVS_DISABLED;
ev_actions[i].handler = default_handler;
+ mask_evtchn(i);
}
}
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/include/xenbus.h
--- a/extras/mini-os/include/xenbus.h Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/include/xenbus.h Thu Jun 15 10:23:57 2006 -0600
@@ -1,6 +1,34 @@
#ifndef XENBUS_H__
#define XENBUS_H__
+/* Initialize the XenBus system. */
void init_xenbus(void);
+/* Read the value associated with a path. Returns a malloc'd error
+ string on failure and sets *value to NULL. On success, *value is
+ set to a malloc'd copy of the value. */
+char *xenbus_read(const char *path, char **value);
+
+/* Associates a value with a path. Returns a malloc'd error string on
+ failure. */
+char *xenbus_write(const char *path, const char *value);
+
+/* Removes the value associated with a path. Returns a malloc'd error
+ string on failure. */
+char *xenbus_rm(const char *path);
+
+/* List the contents of a directory. Returns a malloc'd error string
+ on failure and sets *contents to NULL. On success, *contents is
+ set to a malloc'd array of pointers to malloc'd strings. The array
+ is NULL terminated. May block. */
+char *xenbus_ls(const char *prefix, char ***contents);
+
+/* Reads permissions associated with a path. Returns a malloc'd error
+ string on failure and sets *value to NULL. On success, *value is
+ set to a malloc'd copy of the value. */
+char *xenbus_get_perms(const char *path, char **value);
+
+/* Sets the permissions associated with a path. Returns a malloc'd
+ error string on failure. */
+char *xenbus_set_perms(const char *path, domid_t dom, char perm);
#endif /* XENBUS_H__ */
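A minimal usage sketch, not part of this changeset, showing the calling convention declared above: each call returns NULL on success or a malloc'd error string the caller must free, and xenbus_read() hands back a malloc'd copy of the value. The key and value names below are illustrative only.

/* Illustrative Mini-OS caller of the new xenbus interface (assumed names). */
static void example_xenbus_use(void)
{
    char *err, *value;

    err = xenbus_write("device/example/key", "example-value");
    if (err) {
        printk("xenbus_write failed: %s\n", err);
        free(err);
        return;
    }

    err = xenbus_read("device/example/key", &value);
    if (err) {
        printk("xenbus_read failed: %s\n", err);
        free(err);
        return;
    }
    printk("device/example/key = %s\n", value);
    free(value);

    free(xenbus_rm("device/example/key"));   /* free(NULL) is harmless */
}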
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/kernel.c Thu Jun 15 10:23:57 2006 -0600
@@ -82,17 +82,6 @@ static shared_info_t *map_shared_info(un
}
-void test_xenbus(void);
-
-/* Do initialisation from a thread once the scheduler's available */
-static void init_xs(void *ign)
-{
- init_xenbus();
-
- test_xenbus();
-}
-
-
u8 xen_features[XENFEAT_NR_SUBMAPS * 32];
void setup_xen_features(void)
@@ -111,10 +100,18 @@ void setup_xen_features(void)
}
}
+void test_xenbus(void);
+
+void xenbus_tester(void *p)
+{
+ test_xenbus();
+}
+
/* This should be overridden by the application we are linked against. */
__attribute__((weak)) int app_main(start_info_t *si)
{
printk("Dummy main: start_info=%p\n", si);
+ create_thread("xenbus_tester", xenbus_tester, si);
return 0;
}
@@ -183,8 +180,8 @@ void start_kernel(start_info_t *si)
/* Init scheduler. */
init_sched();
- /* Init XenBus from a separate thread */
- create_thread("init_xs", init_xs, NULL);
+ /* Init XenBus */
+ init_xenbus();
/* Call (possibly overridden) app_main() */
app_main(&start_info);
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/xenbus/xenbus.c
--- a/extras/mini-os/xenbus/xenbus.c Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/xenbus/xenbus.c Thu Jun 15 10:23:57 2006 -0600
@@ -3,11 +3,12 @@
* (C) 2006 - Cambridge University
****************************************************************************
*
- * File: mm.c
+ * File: xenbus.c
* Author: Steven Smith (sos22@xxxxxxxxx)
* Changes: Grzegorz Milos (gm281@xxxxxxxxx)
+ * Changes: John D. Ramsdell
*
- * Date: Mar 2006, chages Aug 2005
+ * Date: Jun 2006, changes Aug 2005
*
* Environment: Xen Minimal OS
* Description: Minimal implementation of xenbus
@@ -167,6 +168,7 @@ void init_xenbus(void)
void init_xenbus(void)
{
int err;
+ printk("Initialising xenbus\n");
DEBUG("init_xenbus called.\n");
xenstore_buf = mfn_to_virt(start_info.store_mfn);
create_thread("xenstore", xenbus_thread_func, NULL);
@@ -262,15 +264,15 @@ static void xb_write(int type, int req_i
/* Send a message to xenbus, in the same fashion as xb_write, and
block waiting for a reply. The reply is malloced and should be
freed by the caller. */
-static void *xenbus_msg_reply(int type,
+static struct xsd_sockmsg *
+xenbus_msg_reply(int type,
int trans,
struct write_req *io,
int nr_reqs)
{
int id;
DEFINE_WAIT(w);
- void *rep;
- struct xsd_sockmsg *repmsg;
+ struct xsd_sockmsg *rep;
id = allocate_xenbus_id();
add_waiter(w, req_info[id].waitq);
@@ -281,12 +283,26 @@ static void *xenbus_msg_reply(int type,
wake(current);
rep = req_info[id].reply;
- repmsg = rep;
- BUG_ON(repmsg->req_id != id);
+ BUG_ON(rep->req_id != id);
release_xenbus_id(id);
-
return rep;
}
+
+static char *errmsg(struct xsd_sockmsg *rep)
+{
+ if (!rep) {
+ char msg[] = "No reply";
+ size_t len = strlen(msg) + 1;
+ return memcpy(malloc(len), msg, len);
+ }
+ if (rep->type != XS_ERROR)
+ return NULL;
+ char *res = malloc(rep->len + 1);
+ memcpy(res, rep + 1, rep->len);
+ res[rep->len] = 0;
+ free(rep);
+ return res;
+}
/* Send a debug message to xenbus. Can block. */
static void xenbus_debug_msg(const char *msg)
@@ -296,27 +312,29 @@ static void xenbus_debug_msg(const char
{ "print", sizeof("print") },
{ msg, len },
{ "", 1 }};
- void *reply;
- struct xsd_sockmsg *repmsg;
-
- reply = xenbus_msg_reply(XS_DEBUG, 0, req, 3);
- repmsg = reply;
+ struct xsd_sockmsg *reply;
+
+ reply = xenbus_msg_reply(XS_DEBUG, 0, req, ARRAY_SIZE(req));
DEBUG("Got a reply, type %d, id %d, len %d.\n",
- repmsg->type, repmsg->req_id, repmsg->len);
+ reply->type, reply->req_id, reply->len);
}
/* List the contents of a directory. Returns a malloc()ed array of
pointers to malloc()ed strings. The array is NULL terminated. May
block. */
-static char **xenbus_ls(const char *pre)
-{
- void *reply;
- struct xsd_sockmsg *repmsg;
+char *xenbus_ls(const char *pre, char ***contents)
+{
+ struct xsd_sockmsg *reply, *repmsg;
struct write_req req[] = { { pre, strlen(pre)+1 } };
int nr_elems, x, i;
char **res;
- repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, 1);
+ repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(repmsg);
+ if (msg) {
+ *contents = NULL;
+ return msg;
+ }
reply = repmsg + 1;
for (x = nr_elems = 0; x < repmsg->len; x++)
nr_elems += (((char *)reply)[x] == 0);
@@ -329,20 +347,91 @@ static char **xenbus_ls(const char *pre)
}
res[i] = NULL;
free(repmsg);
- return res;
-}
-
-static char *xenbus_read(const char *path)
-{
- struct write_req req[] = { {path, strlen(path) + 1}};
+ *contents = res;
+ return NULL;
+}
+
+char *xenbus_read(const char *path, char **value)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
struct xsd_sockmsg *rep;
char *res;
- rep = xenbus_msg_reply(XS_READ, 0, req, 1);
+ rep = xenbus_msg_reply(XS_READ, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg) {
+ *value = NULL;
+ return msg;
+ }
res = malloc(rep->len + 1);
memcpy(res, rep + 1, rep->len);
res[rep->len] = 0;
free(rep);
- return res;
+ *value = res;
+ return NULL;
+}
+
+char *xenbus_write(const char *path, const char *value)
+{
+ struct write_req req[] = {
+ {path, strlen(path) + 1},
+ {value, strlen(value) + 1},
+ };
+ struct xsd_sockmsg *rep;
+ rep = xenbus_msg_reply(XS_WRITE, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
+}
+
+char *xenbus_rm(const char *path)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
+ struct xsd_sockmsg *rep;
+ rep = xenbus_msg_reply(XS_RM, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
+}
+
+char *xenbus_get_perms(const char *path, char **value)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
+ struct xsd_sockmsg *rep;
+ char *res;
+ rep = xenbus_msg_reply(XS_GET_PERMS, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg) {
+ *value = NULL;
+ return msg;
+ }
+ res = malloc(rep->len + 1);
+ memcpy(res, rep + 1, rep->len);
+ res[rep->len] = 0;
+ free(rep);
+ *value = res;
+ return NULL;
+}
+
+#define PERM_MAX_SIZE 32
+char *xenbus_set_perms(const char *path, domid_t dom, char perm)
+{
+ char value[PERM_MAX_SIZE];
+ snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom);
+ struct write_req req[] = {
+ {path, strlen(path) + 1},
+ {value, strlen(value) + 1},
+ };
+ struct xsd_sockmsg *rep;
+ rep = xenbus_msg_reply(XS_SET_PERMS, 0, req, ARRAY_SIZE(req));
+ char *msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
}
static void do_ls_test(const char *pre)
@@ -351,7 +440,12 @@ static void do_ls_test(const char *pre)
int x;
DEBUG("ls %s...\n", pre);
- dirs = xenbus_ls(pre);
+ char *msg = xenbus_ls(pre, &dirs);
+ if (msg) {
+ DEBUG("Error in xenbus ls: %s\n", msg);
+ free(msg);
+ return;
+ }
for (x = 0; dirs[x]; x++)
{
DEBUG("ls %s[%d] -> %s\n", pre, x, dirs[x]);
@@ -364,9 +458,38 @@ static void do_read_test(const char *pat
{
char *res;
DEBUG("Read %s...\n", path);
- res = xenbus_read(path);
+ char *msg = xenbus_read(path, &res);
+ if (msg) {
+ DEBUG("Error in xenbus read: %s\n", msg);
+ free(msg);
+ return;
+ }
DEBUG("Read %s -> %s.\n", path, res);
free(res);
+}
+
+static void do_write_test(const char *path, const char *val)
+{
+ DEBUG("Write %s to %s...\n", val, path);
+ char *msg = xenbus_write(path, val);
+ if (msg) {
+ DEBUG("Result %s\n", msg);
+ free(msg);
+ } else {
+ DEBUG("Success.\n");
+ }
+}
+
+static void do_rm_test(const char *path)
+{
+ DEBUG("rm %s...\n", path);
+ char *msg = xenbus_rm(path);
+ if (msg) {
+ DEBUG("Result %s\n", msg);
+ free(msg);
+ } else {
+ DEBUG("Success.\n");
+ }
}
/* Simple testing thing */
@@ -383,5 +506,22 @@ void test_xenbus(void)
DEBUG("Doing read test.\n");
do_read_test("device/vif/0/mac");
do_read_test("device/vif/0/backend");
- printk("Xenbus initialised.\n");
-}
+
+ DEBUG("Doing write test.\n");
+ do_write_test("device/vif/0/flibble", "flobble");
+ do_read_test("device/vif/0/flibble");
+ do_write_test("device/vif/0/flibble", "widget");
+ do_read_test("device/vif/0/flibble");
+
+ DEBUG("Doing rm test.\n");
+ do_rm_test("device/vif/0/flibble");
+ do_read_test("device/vif/0/flibble");
+ DEBUG("(Should have said ENOENT)\n");
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-basic-offset: 4
+ * End:
+ */
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Thu Jun 15 10:23:57 2006 -0600
@@ -133,6 +133,7 @@ void xen_tlb_flush(void)
op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
+EXPORT_SYMBOL(xen_tlb_flush);
void xen_invlpg(unsigned long ptr)
{
@@ -141,6 +142,7 @@ void xen_invlpg(unsigned long ptr)
op.arg1.linear_addr = ptr & PAGE_MASK;
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
+EXPORT_SYMBOL(xen_invlpg);
#ifdef CONFIG_SMP
@@ -363,7 +365,8 @@ void xen_destroy_contiguous_region(unsig
};
set_xen_guest_handle(reservation.extent_start, &frame);
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (xen_feature(XENFEAT_auto_translated_physmap) ||
+ !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
return;
scrub_pages(vstart, 1 << order);
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Thu Jun 15 10:23:57 2006 -0600
@@ -763,7 +763,7 @@ void __init pgtable_cache_init(void)
#endif
0,
pgd_ctor,
- pgd_dtor);
+ PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Jun 15 10:23:57 2006 -0600
@@ -300,11 +300,6 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
unsigned long flags;
if (PTRS_PER_PMD > 1) {
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
- int rc = xen_create_contiguous_region(
- (unsigned long)pgd, 0, 32);
- BUG_ON(rc);
- }
if (HAVE_SHARED_KERNEL_PMD)
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
@@ -320,69 +315,105 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
}
}
+/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
- if (PTRS_PER_PMD > 1) {
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
- xen_destroy_contiguous_region((unsigned long)pgd, 0);
- } else {
- spin_lock_irqsave(&pgd_lock, flags);
- pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-
- pgd_test_and_unpin(pgd);
- }
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ pgd_test_and_unpin(pgd);
}
pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+ pmd_t **pmd;
+ unsigned long flags;
pgd_test_and_unpin(pgd);
if (PTRS_PER_PMD == 1 || !pgd)
return pgd;
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
- if (!pmd)
- goto out_oom;
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
- }
-
- if (!HAVE_SHARED_KERNEL_PMD) {
- unsigned long flags;
-
- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
+ if (HAVE_SHARED_KERNEL_PMD) {
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
if (!pmd)
goto out_oom;
set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
}
-
- spin_lock_irqsave(&pgd_lock, flags);
- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
- unsigned long v = (unsigned long)i << PGDIR_SHIFT;
- pgd_t *kpgd = pgd_offset_k(v);
- pud_t *kpud = pud_offset(kpgd, v);
- pmd_t *kpmd = pmd_offset(kpud, v);
- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
- memcpy(pmd, kpmd, PAGE_SIZE);
- make_lowmem_page_readonly(
- pmd, XENFEAT_writable_page_tables);
+ return pgd;
+ }
+
+ /*
+ * We can race save/restore (if we sleep during a GFP_KERNEL memory
+ * allocation). We therefore store virtual addresses of pmds as they
+ * do not change across save/restore, and poke the machine addresses
+ * into the pgdir under the pgd_lock.
+ */
+ pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
+ if (!pmd) {
+ kmem_cache_free(pgd_cache, pgd);
+ return NULL;
+ }
+
+ /* Allocate pmds, remember virtual addresses. */
+ for (i = 0; i < PTRS_PER_PGD; ++i) {
+ pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+ if (!pmd[i])
+ goto out_oom;
+ }
+
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ /* Protect against save/restore: move below 4GB under pgd_lock. */
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
+ int rc = xen_create_contiguous_region(
+ (unsigned long)pgd, 0, 32);
+ if (rc) {
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ goto out_oom;
}
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
+ }
+
+ /* Copy kernel pmd contents and write-protect the new pmds. */
+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
+ unsigned long v = (unsigned long)i << PGDIR_SHIFT;
+ pgd_t *kpgd = pgd_offset_k(v);
+ pud_t *kpud = pud_offset(kpgd, v);
+ pmd_t *kpmd = pmd_offset(kpud, v);
+ memcpy(pmd[i], kpmd, PAGE_SIZE);
+ make_lowmem_page_readonly(
+ pmd[i], XENFEAT_writable_page_tables);
+ }
+
+ /* It is safe to poke machine addresses of pmds under the pmd_lock. */
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
+
+ /* Ensure this pgd gets picked up and pinned on save/restore. */
+ pgd_list_add(pgd);
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ kfree(pmd);
return pgd;
out_oom:
- for (i--; i >= 0; i--)
- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+ if (HAVE_SHARED_KERNEL_PMD) {
+ for (i--; i >= 0; i--)
+ kmem_cache_free(pmd_cache,
+ (void *)__va(pgd_val(pgd[i])-1));
+ } else {
+ for (i--; i >= 0; i--)
+ kmem_cache_free(pmd_cache, pmd[i]);
+ kfree(pmd);
+ }
kmem_cache_free(pgd_cache, pgd);
return NULL;
}
@@ -391,6 +422,14 @@ void pgd_free(pgd_t *pgd)
{
int i;
+ /*
+ * After this the pgd should not be pinned for the duration of this
+ * function's execution. We should never sleep and thus never race:
+ * 1. User pmds will not become write-protected under our feet due
+ * to a concurrent mm_pin_all().
+ * 2. The machine addresses in PGD entries will not become invalid
+ * due to a concurrent save/restore.
+ */
pgd_test_and_unpin(pgd);
/* in the PAE case user pgd entries are overwritten before usage */
@@ -399,11 +438,13 @@ void pgd_free(pgd_t *pgd)
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
kmem_cache_free(pmd_cache, pmd);
}
+
if (!HAVE_SHARED_KERNEL_PMD) {
unsigned long flags;
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
+
for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
make_lowmem_page_writable(
@@ -411,8 +452,13 @@ void pgd_free(pgd_t *pgd)
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
kmem_cache_free(pmd_cache, pmd);
}
+
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
+ xen_destroy_contiguous_region(
+ (unsigned long)pgd, 0);
}
}
+
/* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);
}
@@ -588,7 +634,7 @@ void mm_pin(struct mm_struct *mm)
void mm_pin(struct mm_struct *mm)
{
if (xen_feature(XENFEAT_writable_page_tables))
- return;
+ return;
spin_lock(&mm->page_table_lock);
__pgd_pin(mm->pgd);
spin_unlock(&mm->page_table_lock);
@@ -597,7 +643,7 @@ void mm_unpin(struct mm_struct *mm)
void mm_unpin(struct mm_struct *mm)
{
if (xen_feature(XENFEAT_writable_page_tables))
- return;
+ return;
spin_lock(&mm->page_table_lock);
__pgd_unpin(mm->pgd);
spin_unlock(&mm->page_table_lock);
@@ -607,11 +653,17 @@ void mm_pin_all(void)
{
struct page *page;
if (xen_feature(XENFEAT_writable_page_tables))
- return;
+ return;
for (page = pgd_list; page; page = (struct page *)page->index) {
if (!test_bit(PG_pinned, &page->flags))
__pgd_pin((pgd_t *)page_address(page));
}
+}
+
+void _arch_dup_mmap(struct mm_struct *mm)
+{
+ if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
+ mm_pin(mm);
}
void _arch_exit_mmap(struct mm_struct *mm)
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Jun 15 10:23:57 2006 -0600
@@ -130,6 +130,12 @@ void mm_pin_all(void)
context.unpinned));
}
+void _arch_dup_mmap(struct mm_struct *mm)
+{
+ if (!mm->context.pinned)
+ mm_pin(mm);
+}
+
void _arch_exit_mmap(struct mm_struct *mm)
{
struct task_struct *tsk = current;
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Jun 15 10:23:57 2006 -0600
@@ -342,8 +342,20 @@ static void blkfront_closing(struct xenb
static void blkfront_closing(struct xenbus_device *dev)
{
struct blkfront_info *info = dev->dev.driver_data;
+ unsigned long flags;
DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
+
+ if (info->rq == NULL)
+ return;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+ /* No more blkif_request(). */
+ blk_stop_queue(info->rq);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ flush_scheduled_work();
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
xlvbd_del(info);
@@ -407,7 +419,8 @@ static void blkif_restart_queue(void *ar
{
struct blkfront_info *info = (struct blkfront_info *)arg;
spin_lock_irq(&blkif_io_lock);
- kick_pending_request_queues(info);
+ if (info->connected == BLKIF_STATE_CONNECTED)
+ kick_pending_request_queues(info);
spin_unlock_irq(&blkif_io_lock);
}
@@ -695,6 +708,12 @@ static void blkif_free(struct blkfront_i
spin_lock_irq(&blkif_io_lock);
info->connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+ /* No more blkif_request(). */
+ if (info->rq)
+ blk_stop_queue(info->rq);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ flush_scheduled_work();
spin_unlock_irq(&blkif_io_lock);
/* Free resources associated with old device channel. */
@@ -768,17 +787,17 @@ static void blkif_recover(struct blkfron
(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
+ spin_lock_irq(&blkif_io_lock);
+
/* Now safe for us to use the shared ring */
- spin_lock_irq(&blkif_io_lock);
info->connected = BLKIF_STATE_CONNECTED;
- spin_unlock_irq(&blkif_io_lock);
/* Send off requeued requests */
flush_requests(info);
/* Kick any other new requests queued since we resumed */
- spin_lock_irq(&blkif_io_lock);
kick_pending_request_queues(info);
+
spin_unlock_irq(&blkif_io_lock);
}
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Thu Jun 15 10:23:57 2006 -0600
@@ -334,6 +334,21 @@ out:
}
EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
+{
+ struct gnttab_free_callback **pcb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
+ if (*pcb == callback) {
+ *pcb = callback->next;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
#ifndef __ia64__
static int map_pte_fn(pte_t *pte, struct page *pmd_page,
unsigned long addr, void *data)
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c Thu Jun 15 10:23:57 2006 -0600
@@ -121,8 +121,15 @@ static int __init skbuff_init(void)
for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
size = PAGE_SIZE << order;
sprintf(name[order], "xen-skb-%lu", size);
- skbuff_order_cachep[order] = kmem_cache_create(
- name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+ if (is_running_on_xen() &&
+ (xen_start_info->flags & SIF_PRIVILEGED))
+ skbuff_order_cachep[order] = kmem_cache_create(
+ name[order], size, size, 0,
+ skbuff_ctor, skbuff_dtor);
+ else
+ skbuff_order_cachep[order] = kmem_cache_create(
+ name[order], size, size, 0, NULL, NULL);
+
}
skbuff_cachep = skbuff_order_cachep[0];
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jun 15 10:23:57 2006 -0600
@@ -1072,68 +1072,39 @@ static void xennet_set_features(struct n
static void network_connect(struct net_device *dev)
{
- struct netfront_info *np;
+ struct netfront_info *np = netdev_priv(dev);
int i, requeue_idx;
- struct netif_tx_request *tx;
struct sk_buff *skb;
xennet_set_features(dev);
- np = netdev_priv(dev);
spin_lock_irq(&np->tx_lock);
spin_lock(&np->rx_lock);
- /* Recovery procedure: */
-
/*
- * Step 1: Rebuild the RX and TX ring contents.
- * NB. We could just free the queued TX packets now but we hope
- * that sending them out might do some good. We have to rebuild
- * the RX ring because some of our pages are currently flipped out
- * so we can't just free the RX skbs.
- * NB2. Freelist index entries are always going to be less than
+ * Recovery procedure:
+ * NB. Freelist index entries are always going to be less than
* PAGE_OFFSET, whereas pointers to skbs will always be equal or
- * greater than PAGE_OFFSET: we use this property to distinguish
- * them.
- */
-
- /*
- * Rebuild the TX buffer freelist and the TX ring itself.
- * NB. This reorders packets. We could keep more private state
- * to avoid this but maybe it doesn't matter so much given the
- * interface has been down.
- */
+ * greater than PAGE_OFFSET: we use this property to distinguish
+ * them.
+ */
+
+ /* Step 1: Discard all pending TX packet fragments. */
for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
continue;
skb = np->tx_skbs[i];
-
- tx = RING_GET_REQUEST(&np->tx, requeue_idx);
- requeue_idx++;
-
- tx->id = i;
- gnttab_grant_foreign_access_ref(
- np->grant_tx_ref[i], np->xbdev->otherend_id,
- virt_to_mfn(np->tx_skbs[i]->data),
- GNTMAP_readonly);
- tx->gref = np->grant_tx_ref[i];
- tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
- tx->size = skb->len;
- tx->flags = 0;
- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
- tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
- if (skb->proto_data_valid) /* remote but checksummed? */
- tx->flags |= NETTXF_data_validated;
-
- np->stats.tx_bytes += skb->len;
- np->stats.tx_packets++;
- }
-
- np->tx.req_prod_pvt = requeue_idx;
- RING_PUSH_REQUESTS(&np->tx);
-
- /* Rebuild the RX buffer freelist and the RX ring itself. */
+ gnttab_end_foreign_access_ref(
+ np->grant_tx_ref[i], GNTMAP_readonly);
+ gnttab_release_grant_reference(
+ &np->gref_tx_head, np->grant_tx_ref[i]);
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
+ add_id_to_freelist(np->tx_skbs, i);
+ dev_kfree_skb_irq(skb);
+ }
+
+ /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) {
if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET)
continue;
@@ -1150,7 +1121,7 @@ static void network_connect(struct net_d
RING_PUSH_REQUESTS(&np->rx);
/*
- * Step 2: All public and private state should now be sane. Get
+ * Step 3: All public and private state should now be sane. Get
* ready to start sending and receiving packets and give the driver
* domain a kick because we've probably just requeued some
* packets.
@@ -1158,6 +1129,7 @@ static void network_connect(struct net_d
netif_carrier_on(dev);
notify_remote_via_irq(np->irq);
network_tx_buf_gc(dev);
+ network_alloc_rx_buffers(dev);
spin_unlock(&np->rx_lock);
spin_unlock_irq(&np->tx_lock);
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h Thu Jun 15 10:23:57 2006 -0600
@@ -18,4 +18,8 @@ extern void _arch_exit_mmap(struct mm_st
extern void _arch_exit_mmap(struct mm_struct *mm);
#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+/* kernel/fork.c:dup_mmap hook */
+extern void _arch_dup_mmap(struct mm_struct *mm);
+#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
+
#endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Thu Jun 15 10:23:57 2006 -0600
@@ -51,8 +51,7 @@ static inline void switch_mm(struct mm_s
struct mmuext_op _op[2], *op = _op;
if (likely(prev != next)) {
- if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
- mm_pin(next);
+ BUG_ON(!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags));
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
@@ -99,7 +98,11 @@ static inline void switch_mm(struct mm_s
#define deactivate_mm(tsk, mm) \
asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
-#define activate_mm(prev, next) \
- switch_mm((prev),(next),NULL)
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+ if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
+ mm_pin(next);
+ switch_mm(prev, next, NULL);
+}
#endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Thu Jun 15 10:23:57 2006 -0600
@@ -25,9 +25,9 @@ static char * __init machine_specific_me
if ( rc == -ENOSYS ) {
memmap.nr_entries = 1;
map[0].addr = 0ULL;
- map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+ map[0].size = PFN_PHYS(xen_start_info->nr_pages);
/* 8MB slack (to balance backend allocations). */
- map[0].size += 8 << 20;
+ map[0].size += 8ULL << 20;
map[0].type = E820_RAM;
rc = 0;
}
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h Thu Jun 15 10:23:57 2006 -0600
@@ -28,6 +28,10 @@ extern spinlock_t mm_unpinned_lock;
/* mm/memory.c:exit_mmap hook */
extern void _arch_exit_mmap(struct mm_struct *mm);
#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+
+/* kernel/fork.c:dup_mmap hook */
+extern void _arch_dup_mmap(struct mm_struct *mm);
+#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
#endif
#endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h Thu Jun 15 10:23:57 2006 -0600
@@ -73,8 +73,7 @@ static inline void switch_mm(struct mm_s
struct mmuext_op _op[3], *op = _op;
if (likely(prev != next)) {
- if (!next->context.pinned)
- mm_pin(next);
+ BUG_ON(!next->context.pinned);
/* stop flush ipis for the previous mm */
clear_bit(cpu, &prev->cpu_vm_mask);
@@ -127,8 +126,11 @@ static inline void switch_mm(struct mm_s
asm volatile("movl %0,%%fs"::"r"(0)); \
} while(0)
-#define activate_mm(prev, next) do { \
- switch_mm((prev),(next),NULL); \
-} while (0)
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+ if (!next->context.pinned)
+ mm_pin(next);
+ switch_mm(prev, next, NULL);
+}
#endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/xen/gnttab.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Thu Jun 15 10:23:57 2006 -0600
@@ -100,6 +100,7 @@ void gnttab_release_grant_reference(gran
void gnttab_request_free_callback(struct gnttab_free_callback *callback,
void (*fn)(void *), void *arg, u16 count);
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly);
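A minimal sketch of how the new cancel call is meant to pair with gnttab_request_free_callback(), mirroring the blkfront teardown earlier in this changeset; the structure and function names below are illustrative, not part of the patch.

#include <linux/workqueue.h>
#include <xen/gnttab.h>

/* Illustrative frontend teardown (assumed names): once a free callback has
 * been registered, cancel it before the state it references is torn down,
 * then flush any work it may already have scheduled. */
struct example_front_info {
	struct gnttab_free_callback callback;
	/* ... other per-device state ... */
};

static void example_front_disconnect(struct example_front_info *info)
{
	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&info->callback);
	/* Wait for any callback-scheduled work to finish. */
	flush_scheduled_work();
}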
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/io.c Thu Jun 15 10:23:57 2006 -0600
@@ -24,8 +24,8 @@
#include "io.h"
#include <xenctrl.h>
#include <xs.h>
-#include <xen/linux/evtchn.h>
#include <xen/io/console.h>
+#include <xenctrl.h>
#include <malloc.h>
#include <stdlib.h>
@@ -36,7 +36,6 @@
#include <unistd.h>
#include <termios.h>
#include <stdarg.h>
-#include <sys/ioctl.h>
#include <sys/mman.h>
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
@@ -64,18 +63,11 @@ struct domain
char *conspath;
int ring_ref;
evtchn_port_t local_port;
- int evtchn_fd;
+ int xce_handle;
struct xencons_interface *interface;
};
static struct domain *dom_head;
-
-static void evtchn_notify(struct domain *dom)
-{
- struct ioctl_evtchn_notify notify;
- notify.port = dom->local_port;
- (void)ioctl(dom->evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
-}
static void buffer_append(struct domain *dom)
{
@@ -106,7 +98,7 @@ static void buffer_append(struct domain
mb();
intf->out_cons = cons;
- evtchn_notify(dom);
+ xc_evtchn_notify(dom->xce_handle, dom->local_port);
if (buffer->max_capacity &&
buffer->size > buffer->max_capacity) {
@@ -234,7 +226,6 @@ static int domain_create_ring(struct dom
static int domain_create_ring(struct domain *dom)
{
int err, remote_port, ring_ref, rc;
- struct ioctl_evtchn_bind_interdomain bind;
err = xs_gather(xs, dom->conspath,
"ring-ref", "%u", &ring_ref,
@@ -258,24 +249,24 @@ static int domain_create_ring(struct dom
}
dom->local_port = -1;
- if (dom->evtchn_fd != -1)
- close(dom->evtchn_fd);
+ if (dom->xce_handle != -1)
+ xc_evtchn_close(dom->xce_handle);
/* Opening evtchn independently for each console is a bit
* wasteful, but that's how the code is structured... */
- dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR);
- if (dom->evtchn_fd == -1) {
+ dom->xce_handle = xc_evtchn_open();
+ if (dom->xce_handle == -1) {
err = errno;
goto out;
}
- bind.remote_domain = dom->domid;
- bind.remote_port = remote_port;
- rc = ioctl(dom->evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+ rc = xc_evtchn_bind_interdomain(dom->xce_handle,
+ dom->domid, remote_port);
+
if (rc == -1) {
err = errno;
- close(dom->evtchn_fd);
- dom->evtchn_fd = -1;
+ xc_evtchn_close(dom->xce_handle);
+ dom->xce_handle = -1;
goto out;
}
dom->local_port = rc;
@@ -285,8 +276,8 @@ static int domain_create_ring(struct dom
if (dom->tty_fd == -1) {
err = errno;
- close(dom->evtchn_fd);
- dom->evtchn_fd = -1;
+ xc_evtchn_close(dom->xce_handle);
+ dom->xce_handle = -1;
dom->local_port = -1;
goto out;
}
@@ -344,7 +335,7 @@ static struct domain *create_domain(int
dom->ring_ref = -1;
dom->local_port = -1;
dom->interface = NULL;
- dom->evtchn_fd = -1;
+ dom->xce_handle = -1;
if (!watch_domain(dom, true))
goto out;
@@ -409,9 +400,9 @@ static void shutdown_domain(struct domai
if (d->interface != NULL)
munmap(d->interface, getpagesize());
d->interface = NULL;
- if (d->evtchn_fd != -1)
- close(d->evtchn_fd);
- d->evtchn_fd = -1;
+ if (d->xce_handle != -1)
+ xc_evtchn_close(d->xce_handle);
+ d->xce_handle = -1;
cleanup_domain(d);
}
@@ -483,7 +474,7 @@ static void handle_tty_read(struct domai
}
wmb();
intf->in_prod = prod;
- evtchn_notify(dom);
+ xc_evtchn_notify(dom->xce_handle, dom->local_port);
} else {
close(dom->tty_fd);
dom->tty_fd = -1;
@@ -516,14 +507,14 @@ static void handle_tty_write(struct doma
static void handle_ring_read(struct domain *dom)
{
- evtchn_port_t v;
-
- if (!read_sync(dom->evtchn_fd, &v, sizeof(v)))
+ evtchn_port_t port;
+
+ if ((port = xc_evtchn_pending(dom->xce_handle)) == -1)
return;
buffer_append(dom);
- (void)write_sync(dom->evtchn_fd, &v, sizeof(v));
+ (void)xc_evtchn_unmask(dom->xce_handle, port);
}
static void handle_xs(void)
@@ -566,9 +557,10 @@ void handle_io(void)
max_fd = MAX(xs_fileno(xs), max_fd);
for (d = dom_head; d; d = d->next) {
- if (d->evtchn_fd != -1) {
- FD_SET(d->evtchn_fd, &readfds);
- max_fd = MAX(d->evtchn_fd, max_fd);
+ if (d->xce_handle != -1) {
+ int evtchn_fd = xc_evtchn_fd(d->xce_handle);
+ FD_SET(evtchn_fd, &readfds);
+ max_fd = MAX(evtchn_fd, max_fd);
}
if (d->tty_fd != -1) {
@@ -588,8 +580,8 @@ void handle_io(void)
for (d = dom_head; d; d = n) {
n = d->next;
- if (d->evtchn_fd != -1 &&
- FD_ISSET(d->evtchn_fd, &readfds))
+ if (d->xce_handle != -1 &&
+ FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
handle_ring_read(d);
if (d->tty_fd != -1) {
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/utils.c
--- a/tools/console/daemon/utils.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/utils.c Thu Jun 15 10:23:57 2006 -0600
@@ -38,32 +38,6 @@
struct xs_handle *xs;
int xc;
-
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
-{
- size_t offset = 0;
- ssize_t len;
-
- while (offset < size) {
- if (do_read) {
- len = read(fd, data + offset, size - offset);
- } else {
- len = write(fd, data + offset, size - offset);
- }
-
- if (len < 1) {
- if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
- continue;
- } else {
- return false;
- }
- } else {
- offset += len;
- }
- }
-
- return true;
-}
static void child_exit(int sig)
{
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/utils.h
--- a/tools/console/daemon/utils.h Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/utils.h Thu Jun 15 10:23:57 2006 -0600
@@ -29,9 +29,6 @@
void daemonize(const char *pidfile);
bool xen_setup(void);
-#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
-#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
extern struct xs_handle *xs;
extern int xc;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/ioemu/sdl.c
--- a/tools/ioemu/sdl.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/ioemu/sdl.c Thu Jun 15 10:23:57 2006 -0600
@@ -376,13 +376,18 @@ static void sdl_update_caption(void)
static void sdl_hide_cursor(void)
{
- SDL_SetCursor(sdl_cursor_hidden);
+ if (kbd_mouse_is_absolute()) {
+ SDL_ShowCursor(1);
+ SDL_SetCursor(sdl_cursor_hidden);
+ } else {
+ SDL_ShowCursor(0);
+ }
}
static void sdl_show_cursor(void)
{
if (!kbd_mouse_is_absolute()) {
- SDL_SetCursor(sdl_cursor_normal);
+ SDL_ShowCursor(1);
}
}
diff -r 7f67c15e2c91 -r fbc0e953732e tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/ioemu/target-i386-dm/helper2.c Thu Jun 15 10:23:57 2006 -0600
@@ -47,11 +47,9 @@
#include <limits.h>
#include <fcntl.h>
-#include <sys/ioctl.h>
#include <xenctrl.h>
#include <xen/hvm/ioreq.h>
-#include <xen/linux/evtchn.h>
#include "cpu.h"
#include "exec-all.h"
@@ -123,7 +121,7 @@ target_ulong cpu_get_phys_page_debug(CPU
}
//the evtchn fd for polling
-int evtchn_fd = -1;
+int xce_handle = -1;
//which vcpu we are serving
int send_vcpu = 0;
@@ -170,11 +168,10 @@ static ioreq_t* __cpu_get_ioreq(int vcpu
//retval--the number of ioreq packet
static ioreq_t* cpu_get_ioreq(void)
{
- int i, rc;
+ int i;
evtchn_port_t port;
- rc = read(evtchn_fd, &port, sizeof(port));
- if ( rc == sizeof(port) ) {
+ if ( (port = xc_evtchn_pending(xce_handle)) != -1 ) {
for ( i = 0; i < vcpus; i++ )
if ( shared_page->vcpu_iodata[i].dm_eport == port )
break;
@@ -184,8 +181,7 @@ static ioreq_t* cpu_get_ioreq(void)
exit(1);
}
- // unmask the wanted port again
- write(evtchn_fd, &port, sizeof(port));
+ xc_evtchn_unmask(xce_handle, port);
//get the io packet from shared memory
send_vcpu = i;
@@ -436,6 +432,7 @@ int main_loop(void)
extern int shutdown_requested;
CPUState *env = global_env;
int retval;
+ int evtchn_fd = xc_evtchn_fd(xce_handle);
extern void main_loop_wait(int);
/* Watch stdin (fd 0) to see when it has input. */
@@ -475,11 +472,9 @@ int main_loop(void)
main_loop_wait(0);
if (env->send_event) {
- struct ioctl_evtchn_notify notify;
-
env->send_event = 0;
- notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport;
- (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+ (void)xc_evtchn_notify(xce_handle,
+ shared_page->vcpu_iodata[send_vcpu].dm_eport);
}
}
destroy_hvm_domain();
@@ -511,7 +506,6 @@ CPUState * cpu_init()
CPUState * cpu_init()
{
CPUX86State *env;
- struct ioctl_evtchn_bind_interdomain bind;
int i, rc;
cpu_exec_init();
@@ -523,21 +517,19 @@ CPUState * cpu_init()
cpu_single_env = env;
- if (evtchn_fd != -1)//the evtchn has been opened by another cpu object
+ if (xce_handle != -1)//the evtchn has been opened by another cpu object
return NULL;
- //use nonblock reading not polling, may change in future.
- evtchn_fd = open("/dev/xen/evtchn", O_RDWR|O_NONBLOCK);
- if (evtchn_fd == -1) {
+ xce_handle = xc_evtchn_open();
+ if (xce_handle == -1) {
fprintf(logfile, "open evtchn device error %d\n", errno);
return NULL;
}
/* FIXME: how about if we overflow the page here? */
- bind.remote_domain = domid;
for ( i = 0; i < vcpus; i++ ) {
- bind.remote_port = shared_page->vcpu_iodata[i].vp_eport;
- rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+ rc = xc_evtchn_bind_interdomain(xce_handle, domid,
+ shared_page->vcpu_iodata[i].vp_eport);
if ( rc == -1 ) {
fprintf(logfile, "bind interdomain ioctl error %d\n", errno);
return NULL;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_elf.h
--- a/tools/libxc/xc_elf.h Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_elf.h Thu Jun 15 10:23:57 2006 -0600
@@ -170,13 +170,14 @@ typedef struct {
#define EM_PARISC 15 /* HPPA */
#define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */
#define EM_PPC 20 /* PowerPC */
+#define EM_PPC64 21 /* PowerPC 64-bit */
#define EM_ARM 40 /* Advanced RISC Machines ARM */
#define EM_ALPHA 41 /* DEC ALPHA */
#define EM_SPARCV9 43 /* SPARC version 9 */
#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */
+#define EM_IA_64 50 /* Intel Merced */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_VAX 75 /* DEC VAX */
-#define EM_NUM 15 /* number of machine types */
/* Version */
#define EV_NONE 0 /* Invalid */
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_linux.c Thu Jun 15 10:23:57 2006 -0600
@@ -103,6 +103,124 @@ int do_xen_hypercall(int xc_handle, priv
(unsigned long)hypercall);
}
+#define EVTCHN_DEV_NAME "/dev/xen/evtchn"
+#define EVTCHN_DEV_MAJOR 10
+#define EVTCHN_DEV_MINOR 201
+
+int xc_evtchn_open(void)
+{
+ struct stat st;
+ int fd;
+
+ /* Make sure any existing device file links to correct device. */
+ if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
+ (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
+ (void)unlink(EVTCHN_DEV_NAME);
+
+reopen:
+ if ( (fd = open(EVTCHN_DEV_NAME, O_RDWR)) == -1 )
+ {
+ if ( (errno == ENOENT) &&
+ ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+ (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
+ makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0) )
+ goto reopen;
+
+ PERROR("Could not open event channel interface");
+ return -1;
+ }
+
+ return fd;
+}
+
+int xc_evtchn_close(int xce_handle)
+{
+ return close(xce_handle);
+}
+
+int xc_evtchn_fd(int xce_handle)
+{
+ return xce_handle;
+}
+
+int xc_evtchn_notify(int xce_handle, evtchn_port_t port)
+{
+ struct ioctl_evtchn_notify notify;
+
+ notify.port = port;
+
+ return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, &notify);
+}
+
+evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
+ evtchn_port_t remote_port)
+{
+ struct ioctl_evtchn_bind_interdomain bind;
+
+ bind.remote_domain = domid;
+ bind.remote_port = remote_port;
+
+ return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+}
+
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
+{
+ struct ioctl_evtchn_unbind unbind;
+
+ unbind.port = port;
+
+ return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind);
+}
+
+evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq)
+{
+ struct ioctl_evtchn_bind_virq bind;
+
+ bind.virq = virq;
+
+ return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind);
+}
+
+static int dorw(int fd, char *data, size_t size, int do_write)
+{
+ size_t offset = 0;
+ ssize_t len;
+
+ while ( offset < size )
+ {
+ if (do_write)
+ len = write(fd, data + offset, size - offset);
+ else
+ len = read(fd, data + offset, size - offset);
+
+ if ( len == -1 )
+ {
+ if ( errno == EINTR )
+ continue;
+ return -1;
+ }
+
+ offset += len;
+ }
+
+ return 0;
+}
+
+evtchn_port_t xc_evtchn_pending(int xce_handle)
+{
+ evtchn_port_t port;
+
+ if ( dorw(xce_handle, (char *)&port, sizeof(port), 0) == -1 )
+ return -1;
+
+ return port;
+}
+
+int xc_evtchn_unmask(int xce_handle, evtchn_port_t port)
+{
+ return dorw(xce_handle, (char *)&port, sizeof(port), 1);
+}
+
/*
* Local variables:
* mode: C
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c Thu Jun 15 10:23:57 2006 -0600
@@ -456,6 +456,15 @@ int xc_linux_restore(int xc_handle, int
n+= j; /* crude stats */
}
+ /*
+ * Ensure we flush all machphys updates before potential PAE-specific
+ * reallocations below.
+ */
+ if (xc_finish_mmu_updates(xc_handle, mmu)) {
+ ERR("Error doing finish_mmu_updates()");
+ goto out;
+ }
+
DPRINTF("Received all pages (%d races)\n", nraces);
if ((pt_levels == 3) && !pae_extended_cr3) {
@@ -550,14 +559,11 @@ int xc_linux_restore(int xc_handle, int
}
}
- }
-
-
- if (xc_finish_mmu_updates(xc_handle, mmu)) {
- ERR("Error doing finish_mmu_updates()");
- goto out;
- }
-
+ if (xc_finish_mmu_updates(xc_handle, mmu)) {
+ ERR("Error doing finish_mmu_updates()");
+ goto out;
+ }
+ }
/*
* Pin page tables. Do this after writing to them as otherwise Xen
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_load_elf.c Thu Jun 15 10:23:57 2006 -0600
@@ -21,6 +21,24 @@ loadelfsymtab(
loadelfsymtab(
const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
struct domain_setup_info *dsi);
+
+/*
+ * Elf header attributes we require for each supported host platform.
+ * These are checked in parseelfimage().
+ */
+#if defined(__ia64__)
+#define ELFCLASS ELFCLASS64
+#define ELFDATA ELFDATA2LSB
+#define ELFMACHINE EM_IA_64
+#elif defined(__i386__)
+#define ELFCLASS ELFCLASS32
+#define ELFDATA ELFDATA2LSB
+#define ELFMACHINE EM_386
+#elif defined(__x86_64__)
+#define ELFCLASS ELFCLASS64
+#define ELFDATA ELFDATA2LSB
+#define ELFMACHINE EM_X86_64
+#endif
int probe_elf(const char *image,
unsigned long image_size,
@@ -61,16 +79,10 @@ static int parseelfimage(const char *ima
return -EINVAL;
}
- if (
-#if defined(__i386__)
- (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
- (ehdr->e_machine != EM_386) ||
-#elif defined(__x86_64__)
- (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
- (ehdr->e_machine != EM_X86_64) ||
-#endif
- (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
- (ehdr->e_type != ET_EXEC) )
+ if ( (ehdr->e_ident[EI_CLASS] != ELFCLASS) ||
+ (ehdr->e_machine != ELFMACHINE) ||
+ (ehdr->e_ident[EI_DATA] != ELFDATA) ||
+ (ehdr->e_type != ET_EXEC) )
{
ERROR("Kernel not a Xen-compatible Elf image.");
return -EINVAL;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xenctrl.h Thu Jun 15 10:23:57 2006 -0600
@@ -604,4 +604,58 @@ int xc_finish_mmu_updates(int xc_handle,
int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size);
+/*
+ * Return a handle to the event channel driver, or -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_evtchn_open(void);
+
+/*
+ * Close a handle previously allocated with xc_evtchn_open().
+ */
+int xc_evtchn_close(int xce_handle);
+
+/*
+ * Return an fd that can be select()ed on for further calls to
+ * xc_evtchn_pending().
+ */
+int xc_evtchn_fd(int xce_handle);
+
+/*
+ * Notify the given event channel. Returns -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_evtchn_notify(int xce_handle, evtchn_port_t port);
+
+/*
+ * Returns a new event port bound to the remote port for the given domain ID,
+ * or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
+ evtchn_port_t remote_port);
+
+/*
+ * Unbind the given event channel. Returns -1 on failure, in which case errno
+ * will be set appropriately.
+ */
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port);
+
+/*
+ * Bind an event channel to the given VIRQ. Returns the event channel bound to
+ * the VIRQ, or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq);
+
+/*
+ * Return the next event channel to become pending, or -1 on failure, in which
+ * case errno will be set appropriately.
+ */
+evtchn_port_t xc_evtchn_pending(int xce_handle);
+
+/*
+ * Unmask the given event channel. Returns -1 on failure, in which case errno
+ * will be set appropriately.
+ */
+int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
+
#endif
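A minimal sketch of the intended calling pattern for the event-channel interface declared above, modelled on the xenconsoled and xenbaked conversions in this changeset; the VIRQ choice and the minimal error handling are illustrative only.

#include <xenctrl.h>

/* Illustrative: open the evtchn driver, bind a VIRQ, wait for one event. */
static int example_wait_one_event(void)
{
    int xce = xc_evtchn_open();
    evtchn_port_t port, pending;

    if (xce == -1)
        return -1;

    port = xc_evtchn_bind_virq(xce, VIRQ_TBUF);   /* as xenbaked does */
    if (port == -1) {
        xc_evtchn_close(xce);
        return -1;
    }

    /* xc_evtchn_fd(xce) could be select()ed on; here we simply block. */
    pending = xc_evtchn_pending(xce);
    if (pending == port)
        (void)xc_evtchn_unmask(xce, pending);     /* re-arm the port */

    (void)xc_evtchn_unbind(xce, port);
    return xc_evtchn_close(xce);
}

The select()-based variant used by xenconsoled adds xc_evtchn_fd() to its fd set and calls xc_evtchn_pending()/xc_evtchn_unmask() only when that fd becomes readable.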
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/util/security.py
--- a/tools/python/xen/util/security.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/util/security.py Thu Jun 15 10:23:57 2006 -0600
@@ -52,7 +52,8 @@ binary_name_re = re.compile(".*[chwall|s
binary_name_re = re.compile(".*[chwall|ste|chwall_ste].*\.bin", re.IGNORECASE)
policy_name_re = re.compile(".*[chwall|ste|chwall_ste].*", re.IGNORECASE)
-
+#other global variables
+NULL_SSIDREF = 0
log = logging.getLogger("xend.util.security")
@@ -255,6 +256,8 @@ def ssidref2label(ssidref_var):
#2. get labelnames for both ssidref parts
pri_ssid = ssidref & 0xffff
sec_ssid = ssidref >> 16
+ pri_null_ssid = NULL_SSIDREF & 0xffff
+ sec_null_ssid = NULL_SSIDREF >> 16
pri_labels = []
sec_labels = []
labels = []
@@ -270,7 +273,11 @@ def ssidref2label(ssidref_var):
f.close()
#3. get the label that is in both lists (combination must be a single label)
- if secondary == "NULL":
+ if (primary == "CHWALL") and (pri_ssid == pri_null_ssid) and (sec_ssid != sec_null_ssid):
+ labels = sec_labels
+ elif (secondary == "CHWALL") and (pri_ssid != pri_null_ssid) and (sec_ssid == sec_null_ssid):
+ labels = pri_labels
+ elif secondary == "NULL":
labels = pri_labels
else:
for i in pri_labels:
@@ -285,7 +292,7 @@ def ssidref2label(ssidref_var):
-def label2ssidref(labelname, policyname):
+def label2ssidref(labelname, policyname, type):
"""
returns ssidref corresponding to labelname;
maps current policy to default directory
@@ -293,6 +300,14 @@ def label2ssidref(labelname, policyname)
if policyname in ['NULL', 'INACTIVE', 'DEFAULT']:
err("Cannot translate labels for \'" + policyname + "\' policy.")
+
+ allowed_types = ['ANY']
+ if type == 'dom':
+ allowed_types.append('VM')
+ elif type == 'res':
+ allowed_types.append('RES')
+ else:
+ err("Invalid type. Must specify 'dom' or 'res'.")
(primary, secondary, f, pol_exists) = getmapfile(policyname)
@@ -303,11 +318,15 @@ def label2ssidref(labelname, policyname)
l = line.split()
if (len(l) < 5) or (l[0] != "LABEL->SSID"):
continue
- if primary and (l[2] == primary) and (l[3] == labelname):
+ if primary and (l[1] in allowed_types) and (l[2] == primary) and (l[3] == labelname):
pri_ssid.append(int(l[4], 16))
- if secondary and (l[2] == secondary) and (l[3] == labelname):
+ if secondary and (l[1] in allowed_types) and (l[2] == secondary) and (l[3] == labelname):
sec_ssid.append(int(l[4], 16))
f.close()
+ if (type == 'res') and (primary == "CHWALL") and (len(pri_ssid) == 0):
+ pri_ssid.append(NULL_SSIDREF)
+ elif (type == 'res') and (secondary == "CHWALL") and (len(sec_ssid) == 0):
+ sec_ssid.append(NULL_SSIDREF)
#3. sanity check and composition of ssidref
if (len(pri_ssid) == 0) or ((len(sec_ssid) == 0) and (secondary != "NULL")):
@@ -360,7 +379,7 @@ def refresh_ssidref(config):
err("Policy \'" + policyname + "\' in label does not match active
policy \'"
+ active_policy +"\'!")
- new_ssidref = label2ssidref(labelname, policyname)
+ new_ssidref = label2ssidref(labelname, policyname, 'dom')
if not new_ssidref:
err("SSIDREF refresh failed!")
@@ -409,7 +428,7 @@ def get_decision(arg1, arg2):
enables domains to retrieve access control decisions from
the hypervisor Access Control Module.
IN: args format = ['domid', id] or ['ssidref', ssidref]
- or ['access_control', ['policy', policy], ['label', label]]
+ or ['access_control', ['policy', policy], ['label', label], ['type', type]]
"""
if not on():
@@ -417,14 +436,14 @@ def get_decision(arg1, arg2):
#translate labels before calling low-level function
if arg1[0] == 'access_control':
- if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') :
+ if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') or (arg1[3][0] != 'type'):
err("Argument type not supported.")
- ssidref = label2ssidref(arg1[2][1], arg1[1][1])
+ ssidref = label2ssidref(arg1[2][1], arg1[1][1], arg1[3][1])
arg1 = ['ssidref', str(ssidref)]
if arg2[0] == 'access_control':
- if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') :
+ if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') or (arg2[3][0] != 'type'):
err("Argument type not supported.")
- ssidref = label2ssidref(arg2[2][1], arg2[1][1])
+ ssidref = label2ssidref(arg2[2][1], arg2[1][1], arg2[3][1])
arg2 = ['ssidref', str(ssidref)]
# accept only int or string types for domid and ssidref
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/addlabel.py
--- a/tools/python/xen/xm/addlabel.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/addlabel.py Thu Jun 15 10:23:57 2006 -0600
@@ -50,7 +50,7 @@ def main(argv):
err("No active policy. Policy must be specified in command line.")
#sanity checks: make sure this label can be instantiated later on
- ssidref = label2ssidref(label, policyref)
+ ssidref = label2ssidref(label, policyref, 'dom')
+ new_label = "access_control = ['policy=%s,label=%s']\n" % (policyref, label)
if not os.path.isfile(configfile):
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/create.py Thu Jun 15 10:23:57 2006 -0600
@@ -541,7 +541,7 @@ def configure_security(config, vals):
if sxp.child_value(config, 'ssidref'):
err("ERROR: SSIDREF and access_control are mutually exclusive but
both specified!")
#else calculate ssidre from label
- ssidref = security.label2ssidref(label, policy)
+ ssidref = security.label2ssidref(label, policy, 'dom')
if not ssidref :
err("ERROR calculating ssidref from access_control.")
security_label = ['security', [ config_access_control, ['ssidref' , ssidref ] ] ]
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/main.py Thu Jun 15 10:23:57 2006 -0600
@@ -1193,6 +1193,9 @@ def main(argv=sys.argv):
else:
print >>sys.stderr, "Error: %s" % ex.faultString
sys.exit(1)
+ except (ValueError, OverflowError):
+ err("Invalid argument.")
+ usage(argv[1])
except:
print "Unexpected error:", sys.exc_info()[0]
print
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/Makefile
--- a/tools/security/Makefile Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/Makefile Thu Jun 15 10:23:57 2006 -0600
@@ -33,7 +33,7 @@ OBJS_XML2BIN := $(patsubst %.c,%.o,$(fil
ACM_INST_TOOLS = xensec_tool xensec_xml2bin xensec_gen
ACM_OBJS = $(OBJS_TOOL) $(OBJS_XML2BIN) $(OBJS_GETD)
-ACM_SCRIPTS = python/xensec_tools/acm_getlabel python/xensec_tools/acm_getdecision
+ACM_SCRIPTS = python/xensec_tools/acm_getlabel
ACM_CONFIG_DIR = /etc/xen/acm-security
ACM_POLICY_DIR = $(ACM_CONFIG_DIR)/policies
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/python/xensec_gen/cgi-bin/policy.cgi
--- a/tools/security/python/xensec_gen/cgi-bin/policy.cgi Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/python/xensec_gen/cgi-bin/policy.cgi Thu Jun 15 10:23:57 2006 -0600
@@ -406,7 +406,7 @@ def parsePolicyXml( ):
msg = msg + 'Please validate the Policy file used.'
formatXmlError( msg )
- allCSMTypes[csName][1] = csMemberList
+ allCSMTypes[csName][1] = csMemberList
if pOrder != '':
formPolicyOrder[1] = pOrder
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/secpol_xml2bin.c
--- a/tools/security/secpol_xml2bin.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/secpol_xml2bin.c Thu Jun 15 10:23:57 2006 -0600
@@ -44,6 +44,8 @@
#define DEBUG 0
+#define NULL_LABEL_NAME "__NULL_LABEL__"
+
/* primary / secondary policy component setting */
enum policycomponent { CHWALL, STE, NULLPOLICY }
primary = NULLPOLICY, secondary = NULLPOLICY;
@@ -467,7 +469,7 @@ int init_ssid_queues(void)
return -ENOMEM;
/* default chwall ssid */
- default_ssid_chwall->name = "DEFAULT";
+ default_ssid_chwall->name = NULL_LABEL_NAME;
default_ssid_chwall->num = max_chwall_ssids++;
default_ssid_chwall->is_ref = 0;
default_ssid_chwall->type = ANY;
@@ -484,7 +486,7 @@ int init_ssid_queues(void)
max_chwall_labels++;
/* default ste ssid */
- default_ssid_ste->name = "DEFAULT";
+ default_ssid_ste->name = NULL_LABEL_NAME;
default_ssid_ste->num = max_ste_ssids++;
default_ssid_ste->is_ref = 0;
default_ssid_ste->type = ANY;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenmon/xenbaked.c Thu Jun 15 10:23:57 2006 -0600
@@ -33,9 +33,6 @@
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
@@ -45,7 +42,6 @@
#include <xen/xen.h>
#include <string.h>
#include <sys/select.h>
-#include <xen/linux/evtchn.h>
#define PERROR(_m, _a...) \
do { \
@@ -256,51 +252,29 @@ void log_event(int event_id)
stat_map[0].event_count++; // other
}
-#define EVTCHN_DEV_NAME "/dev/xen/evtchn"
-#define EVTCHN_DEV_MAJOR 10
-#define EVTCHN_DEV_MINOR 201
-
int virq_port;
-int eventchn_fd = -1;
+int xce_handle = -1;
/* Returns the event channel handle. */
/* Stolen from xenstore code */
int eventchn_init(void)
{
- struct stat st;
- struct ioctl_evtchn_bind_virq bind;
int rc;
// to revert to old way:
if (0)
return -1;
- /* Make sure any existing device file links to correct device. */
- if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
- (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
- (void)unlink(EVTCHN_DEV_NAME);
-
- reopen:
- eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
- if (eventchn_fd == -1) {
- if ((errno == ENOENT) &&
- ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
- (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
- makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
- goto reopen;
- return -errno;
- }
-
- if (eventchn_fd < 0)
+ xce_handle = xc_evtchn_open();
+
+ if (xce_handle < 0)
perror("Failed to open evtchn device");
- bind.virq = VIRQ_TBUF;
- rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
- if (rc == -1)
+ if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
perror("Failed to bind to domain exception virq port");
virq_port = rc;
- return eventchn_fd;
+ return xce_handle;
}
void wait_for_event(void)
@@ -309,27 +283,30 @@ void wait_for_event(void)
fd_set inset;
evtchn_port_t port;
struct timeval tv;
+ int evtchn_fd;
- if (eventchn_fd < 0) {
+ if (xce_handle < 0) {
nanosleep(&opts.poll_sleep, NULL);
return;
}
+ evtchn_fd = xc_evtchn_fd(xce_handle);
+
FD_ZERO(&inset);
- FD_SET(eventchn_fd, &inset);
+ FD_SET(evtchn_fd, &inset);
tv.tv_sec = 1;
tv.tv_usec = 0;
// tv = millis_to_timespec(&opts.poll_sleep);
- ret = select(eventchn_fd+1, &inset, NULL, NULL, &tv);
+ ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);
- if ( (ret == 1) && FD_ISSET(eventchn_fd, &inset)) {
- if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+ if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
+ if ((port = xc_evtchn_pending(xce_handle)) == -1)
perror("Failed to read from event fd");
// if (port == virq_port)
// printf("got the event I was looking for\r\n");
-
- if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+
+ if (xc_evtchn_unmask(xce_handle, port) == -1)
perror("Failed to write to event fd");
}
}
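
The two xenbaked hunks above drop the hand-rolled /dev/xen/evtchn device handling (lstat/mknod/open plus raw ioctl/read/write) in favour of the libxc event-channel interface. A minimal sketch of the resulting pattern, using only the xc_evtchn_* calls that appear in this changeset; the helper name wait_one_event is illustrative and not part of the patch:

#include <xenctrl.h>
#include <sys/select.h>

/* Open a handle, bind to a VIRQ, wait via select(), then consume and unmask. */
static int wait_one_event(unsigned int virq)
{
    int xce = xc_evtchn_open();               /* replaces open("/dev/xen/evtchn") */
    evtchn_port_t port;
    fd_set inset;
    int fd;

    if (xce < 0 || xc_evtchn_bind_virq(xce, virq) == -1)
        return -1;

    fd = xc_evtchn_fd(xce);                   /* selectable fd behind the handle */
    FD_ZERO(&inset);
    FD_SET(fd, &inset);
    if (select(fd + 1, &inset, NULL, NULL, NULL) == 1) {
        port = xc_evtchn_pending(xce);        /* replaces read() of the port */
        if (port != -1)
            xc_evtchn_unmask(xce, port);      /* replaces write() of the port */
    }
    return 0;
}

xenstored_domain.c below makes the same substitution for its interdomain channels via xc_evtchn_bind_interdomain, xc_evtchn_notify and xc_evtchn_unbind.
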
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstat/libxenstat/src/xenstat.c Thu Jun 15 10:23:57 2006 -0600
@@ -223,18 +223,20 @@ xenstat_node *xenstat_get_node(xenstat_h
num_domains = 0;
do {
- xenstat_domain *domain;
+ xenstat_domain *domain, *tmp;
new_domains = xc_domain_getinfolist(handle->xc_handle,
num_domains, DOMAIN_CHUNK_SIZE, domaininfo);
- node->domains = realloc(node->domains,
- (num_domains + new_domains)
- * sizeof(xenstat_domain));
- if (node->domains == NULL) {
+ tmp = realloc(node->domains,
+ (num_domains + new_domains)
+ * sizeof(xenstat_domain));
+ if (tmp == NULL) {
+ free(node->domains);
free(node);
return NULL;
}
+ node->domains = tmp;
domain = node->domains + num_domains;
@@ -582,11 +584,14 @@ static int xenstat_collect_networks(xens
domain->num_networks = 1;
domain->networks = malloc(sizeof(xenstat_network));
} else {
+ struct xenstat_network *tmp;
domain->num_networks++;
- domain->networks =
- realloc(domain->networks,
- domain->num_networks *
- sizeof(xenstat_network));
+ tmp = realloc(domain->networks,
+ domain->num_networks *
+ sizeof(xenstat_network));
+ if (tmp == NULL)
+ free(domain->networks);
+ domain->networks = tmp;
}
if (domain->networks == NULL)
return 0;
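
Both xenstat hunks above replace realloc() calls that overwrote the only pointer to the buffer, which leaked the original allocation whenever realloc() failed. A standalone sketch of the keep-the-old-pointer idiom they adopt; the helper name grow_array is illustrative, not part of the patch:

#include <stdlib.h>

/* Resize an array to n elements; on failure the old block is freed, not leaked. */
static int grow_array(xenstat_domain **arr, size_t n)
{
    xenstat_domain *tmp = realloc(*arr, n * sizeof(**arr));
    if (tmp == NULL) {
        free(*arr);            /* the old pointer is still valid here */
        *arr = NULL;
        return -1;
    }
    *arr = tmp;
    return 0;
}
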
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/fake_libxc.c
--- a/tools/xenstore/fake_libxc.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/fake_libxc.c Thu Jun 15 10:23:57 2006 -0600
@@ -37,7 +37,7 @@ static evtchn_port_t port;
static evtchn_port_t port;
/* The event channel maps to a signal, shared page to an mmapped file. */
-void evtchn_notify(int local_port)
+void xc_evtchn_notify(int xce_handle, int local_port)
{
assert(local_port == port);
if (kill(xs_test_pid, SIGUSR2) != 0)
@@ -124,7 +124,7 @@ void fake_ack_event(void)
signal(SIGUSR2, send_to_fd);
}
-int fake_open_eventchn(void)
+int xc_evtchn_open(void)
{
int fds[2];
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/xenstored_core.c Thu Jun 15 10:23:57 2006 -0600
@@ -54,7 +54,7 @@
#include "hashtable.h"
-extern int eventchn_fd; /* in xenstored_domain.c */
+extern int xce_handle; /* in xenstored_domain.c */
static bool verbose = false;
LIST_HEAD(connections);
@@ -353,8 +353,11 @@ static int initialize_set(fd_set *inset,
set_fd(sock, inset, &max);
set_fd(ro_sock, inset, &max);
- set_fd(eventchn_fd, inset, &max);
set_fd(reopen_log_pipe[0], inset, &max);
+
+ if (xce_handle != -1)
+ set_fd(xc_evtchn_fd(xce_handle), inset, &max);
+
list_for_each_entry(i, &connections, list) {
if (i->domain)
continue;
@@ -1769,6 +1772,7 @@ int main(int argc, char *argv[])
bool outputpid = false;
bool no_domain_init = false;
const char *pidfile = NULL;
+ int evtchn_fd = -1;
while ((opt = getopt_long(argc, argv, "DE:F:HNPS:T:RLVW:", options,
NULL)) != -1) {
@@ -1907,6 +1911,9 @@ int main(int argc, char *argv[])
signal(SIGUSR1, stop_failtest);
#endif
+ if (xce_handle != -1)
+ evtchn_fd = xc_evtchn_fd(xce_handle);
+
/* Get ready to listen to the tools. */
max = initialize_set(&inset, &outset, *sock, *ro_sock);
@@ -1934,7 +1941,7 @@ int main(int argc, char *argv[])
if (FD_ISSET(*ro_sock, &inset))
accept_connection(*ro_sock, false);
- if (eventchn_fd > 0 && FD_ISSET(eventchn_fd, &inset))
+ if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
handle_event();
list_for_each_entry(i, &connections, list) {
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/xenstored_domain.c Thu Jun 15 10:23:57 2006 -0600
@@ -18,15 +18,10 @@
*/
#include <stdio.h>
-#include <linux/ioctl.h>
-#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdarg.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
//#define DEBUG
#include "utils.h"
@@ -37,12 +32,11 @@
#include "xenstored_test.h"
#include <xenctrl.h>
-#include <xen/sys/evtchn.h>
static int *xc_handle;
static evtchn_port_t virq_port;
-int eventchn_fd = -1;
+int xce_handle = -1;
struct domain
{
@@ -82,19 +76,6 @@ struct domain
};
static LIST_HEAD(domains);
-
-#ifndef TESTING
-static void evtchn_notify(int port)
-{
- int rc;
-
- struct ioctl_evtchn_notify notify;
- notify.port = port;
- rc = ioctl(eventchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
-}
-#else
-extern void evtchn_notify(int port);
-#endif
/* FIXME: Mark connection as broken (close it?) when this happens. */
static bool check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
@@ -146,7 +127,7 @@ static int writechn(struct connection *c
mb();
intf->rsp_prod += len;
- evtchn_notify(conn->domain->port);
+ xc_evtchn_notify(xce_handle, conn->domain->port);
return len;
}
@@ -176,7 +157,7 @@ static int readchn(struct connection *co
mb();
intf->req_cons += len;
- evtchn_notify(conn->domain->port);
+ xc_evtchn_notify(xce_handle, conn->domain->port);
return len;
}
@@ -184,13 +165,11 @@ static int destroy_domain(void *_domain)
static int destroy_domain(void *_domain)
{
struct domain *domain = _domain;
- struct ioctl_evtchn_unbind unbind;
list_del(&domain->list);
if (domain->port) {
- unbind.port = domain->port;
- if (ioctl(eventchn_fd, IOCTL_EVTCHN_UNBIND, &unbind) == -1)
+ if (xc_evtchn_unbind(xce_handle, domain->port) == -1)
eprintf("> Unbinding port %i failed!\n", domain->port);
}
@@ -231,14 +210,14 @@ void handle_event(void)
{
evtchn_port_t port;
- if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+ if ((port = xc_evtchn_pending(xce_handle)) == -1)
barf_perror("Failed to read from event fd");
if (port == virq_port)
domain_cleanup();
#ifndef TESTING
- if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+ if (xc_evtchn_unmask(xce_handle, port) == -1)
barf_perror("Failed to write to event fd");
#endif
}
@@ -269,7 +248,6 @@ static struct domain *new_domain(void *c
int port)
{
struct domain *domain;
- struct ioctl_evtchn_bind_interdomain bind;
int rc;
@@ -283,9 +261,7 @@ static struct domain *new_domain(void *c
talloc_set_destructor(domain, destroy_domain);
/* Tell kernel we're interested in this event. */
- bind.remote_domain = domid;
- bind.remote_port = port;
- rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+ rc = xc_evtchn_bind_interdomain(xce_handle, domid, port);
if (rc == -1)
return NULL;
domain->port = rc;
@@ -490,23 +466,14 @@ static int dom0_init(void)
talloc_steal(dom0->conn, dom0);
- evtchn_notify(dom0->port);
+ xc_evtchn_notify(xce_handle, dom0->port);
return 0;
}
-
-
-
-#define EVTCHN_DEV_NAME "/dev/xen/evtchn"
-#define EVTCHN_DEV_MAJOR 10
-#define EVTCHN_DEV_MINOR 201
-
/* Returns the event channel handle. */
int domain_init(void)
{
- struct stat st;
- struct ioctl_evtchn_bind_virq bind;
int rc;
xc_handle = talloc(talloc_autofree_context(), int);
@@ -519,39 +486,19 @@ int domain_init(void)
talloc_set_destructor(xc_handle, close_xc_handle);
-#ifdef TESTING
- eventchn_fd = fake_open_eventchn();
- (void)&st;
-#else
- /* Make sure any existing device file links to correct device. */
- if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
- (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
- (void)unlink(EVTCHN_DEV_NAME);
-
- reopen:
- eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
- if (eventchn_fd == -1) {
- if ((errno == ENOENT) &&
- ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
- (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
- makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
- goto reopen;
- return -errno;
- }
-#endif
- if (eventchn_fd < 0)
+ xce_handle = xc_evtchn_open();
+
+ if (xce_handle < 0)
barf_perror("Failed to open evtchn device");
if (dom0_init() != 0)
barf_perror("Failed to initialize dom0 state");
- bind.virq = VIRQ_DOM_EXC;
- rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
- if (rc == -1)
+ if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_DOM_EXC)) == -1)
barf_perror("Failed to bind to domain exception virq port");
virq_port = rc;
- return eventchn_fd;
+ return xce_handle;
}
void domain_entry_inc(struct connection *conn)
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xm-test/tests/block-integrity/01_block_device_read_verify.py
--- a/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py Thu Jun 15 10:23:57 2006 -0600
@@ -31,7 +31,7 @@ traceCommand("cat /dev/urandom > /dev/ra
s, o = traceCommand("md5sum /dev/ram1")
-dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M)
block_attach(domain, "phy:ram1", "hda1")
@@ -40,7 +40,7 @@ except ConsoleError, e:
except ConsoleError, e:
FAIL(str(e))
-domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M)
domain.closeConsole()
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xm-test/tests/block-integrity/02_block_device_write_verify.py
--- a/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py Thu Jun 15 10:23:57 2006 -0600
@@ -37,7 +37,7 @@ except ConsoleError, e:
except ConsoleError, e:
FAIL(str(e))
-domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M)
domain.closeConsole()
@@ -45,7 +45,7 @@ domain.stop()
s, o = traceCommand("md5sum /dev/ram1")
-dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M)
if domU_md5sum_match == None:
FAIL("Failed to get md5sum of data written in domU.")
diff -r 7f67c15e2c91 -r fbc0e953732e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/arch/x86/traps.c Thu Jun 15 10:23:57 2006 -0600
@@ -1279,7 +1279,7 @@ static void nmi_softirq(void)
static void nmi_softirq(void)
{
/* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
- evtchn_notify(dom0->vcpu[0]);
+ vcpu_kick(dom0->vcpu[0]);
}
static void nmi_dom0_report(unsigned int reason_idx)
diff -r 7f67c15e2c91 -r fbc0e953732e xen/common/event_channel.c
--- a/xen/common/event_channel.c Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/common/event_channel.c Thu Jun 15 10:23:57 2006 -0600
@@ -493,10 +493,9 @@ void evtchn_set_pending(struct vcpu *v,
if ( !test_bit (port, s->evtchn_mask) &&
!test_and_set_bit(port / BITS_PER_LONG,
- &v->vcpu_info->evtchn_pending_sel) &&
- !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
- {
- evtchn_notify(v);
+ &v->vcpu_info->evtchn_pending_sel) )
+ {
+ vcpu_mark_events_pending(v);
}
/* Check if some VCPU might be polling for this event. */
@@ -682,10 +681,9 @@ static long evtchn_unmask(evtchn_unmask_
if ( test_and_clear_bit(port, s->evtchn_mask) &&
test_bit (port, s->evtchn_pending) &&
!test_and_set_bit (port / BITS_PER_LONG,
- &v->vcpu_info->evtchn_pending_sel) &&
- !test_and_set_bit (0, &v->vcpu_info->evtchn_upcall_pending) )
- {
- evtchn_notify(v);
+ &v->vcpu_info->evtchn_pending_sel) )
+ {
+ vcpu_mark_events_pending(v);
}
spin_unlock(&d->evtchn_lock);
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/asm-ia64/event.h
--- a/xen/include/asm-ia64/event.h Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/asm-ia64/event.h Thu Jun 15 10:23:57 2006 -0600
@@ -12,7 +12,7 @@
#include <public/arch-ia64.h>
#include <asm/vcpu.h>
-static inline void evtchn_notify(struct vcpu *v)
+static inline void vcpu_kick(struct vcpu *v)
{
/*
* NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
@@ -30,6 +30,12 @@ static inline void evtchn_notify(struct
if(!VMX_DOMAIN(v) && !v->arch.event_callback_ip)
vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector);
+}
+
+static inline void vcpu_mark_events_pending(struct vcpu *v)
+{
+ if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
+ vcpu_kick(v);
}
/* Note: Bitwise operations result in fast code with no branches. */
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/asm-x86/event.h Thu Jun 15 10:23:57 2006 -0600
@@ -9,7 +9,7 @@
#ifndef __ASM_EVENT_H__
#define __ASM_EVENT_H__
-static inline void evtchn_notify(struct vcpu *v)
+static inline void vcpu_kick(struct vcpu *v)
{
/*
* NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
@@ -24,6 +24,12 @@ static inline void evtchn_notify(struct
vcpu_unblock(v);
if ( running )
smp_send_event_check_cpu(v->processor);
+}
+
+static inline void vcpu_mark_events_pending(struct vcpu *v)
+{
+ if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
+ vcpu_kick(v);
}
static inline int local_events_need_delivery(void)
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/xen/elf.h
--- a/xen/include/xen/elf.h Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/xen/elf.h Thu Jun 15 10:23:57 2006 -0600
@@ -178,9 +178,9 @@ typedef struct {
#define EM_ALPHA 41 /* DEC ALPHA */
#define EM_SPARCV9 43 /* SPARC version 9 */
#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */
+#define EM_IA_64 50 /* Intel Merced */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_VAX 75 /* DEC VAX */
-#define EM_NUM 15 /* number of machine types */
/* Version */
#define EV_NONE 0 /* Invalid */
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/kernel/fork.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/kernel/fork.c Thu Jun 15 10:23:57 2006 -0600
@@ -0,0 +1,1619 @@
+/*
+ * linux/kernel/fork.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'fork.c' contains the help-routines for the 'fork' system call
+ * (see also entry.S and others).
+ * Fork is rather simple, once you get the hang of it, but the memory
+ * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/unistd.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/completion.h>
+#include <linux/namespace.h>
+#include <linux/personality.h>
+#include <linux/mempolicy.h>
+#include <linux/sem.h>
+#include <linux/file.h>
+#include <linux/key.h>
+#include <linux/binfmts.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/security.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/jiffies.h>
+#include <linux/futex.h>
+#include <linux/rcupdate.h>
+#include <linux/ptrace.h>
+#include <linux/mount.h>
+#include <linux/audit.h>
+#include <linux/profile.h>
+#include <linux/rmap.h>
+#include <linux/acct.h>
+#include <linux/cn_proc.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Protected counters by write_lock_irq(&tasklist_lock)
+ */
+unsigned long total_forks; /* Handle normal Linux uptimes. */
+int nr_threads; /* The idle threads do not count.. */
+
+int max_threads; /* tunable limit on nr_threads */
+
+DEFINE_PER_CPU(unsigned long, process_counts) = 0;
+
+ __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
+
+EXPORT_SYMBOL(tasklist_lock);
+
+int nr_processes(void)
+{
+ int cpu;
+ int total = 0;
+
+ for_each_online_cpu(cpu)
+ total += per_cpu(process_counts, cpu);
+
+ return total;
+}
+
+#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+# define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
+# define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk))
+static kmem_cache_t *task_struct_cachep;
+#endif
+
+/* SLAB cache for signal_struct structures (tsk->signal) */
+kmem_cache_t *signal_cachep;
+
+/* SLAB cache for sighand_struct structures (tsk->sighand) */
+kmem_cache_t *sighand_cachep;
+
+/* SLAB cache for files_struct structures (tsk->files) */
+kmem_cache_t *files_cachep;
+
+/* SLAB cache for fs_struct structures (tsk->fs) */
+kmem_cache_t *fs_cachep;
+
+/* SLAB cache for vm_area_struct structures */
+kmem_cache_t *vm_area_cachep;
+
+/* SLAB cache for mm_struct structures (tsk->mm) */
+static kmem_cache_t *mm_cachep;
+
+void free_task(struct task_struct *tsk)
+{
+ free_thread_info(tsk->thread_info);
+ free_task_struct(tsk);
+}
+EXPORT_SYMBOL(free_task);
+
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+ WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+ WARN_ON(atomic_read(&tsk->usage));
+ WARN_ON(tsk == current);
+
+ if (unlikely(tsk->audit_context))
+ audit_free(tsk);
+ security_task_free(tsk);
+ free_uid(tsk->user);
+ put_group_info(tsk->group_info);
+
+ if (!profile_handoff_task(tsk))
+ free_task(tsk);
+}
+
+void __init fork_init(unsigned long mempages)
+{
+#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef ARCH_MIN_TASKALIGN
+#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
+#endif
+ /* create a slab on which task_structs can be allocated */
+ task_struct_cachep =
+ kmem_cache_create("task_struct", sizeof(struct task_struct),
+ ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+#endif
+
+ /*
+ * The default maximum number of threads is set to a safe
+ * value: the thread structures can take up at most half
+ * of memory.
+ */
+ max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
+
+ /*
+ * we need to allow at least 20 threads to boot a system
+ */
+ if(max_threads < 20)
+ max_threads = 20;
+
+ init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
+ init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
+ init_task.signal->rlim[RLIMIT_SIGPENDING] =
+ init_task.signal->rlim[RLIMIT_NPROC];
+}
+
+static struct task_struct *dup_task_struct(struct task_struct *orig)
+{
+ struct task_struct *tsk;
+ struct thread_info *ti;
+
+ prepare_to_copy(orig);
+
+ tsk = alloc_task_struct();
+ if (!tsk)
+ return NULL;
+
+ ti = alloc_thread_info(tsk);
+ if (!ti) {
+ free_task_struct(tsk);
+ return NULL;
+ }
+
+ *tsk = *orig;
+ tsk->thread_info = ti;
+ setup_thread_stack(tsk, orig);
+
+ /* One for us, one for whoever does the "release_task()" (usually parent) */
+ atomic_set(&tsk->usage,2);
+ atomic_set(&tsk->fs_excl, 0);
+ return tsk;
+}
+
+#ifdef CONFIG_MMU
+static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ struct vm_area_struct *mpnt, *tmp, **pprev;
+ struct rb_node **rb_link, *rb_parent;
+ int retval;
+ unsigned long charge;
+ struct mempolicy *pol;
+
+ down_write(&oldmm->mmap_sem);
+ flush_cache_mm(oldmm);
+ down_write(&mm->mmap_sem);
+
+ mm->locked_vm = 0;
+ mm->mmap = NULL;
+ mm->mmap_cache = NULL;
+ mm->free_area_cache = oldmm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+ mm->map_count = 0;
+ cpus_clear(mm->cpu_vm_mask);
+ mm->mm_rb = RB_ROOT;
+ rb_link = &mm->mm_rb.rb_node;
+ rb_parent = NULL;
+ pprev = &mm->mmap;
+
+ for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
+ struct file *file;
+
+ if (mpnt->vm_flags & VM_DONTCOPY) {
+ long pages = vma_pages(mpnt);
+ mm->total_vm -= pages;
+ vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
+ -pages);
+ continue;
+ }
+ charge = 0;
+ if (mpnt->vm_flags & VM_ACCOUNT) {
+ unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ if (security_vm_enough_memory(len))
+ goto fail_nomem;
+ charge = len;
+ }
+ tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!tmp)
+ goto fail_nomem;
+ *tmp = *mpnt;
+ pol = mpol_copy(vma_policy(mpnt));
+ retval = PTR_ERR(pol);
+ if (IS_ERR(pol))
+ goto fail_nomem_policy;
+ vma_set_policy(tmp, pol);
+ tmp->vm_flags &= ~VM_LOCKED;
+ tmp->vm_mm = mm;
+ tmp->vm_next = NULL;
+ anon_vma_link(tmp);
+ file = tmp->vm_file;
+ if (file) {
+ struct inode *inode = file->f_dentry->d_inode;
+ get_file(file);
+ if (tmp->vm_flags & VM_DENYWRITE)
+ atomic_dec(&inode->i_writecount);
+
+ /* insert tmp into the share list, just after mpnt */
+ spin_lock(&file->f_mapping->i_mmap_lock);
+ tmp->vm_truncate_count = mpnt->vm_truncate_count;
+ flush_dcache_mmap_lock(file->f_mapping);
+ vma_prio_tree_add(tmp, mpnt);
+ flush_dcache_mmap_unlock(file->f_mapping);
+ spin_unlock(&file->f_mapping->i_mmap_lock);
+ }
+
+ /*
+ * Link in the new vma and copy the page table entries.
+ */
+ *pprev = tmp;
+ pprev = &tmp->vm_next;
+
+ __vma_link_rb(mm, tmp, rb_link, rb_parent);
+ rb_link = &tmp->vm_rb.rb_right;
+ rb_parent = &tmp->vm_rb;
+
+ mm->map_count++;
+ retval = copy_page_range(mm, oldmm, mpnt);
+
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+
+ if (retval)
+ goto out;
+ }
+#ifdef arch_dup_mmap
+ arch_dup_mmap(mm, oldmm);
+#endif
+ retval = 0;
+out:
+ up_write(&mm->mmap_sem);
+ flush_tlb_mm(oldmm);
+ up_write(&oldmm->mmap_sem);
+ return retval;
+fail_nomem_policy:
+ kmem_cache_free(vm_area_cachep, tmp);
+fail_nomem:
+ retval = -ENOMEM;
+ vm_unacct_memory(charge);
+ goto out;
+}
+
+static inline int mm_alloc_pgd(struct mm_struct * mm)
+{
+ mm->pgd = pgd_alloc(mm);
+ if (unlikely(!mm->pgd))
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void mm_free_pgd(struct mm_struct * mm)
+{
+ pgd_free(mm->pgd);
+}
+#else
+#define dup_mmap(mm, oldmm) (0)
+#define mm_alloc_pgd(mm) (0)
+#define mm_free_pgd(mm)
+#endif /* CONFIG_MMU */
+
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
+
+#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
+#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+
+#include <linux/init_task.h>
+
+static struct mm_struct * mm_init(struct mm_struct * mm)
+{
+ atomic_set(&mm->mm_users, 1);
+ atomic_set(&mm->mm_count, 1);
+ init_rwsem(&mm->mmap_sem);
+ INIT_LIST_HEAD(&mm->mmlist);
+ mm->core_waiters = 0;
+ mm->nr_ptes = 0;
+ set_mm_counter(mm, file_rss, 0);
+ set_mm_counter(mm, anon_rss, 0);
+ spin_lock_init(&mm->page_table_lock);
+ rwlock_init(&mm->ioctx_list_lock);
+ mm->ioctx_list = NULL;
+ mm->free_area_cache = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = ~0UL;
+
+ if (likely(!mm_alloc_pgd(mm))) {
+ mm->def_flags = 0;
+ return mm;
+ }
+ free_mm(mm);
+ return NULL;
+}
+
+/*
+ * Allocate and initialize an mm_struct.
+ */
+struct mm_struct * mm_alloc(void)
+{
+ struct mm_struct * mm;
+
+ mm = allocate_mm();
+ if (mm) {
+ memset(mm, 0, sizeof(*mm));
+ mm = mm_init(mm);
+ }
+ return mm;
+}
+
+/*
+ * Called when the last reference to the mm
+ * is dropped: either by a lazy thread or by
+ * mmput. Free the page directory and the mm.
+ */
+void fastcall __mmdrop(struct mm_struct *mm)
+{
+ BUG_ON(mm == &init_mm);
+ mm_free_pgd(mm);
+ destroy_context(mm);
+ free_mm(mm);
+}
+
+/*
+ * Decrement the use count and release all resources for an mm.
+ */
+void mmput(struct mm_struct *mm)
+{
+ if (atomic_dec_and_test(&mm->mm_users)) {
+ exit_aio(mm);
+ exit_mmap(mm);
+ if (!list_empty(&mm->mmlist)) {
+ spin_lock(&mmlist_lock);
+ list_del(&mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ put_swap_token(mm);
+ mmdrop(mm);
+ }
+}
+EXPORT_SYMBOL_GPL(mmput);
+
+/**
+ * get_task_mm - acquire a reference to the task's mm
+ *
+ * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning
+ * this kernel workthread has transiently adopted a user mm with use_mm,
+ * to do its AIO) is not set and if so returns a reference to it, after
+ * bumping up the use count. User must release the mm via mmput()
+ * after use. Typically used by /proc and ptrace.
+ */
+struct mm_struct *get_task_mm(struct task_struct *task)
+{
+ struct mm_struct *mm;
+
+ task_lock(task);
+ mm = task->mm;
+ if (mm) {
+ if (task->flags & PF_BORROWED_MM)
+ mm = NULL;
+ else
+ atomic_inc(&mm->mm_users);
+ }
+ task_unlock(task);
+ return mm;
+}
+EXPORT_SYMBOL_GPL(get_task_mm);
+
+/* Please note the differences between mmput and mm_release.
+ * mmput is called whenever we stop holding onto a mm_struct,
+ * error success whatever.
+ *
+ * mm_release is called after a mm_struct has been removed
+ * from the current process.
+ *
+ * This difference is important for error handling, when we
+ * only half set up a mm_struct for a new process and need to restore
+ * the old one. Because we mmput the new mm_struct before
+ * restoring the old one. . .
+ * Eric Biederman 10 January 1998
+ */
+void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+ struct completion *vfork_done = tsk->vfork_done;
+
+ /* Get rid of any cached register state */
+ deactivate_mm(tsk, mm);
+
+ /* notify parent sleeping on vfork() */
+ if (vfork_done) {
+ tsk->vfork_done = NULL;
+ complete(vfork_done);
+ }
+ if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
+ u32 __user * tidptr = tsk->clear_child_tid;
+ tsk->clear_child_tid = NULL;
+
+ /*
+ * We don't check the error code - if userspace has
+ * not set up a proper pointer then tough luck.
+ */
+ put_user(0, tidptr);
+ sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
+ }
+}
+
+/*
+ * Allocate a new mm structure and copy contents from the
+ * mm structure of the passed in task structure.
+ */
+static struct mm_struct *dup_mm(struct task_struct *tsk)
+{
+ struct mm_struct *mm, *oldmm = current->mm;
+ int err;
+
+ if (!oldmm)
+ return NULL;
+
+ mm = allocate_mm();
+ if (!mm)
+ goto fail_nomem;
+
+ memcpy(mm, oldmm, sizeof(*mm));
+
+ if (!mm_init(mm))
+ goto fail_nomem;
+
+ if (init_new_context(tsk, mm))
+ goto fail_nocontext;
+
+ err = dup_mmap(mm, oldmm);
+ if (err)
+ goto free_pt;
+
+ mm->hiwater_rss = get_mm_rss(mm);
+ mm->hiwater_vm = mm->total_vm;
+
+ return mm;
+
+free_pt:
+ mmput(mm);
+
+fail_nomem:
+ return NULL;
+
+fail_nocontext:
+ /*
+ * If init_new_context() failed, we cannot use mmput() to free the mm
+ * because it calls destroy_context()
+ */
+ mm_free_pgd(mm);
+ free_mm(mm);
+ return NULL;
+}
+
+static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct mm_struct * mm, *oldmm;
+ int retval;
+
+ tsk->min_flt = tsk->maj_flt = 0;
+ tsk->nvcsw = tsk->nivcsw = 0;
+
+ tsk->mm = NULL;
+ tsk->active_mm = NULL;
+
+ /*
+ * Are we cloning a kernel thread?
+ *
+ * We need to steal a active VM for that..
+ */
+ oldmm = current->mm;
+ if (!oldmm)
+ return 0;
+
+ if (clone_flags & CLONE_VM) {
+ atomic_inc(&oldmm->mm_users);
+ mm = oldmm;
+ goto good_mm;
+ }
+
+ retval = -ENOMEM;
+ mm = dup_mm(tsk);
+ if (!mm)
+ goto fail_nomem;
+
+good_mm:
+ tsk->mm = mm;
+ tsk->active_mm = mm;
+ return 0;
+
+fail_nomem:
+ return retval;
+}
+
+static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+{
+ struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+ /* We don't need to lock fs - think why ;-) */
+ if (fs) {
+ atomic_set(&fs->count, 1);
+ rwlock_init(&fs->lock);
+ fs->umask = old->umask;
+ read_lock(&old->lock);
+ fs->rootmnt = mntget(old->rootmnt);
+ fs->root = dget(old->root);
+ fs->pwdmnt = mntget(old->pwdmnt);
+ fs->pwd = dget(old->pwd);
+ if (old->altroot) {
+ fs->altrootmnt = mntget(old->altrootmnt);
+ fs->altroot = dget(old->altroot);
+ } else {
+ fs->altrootmnt = NULL;
+ fs->altroot = NULL;
+ }
+ read_unlock(&old->lock);
+ }
+ return fs;
+}
+
+struct fs_struct *copy_fs_struct(struct fs_struct *old)
+{
+ return __copy_fs_struct(old);
+}
+
+EXPORT_SYMBOL_GPL(copy_fs_struct);
+
+static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+{
+ if (clone_flags & CLONE_FS) {
+ atomic_inc(&current->fs->count);
+ return 0;
+ }
+ tsk->fs = __copy_fs_struct(current->fs);
+ if (!tsk->fs)
+ return -ENOMEM;
+ return 0;
+}
+
+static int count_open_files(struct fdtable *fdt)
+{
+ int size = fdt->max_fdset;
+ int i;
+
+ /* Find the last open fd */
+ for (i = size/(8*sizeof(long)); i > 0; ) {
+ if (fdt->open_fds->fds_bits[--i])
+ break;
+ }
+ i = (i+1) * 8 * sizeof(long);
+ return i;
+}
+
+static struct files_struct *alloc_files(void)
+{
+ struct files_struct *newf;
+ struct fdtable *fdt;
+
+ newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+ if (!newf)
+ goto out;
+
+ atomic_set(&newf->count, 1);
+
+ spin_lock_init(&newf->file_lock);
+ fdt = &newf->fdtab;
+ fdt->next_fd = 0;
+ fdt->max_fds = NR_OPEN_DEFAULT;
+ fdt->max_fdset = __FD_SETSIZE;
+ fdt->close_on_exec = &newf->close_on_exec_init;
+ fdt->open_fds = &newf->open_fds_init;
+ fdt->fd = &newf->fd_array[0];
+ INIT_RCU_HEAD(&fdt->rcu);
+ fdt->free_files = NULL;
+ fdt->next = NULL;
+ rcu_assign_pointer(newf->fdt, fdt);
+out:
+ return newf;
+}
+
+/*
+ * Allocate a new files structure and copy contents from the
+ * passed in files structure.
+ */
+static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
+{
+ struct files_struct *newf;
+ struct file **old_fds, **new_fds;
+ int open_files, size, i, expand;
+ struct fdtable *old_fdt, *new_fdt;
+
+ newf = alloc_files();
+ if (!newf)
+ goto out;
+
+ spin_lock(&oldf->file_lock);
+ old_fdt = files_fdtable(oldf);
+ new_fdt = files_fdtable(newf);
+ size = old_fdt->max_fdset;
+ open_files = count_open_files(old_fdt);
+ expand = 0;
+
+ /*
+ * Check whether we need to allocate a larger fd array or fd set.
+ * Note: we're not a clone task, so the open count won't change.
+ */
+ if (open_files > new_fdt->max_fdset) {
+ new_fdt->max_fdset = 0;
+ expand = 1;
+ }
+ if (open_files > new_fdt->max_fds) {
+ new_fdt->max_fds = 0;
+ expand = 1;
+ }
+
+ /* if the old fdset gets grown now, we'll only copy up to "size" fds */
+ if (expand) {
+ spin_unlock(&oldf->file_lock);
+ spin_lock(&newf->file_lock);
+ *errorp = expand_files(newf, open_files-1);
+ spin_unlock(&newf->file_lock);
+ if (*errorp < 0)
+ goto out_release;
+ new_fdt = files_fdtable(newf);
+ /*
+ * Reacquire the oldf lock and a pointer to its fd table
+ * who knows it may have a new bigger fd table. We need
+ * the latest pointer.
+ */
+ spin_lock(&oldf->file_lock);
+ old_fdt = files_fdtable(oldf);
+ }
+
+ old_fds = old_fdt->fd;
+ new_fds = new_fdt->fd;
+
+ memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
+ memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
+
+ for (i = open_files; i != 0; i--) {
+ struct file *f = *old_fds++;
+ if (f) {
+ get_file(f);
+ } else {
+ /*
+ * The fd may be claimed in the fd bitmap but not yet
+ * instantiated in the files array if a sibling thread
+ * is partway through open(). So make sure that this
+ * fd is available to the new process.
+ */
+ FD_CLR(open_files - i, new_fdt->open_fds);
+ }
+ rcu_assign_pointer(*new_fds++, f);
+ }
+ spin_unlock(&oldf->file_lock);
+
+ /* compute the remainder to be cleared */
+ size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
+
+ /* This is long word aligned thus could use a optimized version */
+ memset(new_fds, 0, size);
+
+ if (new_fdt->max_fdset > open_files) {
+ int left = (new_fdt->max_fdset-open_files)/8;
+ int start = open_files / (8 * sizeof(unsigned long));
+
+ memset(&new_fdt->open_fds->fds_bits[start], 0, left);
+ memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
+ }
+
+out:
+ return newf;
+
+out_release:
+ free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
+ free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
+ free_fd_array(new_fdt->fd, new_fdt->max_fds);
+ kmem_cache_free(files_cachep, newf);
+ return NULL;
+}
+
+static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct files_struct *oldf, *newf;
+ int error = 0;
+
+ /*
+ * A background process may not have any files ...
+ */
+ oldf = current->files;
+ if (!oldf)
+ goto out;
+
+ if (clone_flags & CLONE_FILES) {
+ atomic_inc(&oldf->count);
+ goto out;
+ }
+
+ /*
+ * Note: we may be using current for both targets (See exec.c)
+ * This works because we cache current->files (old) as oldf. Don't
+ * break this.
+ */
+ tsk->files = NULL;
+ error = -ENOMEM;
+ newf = dup_fd(oldf, &error);
+ if (!newf)
+ goto out;
+
+ tsk->files = newf;
+ error = 0;
+out:
+ return error;
+}
+
+/*
+ * Helper to unshare the files of the current task.
+ * We don't want to expose copy_files internals to
+ * the exec layer of the kernel.
+ */
+
+int unshare_files(void)
+{
+ struct files_struct *files = current->files;
+ int rc;
+
+ if(!files)
+ BUG();
+
+ /* This can race but the race causes us to copy when we don't
+ need to and drop the copy */
+ if(atomic_read(&files->count) == 1)
+ {
+ atomic_inc(&files->count);
+ return 0;
+ }
+ rc = copy_files(0, current);
+ if(rc)
+ current->files = files;
+ return rc;
+}
+
+EXPORT_SYMBOL(unshare_files);
+
+void sighand_free_cb(struct rcu_head *rhp)
+{
+ struct sighand_struct *sp;
+
+ sp = container_of(rhp, struct sighand_struct, rcu);
+ kmem_cache_free(sighand_cachep, sp);
+}
+
+static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct sighand_struct *sig;
+
+ if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
+ atomic_inc(&current->sighand->count);
+ return 0;
+ }
+ sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+ rcu_assign_pointer(tsk->sighand, sig);
+ if (!sig)
+ return -ENOMEM;
+ spin_lock_init(&sig->siglock);
+ atomic_set(&sig->count, 1);
+ memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ return 0;
+}
+
+static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct signal_struct *sig;
+ int ret;
+
+ if (clone_flags & CLONE_THREAD) {
+ atomic_inc(&current->signal->count);
+ atomic_inc(&current->signal->live);
+ return 0;
+ }
+ sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+ tsk->signal = sig;
+ if (!sig)
+ return -ENOMEM;
+
+ ret = copy_thread_group_keys(tsk);
+ if (ret < 0) {
+ kmem_cache_free(signal_cachep, sig);
+ return ret;
+ }
+
+ atomic_set(&sig->count, 1);
+ atomic_set(&sig->live, 1);
+ init_waitqueue_head(&sig->wait_chldexit);
+ sig->flags = 0;
+ sig->group_exit_code = 0;
+ sig->group_exit_task = NULL;
+ sig->group_stop_count = 0;
+ sig->curr_target = NULL;
+ init_sigpending(&sig->shared_pending);
+ INIT_LIST_HEAD(&sig->posix_timers);
+
+ hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
+ sig->it_real_incr.tv64 = 0;
+ sig->real_timer.function = it_real_fn;
+ sig->real_timer.data = tsk;
+
+ sig->it_virt_expires = cputime_zero;
+ sig->it_virt_incr = cputime_zero;
+ sig->it_prof_expires = cputime_zero;
+ sig->it_prof_incr = cputime_zero;
+
+ sig->leader = 0; /* session leadership doesn't inherit */
+ sig->tty_old_pgrp = 0;
+
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+ sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+ sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+ sig->sched_time = 0;
+ INIT_LIST_HEAD(&sig->cpu_timers[0]);
+ INIT_LIST_HEAD(&sig->cpu_timers[1]);
+ INIT_LIST_HEAD(&sig->cpu_timers[2]);
+
+ task_lock(current->group_leader);
+ memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
+ task_unlock(current->group_leader);
+
+ if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+ /*
+ * New sole thread in the process gets an expiry time
+ * of the whole CPU time limit.
+ */
+ tsk->it_prof_expires =
+ secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+ }
+
+ return 0;
+}
+
+static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+{
+ unsigned long new_flags = p->flags;
+
+ new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
+ new_flags |= PF_FORKNOEXEC;
+ if (!(clone_flags & CLONE_PTRACE))
+ p->ptrace = 0;
+ p->flags = new_flags;
+}
+
+asmlinkage long sys_set_tid_address(int __user *tidptr)
+{
+ current->clear_child_tid = tidptr;
+
+ return current->pid;
+}
+
+/*
+ * This creates a new process as a copy of the old one,
+ * but does not actually start it yet.
+ *
+ * It copies the registers, and all the appropriate
+ * parts of the process environment (as per the clone
+ * flags). The actual kick-off is left to the caller.
+ */
+static task_t *copy_process(unsigned long clone_flags,
+ unsigned long stack_start,
+ struct pt_regs *regs,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+ int __user *child_tidptr,
+ int pid)
+{
+ int retval;
+ struct task_struct *p = NULL;
+
+ if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Thread groups must share signals as well, and detached threads
+ * can only be started up within the thread group.
+ */
+ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Shared signal handlers imply shared VM. By way of the above,
+ * thread groups also imply shared VM. Blocking this case allows
+ * for various simplifications in other code.
+ */
+ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
+ return ERR_PTR(-EINVAL);
+
+ retval = security_task_create(clone_flags);
+ if (retval)
+ goto fork_out;
+
+ retval = -ENOMEM;
+ p = dup_task_struct(current);
+ if (!p)
+ goto fork_out;
+
+ retval = -EAGAIN;
+ if (atomic_read(&p->user->processes) >=
+ p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
+ p->user != &root_user)
+ goto bad_fork_free;
+ }
+
+ atomic_inc(&p->user->__count);
+ atomic_inc(&p->user->processes);
+ get_group_info(p->group_info);
+
+ /*
+ * If multiple threads are within copy_process(), then this check
+ * triggers too late. This doesn't hurt, the check is only there
+ * to stop root fork bombs.
+ */
+ if (nr_threads >= max_threads)
+ goto bad_fork_cleanup_count;
+
+ if (!try_module_get(task_thread_info(p)->exec_domain->module))
+ goto bad_fork_cleanup_count;
+
+ if (p->binfmt && !try_module_get(p->binfmt->module))
+ goto bad_fork_cleanup_put_domain;
+
+ p->did_exec = 0;
+ copy_flags(clone_flags, p);
+ p->pid = pid;
+ retval = -EFAULT;
+ if (clone_flags & CLONE_PARENT_SETTID)
+ if (put_user(p->pid, parent_tidptr))
+ goto bad_fork_cleanup;
+
+ p->proc_dentry = NULL;
+
+ INIT_LIST_HEAD(&p->children);
+ INIT_LIST_HEAD(&p->sibling);
+ p->vfork_done = NULL;
+ spin_lock_init(&p->alloc_lock);
+ spin_lock_init(&p->proc_lock);
+
+ clear_tsk_thread_flag(p, TIF_SIGPENDING);
+ init_sigpending(&p->pending);
+
+ p->utime = cputime_zero;
+ p->stime = cputime_zero;
+ p->sched_time = 0;
+ p->rchar = 0; /* I/O counter: bytes read */
+ p->wchar = 0; /* I/O counter: bytes written */
+ p->syscr = 0; /* I/O counter: read syscalls */
+ p->syscw = 0; /* I/O counter: write syscalls */
+ acct_clear_integrals(p);
+
+ p->it_virt_expires = cputime_zero;
+ p->it_prof_expires = cputime_zero;
+ p->it_sched_expires = 0;
+ INIT_LIST_HEAD(&p->cpu_timers[0]);
+ INIT_LIST_HEAD(&p->cpu_timers[1]);
+ INIT_LIST_HEAD(&p->cpu_timers[2]);
+
+ p->lock_depth = -1; /* -1 = no lock */
+ do_posix_clock_monotonic_gettime(&p->start_time);
+ p->security = NULL;
+ p->io_context = NULL;
+ p->io_wait = NULL;
+ p->audit_context = NULL;
+ cpuset_fork(p);
+#ifdef CONFIG_NUMA
+ p->mempolicy = mpol_copy(p->mempolicy);
+ if (IS_ERR(p->mempolicy)) {
+ retval = PTR_ERR(p->mempolicy);
+ p->mempolicy = NULL;
+ goto bad_fork_cleanup_cpuset;
+ }
+#endif
+
+#ifdef CONFIG_DEBUG_MUTEXES
+ p->blocked_on = NULL; /* not blocked yet */
+#endif
+
+ p->tgid = p->pid;
+ if (clone_flags & CLONE_THREAD)
+ p->tgid = current->tgid;
+
+ if ((retval = security_task_alloc(p)))
+ goto bad_fork_cleanup_policy;
+ if ((retval = audit_alloc(p)))
+ goto bad_fork_cleanup_security;
+ /* copy all the process information */
+ if ((retval = copy_semundo(clone_flags, p)))
+ goto bad_fork_cleanup_audit;
+ if ((retval = copy_files(clone_flags, p)))
+ goto bad_fork_cleanup_semundo;
+ if ((retval = copy_fs(clone_flags, p)))
+ goto bad_fork_cleanup_files;
+ if ((retval = copy_sighand(clone_flags, p)))
+ goto bad_fork_cleanup_fs;
+ if ((retval = copy_signal(clone_flags, p)))
+ goto bad_fork_cleanup_sighand;
+ if ((retval = copy_mm(clone_flags, p)))
+ goto bad_fork_cleanup_signal;
+ if ((retval = copy_keys(clone_flags, p)))
+ goto bad_fork_cleanup_mm;
+ if ((retval = copy_namespace(clone_flags, p)))
+ goto bad_fork_cleanup_keys;
+ retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
+ if (retval)
+ goto bad_fork_cleanup_namespace;
+
+ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+ /*
+ * Clear TID on mm_release()?
+ */
+ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+
+ /*
+ * sigaltstack should be cleared when sharing the same VM
+ */
+ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
+ p->sas_ss_sp = p->sas_ss_size = 0;
+
+ /*
+ * Syscall tracing should be turned off in the child regardless
+ * of CLONE_PTRACE.
+ */
+ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+ clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
+#endif
+
+ /* Our parent execution domain becomes current domain
+ These must match for thread signalling to apply */
+
+ p->parent_exec_id = p->self_exec_id;
+
+ /* ok, now we should be set up.. */
+ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
+ p->pdeath_signal = 0;
+ p->exit_state = 0;
+
+ /*
+ * Ok, make it visible to the rest of the system.
+ * We dont wake it up yet.
+ */
+ p->group_leader = p;
+ INIT_LIST_HEAD(&p->ptrace_children);
+ INIT_LIST_HEAD(&p->ptrace_list);
+
+ /* Perform scheduler related setup. Assign this task to a CPU. */
+ sched_fork(p, clone_flags);
+
+ /* Need tasklist lock for parent etc handling! */
+ write_lock_irq(&tasklist_lock);
+
+ /*
+ * The task hasn't been attached yet, so its cpus_allowed mask will
+ * not be changed, nor will its assigned CPU.
+ *
+ * The cpus_allowed mask of the parent may have changed after it was
+ * copied first time - so re-copy it here, then check the child's CPU
+ * to ensure it is on a valid CPU (and if not, just force it back to
+ * parent's CPU). This avoids alot of nasty races.
+ */
+ p->cpus_allowed = current->cpus_allowed;
+ if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
+ !cpu_online(task_cpu(p))))
+ set_task_cpu(p, smp_processor_id());
+
+ /*
+ * Check for pending SIGKILL! The new thread should not be allowed
+ * to slip out of an OOM kill. (or normal SIGKILL.)
+ */
+ if (sigismember(&current->pending.signal, SIGKILL)) {
+ write_unlock_irq(&tasklist_lock);
+ retval = -EINTR;
+ goto bad_fork_cleanup_namespace;
+ }
+
+ /* CLONE_PARENT re-uses the old parent */
+ if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+ p->real_parent = current->real_parent;
+ else
+ p->real_parent = current;
+ p->parent = p->real_parent;
+
+ spin_lock(&current->sighand->siglock);
+ if (clone_flags & CLONE_THREAD) {
+ /*
+ * Important: if an exit-all has been started then
+ * do not create this new thread - the whole thread
+ * group is supposed to exit anyway.
+ */
+ if (current->signal->flags & SIGNAL_GROUP_EXIT) {
+ spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+ retval = -EAGAIN;
+ goto bad_fork_cleanup_namespace;
+ }
+ p->group_leader = current->group_leader;
+
+ if (current->signal->group_stop_count > 0) {
+ /*
+ * There is an all-stop in progress for the group.
+ * We ourselves will stop as soon as we check signals.
+ * Make the new thread part of that group stop too.
+ */
+ current->signal->group_stop_count++;
+ set_tsk_thread_flag(p, TIF_SIGPENDING);
+ }
+
+ if (!cputime_eq(current->signal->it_virt_expires,
+ cputime_zero) ||
+ !cputime_eq(current->signal->it_prof_expires,
+ cputime_zero) ||
+ current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
+ !list_empty(&current->signal->cpu_timers[0]) ||
+ !list_empty(&current->signal->cpu_timers[1]) ||
+ !list_empty(&current->signal->cpu_timers[2])) {
+ /*
+ * Have child wake up on its first tick to check
+ * for process CPU timers.
+ */
+ p->it_prof_expires = jiffies_to_cputime(1);
+ }
+ }
+
+ /*
+ * inherit ioprio
+ */
+ p->ioprio = current->ioprio;
+
+ SET_LINKS(p);
+ if (unlikely(p->ptrace & PT_PTRACED))
+ __ptrace_link(p, current->parent);
+
+ if (thread_group_leader(p)) {
+ p->signal->tty = current->signal->tty;
+ p->signal->pgrp = process_group(current);
+ p->signal->session = current->signal->session;
+ attach_pid(p, PIDTYPE_PGID, process_group(p));
+ attach_pid(p, PIDTYPE_SID, p->signal->session);
+ if (p->pid)
+ __get_cpu_var(process_counts)++;
+ }
+ attach_pid(p, PIDTYPE_TGID, p->tgid);
+ attach_pid(p, PIDTYPE_PID, p->pid);
+
+ nr_threads++;
+ total_forks++;
+ spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+ proc_fork_connector(p);
+ return p;
+
+bad_fork_cleanup_namespace:
+ exit_namespace(p);
+bad_fork_cleanup_keys:
+ exit_keys(p);
+bad_fork_cleanup_mm:
+ if (p->mm)
+ mmput(p->mm);
+bad_fork_cleanup_signal:
+ exit_signal(p);
+bad_fork_cleanup_sighand:
+ exit_sighand(p);
+bad_fork_cleanup_fs:
+ exit_fs(p); /* blocking */
+bad_fork_cleanup_files:
+ exit_files(p); /* blocking */
+bad_fork_cleanup_semundo:
+ exit_sem(p);
+bad_fork_cleanup_audit:
+ audit_free(p);
+bad_fork_cleanup_security:
+ security_task_free(p);
+bad_fork_cleanup_policy:
+#ifdef CONFIG_NUMA
+ mpol_free(p->mempolicy);
+bad_fork_cleanup_cpuset:
+#endif
+ cpuset_exit(p);
+bad_fork_cleanup:
+ if (p->binfmt)
+ module_put(p->binfmt->module);
+bad_fork_cleanup_put_domain:
+ module_put(task_thread_info(p)->exec_domain->module);
+bad_fork_cleanup_count:
+ put_group_info(p->group_info);
+ atomic_dec(&p->user->processes);
+ free_uid(p->user);
+bad_fork_free:
+ free_task(p);
+fork_out:
+ return ERR_PTR(retval);
+}
+
+struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+{
+ memset(regs, 0, sizeof(struct pt_regs));
+ return regs;
+}
+
+task_t * __devinit fork_idle(int cpu)
+{
+ task_t *task;
+ struct pt_regs regs;
+
+ task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+ if (!task)
+ return ERR_PTR(-ENOMEM);
+ init_idle(task, cpu);
+ unhash_process(task);
+ return task;
+}
+
+static inline int fork_traceflag (unsigned clone_flags)
+{
+ if (clone_flags & CLONE_UNTRACED)
+ return 0;
+ else if (clone_flags & CLONE_VFORK) {
+ if (current->ptrace & PT_TRACE_VFORK)
+ return PTRACE_EVENT_VFORK;
+ } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
+ if (current->ptrace & PT_TRACE_CLONE)
+ return PTRACE_EVENT_CLONE;
+ } else if (current->ptrace & PT_TRACE_FORK)
+ return PTRACE_EVENT_FORK;
+
+ return 0;
+}
+
+/*
+ * Ok, this is the main fork-routine.
+ *
+ * It copies the process, and if successful kick-starts
+ * it and waits for it to finish using the VM if required.
+ */
+long do_fork(unsigned long clone_flags,
+ unsigned long stack_start,
+ struct pt_regs *regs,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+ int __user *child_tidptr)
+{
+ struct task_struct *p;
+ int trace = 0;
+ long pid = alloc_pidmap();
+
+ if (pid < 0)
+ return -EAGAIN;
+ if (unlikely(current->ptrace)) {
+ trace = fork_traceflag (clone_flags);
+ if (trace)
+ clone_flags |= CLONE_PTRACE;
+ }
+
+ p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+ /*
+ * Do this prior waking up the new thread - the thread pointer
+ * might get invalid after that point, if the thread exits quickly.
+ */
+ if (!IS_ERR(p)) {
+ struct completion vfork;
+
+ if (clone_flags & CLONE_VFORK) {
+ p->vfork_done = &vfork;
+ init_completion(&vfork);
+ }
+
+ if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
+ /*
+ * We'll start up with an immediate SIGSTOP.
+ */
+ sigaddset(&p->pending.signal, SIGSTOP);
+ set_tsk_thread_flag(p, TIF_SIGPENDING);
+ }
+
+ if (!(clone_flags & CLONE_STOPPED))
+ wake_up_new_task(p, clone_flags);
+ else
+ p->state = TASK_STOPPED;
+
+ if (unlikely (trace)) {
+ current->ptrace_message = pid;
+ ptrace_notify ((trace << 8) | SIGTRAP);
+ }
+
+ if (clone_flags & CLONE_VFORK) {
+ wait_for_completion(&vfork);
+ if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+ ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+ }
+ } else {
+ free_pidmap(pid);
+ pid = PTR_ERR(p);
+ }
+ return pid;
+}
+
+#ifndef ARCH_MIN_MMSTRUCT_ALIGN
+#define ARCH_MIN_MMSTRUCT_ALIGN 0
+#endif
+
+void __init proc_caches_init(void)
+{
+ sighand_cachep = kmem_cache_create("sighand_cache",
+ sizeof(struct sighand_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ signal_cachep = kmem_cache_create("signal_cache",
+ sizeof(struct signal_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ files_cachep = kmem_cache_create("files_cache",
+ sizeof(struct files_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ fs_cachep = kmem_cache_create("fs_cache",
+ sizeof(struct fs_struct), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), 0,
+ SLAB_PANIC, NULL, NULL);
+ mm_cachep = kmem_cache_create("mm_struct",
+ sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+}
+
+
+/*
+ * Check constraints on flags passed to the unshare system call and
+ * force unsharing of additional process context as appropriate.
+ */
+static inline void check_unshare_flags(unsigned long *flags_ptr)
+{
+ /*
+ * If unsharing a thread from a thread group, must also
+ * unshare vm.
+ */
+ if (*flags_ptr & CLONE_THREAD)
+ *flags_ptr |= CLONE_VM;
+
+ /*
+ * If unsharing vm, must also unshare signal handlers.
+ */
+ if (*flags_ptr & CLONE_VM)
+ *flags_ptr |= CLONE_SIGHAND;
+
+ /*
+ * If unsharing signal handlers and the task was created
+ * using CLONE_THREAD, then must unshare the thread
+ */
+ if ((*flags_ptr & CLONE_SIGHAND) &&
+ (atomic_read(&current->signal->count) > 1))
+ *flags_ptr |= CLONE_THREAD;
+
+ /*
+ * If unsharing namespace, must also unshare filesystem information.
+ */
+ if (*flags_ptr & CLONE_NEWNS)
+ *flags_ptr |= CLONE_FS;
+}
+
+/*
+ * Unsharing of tasks created with CLONE_THREAD is not supported yet
+ */
+static int unshare_thread(unsigned long unshare_flags)
+{
+ if (unshare_flags & CLONE_THREAD)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Unshare the filesystem structure if it is being shared
+ */
+static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
+{
+ struct fs_struct *fs = current->fs;
+
+ if ((unshare_flags & CLONE_FS) &&
+ (fs && atomic_read(&fs->count) > 1)) {
+ *new_fsp = __copy_fs_struct(current->fs);
+ if (!*new_fsp)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Unshare the namespace structure if it is being shared
+ */
+static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
+{
+ struct namespace *ns = current->namespace;
+
+ if ((unshare_flags & CLONE_NEWNS) &&
+ (ns && atomic_read(&ns->count) > 1)) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
+ if (!*new_nsp)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
+ * supported yet
+ */
+static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
+{
+ struct sighand_struct *sigh = current->sighand;
+
+ if ((unshare_flags & CLONE_SIGHAND) &&
+ (sigh && atomic_read(&sigh->count) > 1))
+ return -EINVAL;
+ else
+ return 0;
+}
+
+/*
+ * Unshare vm if it is being shared
+ */
+static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
+{
+ struct mm_struct *mm = current->mm;
+
+ if ((unshare_flags & CLONE_VM) &&
+ (mm && atomic_read(&mm->mm_users) > 1)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Unshare file descriptor table if it is being shared
+ */
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
+{
+ struct files_struct *fd = current->files;
+ int error = 0;
+
+ if ((unshare_flags & CLONE_FILES) &&
+ (fd && atomic_read(&fd->count) > 1)) {
+ *new_fdp = dup_fd(fd, &error);
+ if (!*new_fdp)
+ return error;
+ }
+
+ return 0;
+}
+
+/*
+ * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
+ * supported yet
+ */
+static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
+{
+ if (unshare_flags & CLONE_SYSVSEM)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * unshare allows a process to 'unshare' part of the process
+ * context which was originally shared using clone. copy_*
+ * functions used by do_fork() cannot be used here directly
+ * because they modify an inactive task_struct that is being
+ * constructed. Here we are modifying the current, active,
+ * task_struct.
+ */
+asmlinkage long sys_unshare(unsigned long unshare_flags)
+{
+ int err = 0;
+ struct fs_struct *fs, *new_fs = NULL;
+ struct namespace *ns, *new_ns = NULL;
+ struct sighand_struct *sigh, *new_sigh = NULL;
+ struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
+ struct files_struct *fd, *new_fd = NULL;
+ struct sem_undo_list *new_ulist = NULL;
+
+ check_unshare_flags(&unshare_flags);
+
+ if ((err = unshare_thread(unshare_flags)))
+ goto bad_unshare_out;
+ if ((err = unshare_fs(unshare_flags, &new_fs)))
+ goto bad_unshare_cleanup_thread;
+ if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
+ goto bad_unshare_cleanup_fs;
+ if ((err = unshare_sighand(unshare_flags, &new_sigh)))
+ goto bad_unshare_cleanup_ns;
+ if ((err = unshare_vm(unshare_flags, &new_mm)))
+ goto bad_unshare_cleanup_sigh;
+ if ((err = unshare_fd(unshare_flags, &new_fd)))
+ goto bad_unshare_cleanup_vm;
+ if ((err = unshare_semundo(unshare_flags, &new_ulist)))
+ goto bad_unshare_cleanup_fd;
+
+ if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+
+ task_lock(current);
+
+ if (new_fs) {
+ fs = current->fs;
+ current->fs = new_fs;
+ new_fs = fs;
+ }
+
+ if (new_ns) {
+ ns = current->namespace;
+ current->namespace = new_ns;
+ new_ns = ns;
+ }
+
+ if (new_sigh) {
+ sigh = current->sighand;
+ rcu_assign_pointer(current->sighand, new_sigh);
+ new_sigh = sigh;
+ }
+
+ if (new_mm) {
+ mm = current->mm;
+ active_mm = current->active_mm;
+ current->mm = new_mm;
+ current->active_mm = new_mm;
+ activate_mm(active_mm, new_mm);
+ new_mm = mm;
+ }
+
+ if (new_fd) {
+ fd = current->files;
+ current->files = new_fd;
+ new_fd = fd;
+ }
+
+ task_unlock(current);
+ }
+
+bad_unshare_cleanup_fd:
+ if (new_fd)
+ put_files_struct(new_fd);
+
+bad_unshare_cleanup_vm:
+ if (new_mm)
+ mmput(new_mm);
+
+bad_unshare_cleanup_sigh:
+ if (new_sigh)
+ if (atomic_dec_and_test(&new_sigh->count))
+ kmem_cache_free(sighand_cachep, new_sigh);
+
+bad_unshare_cleanup_ns:
+ if (new_ns)
+ put_namespace(new_ns);
+
+bad_unshare_cleanup_fs:
+ if (new_fs)
+ put_fs_struct(new_fs);
+
+bad_unshare_cleanup_thread:
+bad_unshare_out:
+ return err;
+}
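
For reference (not part of the patch hunk above): the new sys_unshare() entry point is reached from userspace through the unshare() wrapper. A minimal, illustrative sketch, assuming a glibc that declares unshare() in <sched.h> under _GNU_SOURCE (otherwise syscall(__NR_unshare, flags) can be used); the flag choice mirrors the CLONE_NEWNS -> CLONE_FS coupling that check_unshare_flags() enforces:

#define _GNU_SOURCE
#include <sched.h>      /* unshare(), CLONE_* flags */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    /* Detach from the shared mount namespace.  CLONE_NEWNS implies
     * CLONE_FS (check_unshare_flags() would add it anyway) and needs
     * CAP_SYS_ADMIN, as enforced by unshare_namespace() above. */
    if (unshare(CLONE_NEWNS | CLONE_FS) == -1) {
        perror("unshare");  /* e.g. EPERM without CAP_SYS_ADMIN */
        exit(EXIT_FAILURE);
    }
    printf("running in a private mount namespace\n");
    return 0;
}

Mount operations performed after this call are then visible only within the caller's private namespace.
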
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/python/xensec_tools/acm_getdecision
--- a/tools/security/python/xensec_tools/acm_getdecision Thu Jun 15 10:02:53 2006 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-# -*- mode: python; -*-
-import sys
-import traceback
-import getopt
-
-# add fallback path for non-native python path installs if needed
-sys.path.insert(-1, '/usr/lib/python')
-sys.path.insert(-1, '/usr/lib64/python')
-
-from xen.util.security import ACMError, err, get_decision, active_policy
-
-def usage():
- print "Usage: acm_getdecision -i domainid --label labelname"
- print " Test program illustrating the retrieval of"
- print " access control decisions from Xen. At this time,"
- print " only sharing (STE) policy decisions are supported."
- print " Arguments are two paramters in any combination:"
- print "\t -i domain_id or --domid domain_id"
- print "\t -l labelname or --label labelname"
- print " Return value:"
- print "\t PERMITTED if access is permitted"
- print "\t DENIED if access is denied"
- print "\t ACMError -- e.g., unknown label or domain id"
- err("Usage")
-
-try:
-
- if len(sys.argv) != 5:
- usage()
-
- decision_args = []
-
- for idx in range(1, len(sys.argv), 2):
- if sys.argv[idx] in ['-i', '--domid']:
- decision_args.append(['domid', sys.argv[idx+1]])
- elif sys.argv[idx] in ['-l', '--label']:
- decision_args.append(['access_control',
- ['policy', active_policy],
- ['label', sys.argv[idx+1]]
- ])
- else:
- print "unknown argument %s" % sys.argv[idx]
- usage()
-
- if len(decision_args) != 2:
- print "too many arguments"
- usage()
-
- print get_decision(decision_args[0], decision_args[1])
-
-except ACMError:
- pass
-except:
- traceback.print_exc(limit=1)
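
As its usage() text above describes, the deleted helper took a domain id and a security label; a typical invocation (the id and label values here are purely illustrative) looked like:

    acm_getdecision -i 1 --label dom_HomeBanking

and printed PERMITTED, DENIED, or an ACMError for the given pair; internally it simply wrapped get_decision() from xen.util.security.
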