# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 8af1199488d3636135f3adf3f7302d4a04e9004e
# Parent 25e3c8668f1f4769db8466b4af965a99503311ae
# Parent 299d6ff8fdb2604dde767af2a2bee985602e9a46
Merged.
diff -r 25e3c8668f1f -r 8af1199488d3 .hgignore
--- a/.hgignore Mon Jan 9 11:19:55 2006
+++ b/.hgignore Mon Jan 9 11:22:17 2006
@@ -181,6 +181,7 @@
^xen/TAGS$
^xen/arch/x86/asm-offsets\.s$
^xen/arch/x86/boot/mkelf32$
+^xen/arch/x86/xen\.lds$
^xen/ddb/.*$
^xen/include/asm$
^xen/include/asm-.*/asm-offsets\.h$
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c Mon Jan 9 11:22:17 2006
@@ -25,8 +25,9 @@
xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
xen_start_info->flags = s->arch.flags;
- printk("Running on Xen! start_info_pfn=0x%lx lags=0x%x\n",
- s->arch.start_info_pfn, xen_start_info->flags);
+ printk("Running on Xen! start_info_pfn=0x%lx nr_pages=%d flags=0x%x\n",
+ s->arch.start_info_pfn, xen_start_info->nr_pages,
+ xen_start_info->flags);
evtchn_init();
initialized = 1;
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/Makefile Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/Makefile Mon Jan 9 11:22:17 2006
@@ -77,8 +77,6 @@
install -m0664 .config $(INSTALL_PATH)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
install -m0664 System.map $(INSTALL_PATH)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(XENGUEST)$(INSTALL_SUFFIX)
- mkdir -p $(INSTALL_PATH)/usr/include/xen/linux
- install -m0644 $(srctree)/include/asm-xen/linux-public/*.h $(INSTALL_PATH)/usr/include/xen/linux
archclean:
@if [ -e arch/xen/arch ]; then $(MAKE) $(clean)=arch/xen/arch; fi;
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Mon Jan 9 11:22:17 2006
@@ -39,8 +39,6 @@
#ifdef CONFIG_XEN
#include <asm/fixmap.h>
#endif
-
-void (*pm_power_off)(void) = NULL;
#ifdef CONFIG_X86_64
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Mon Jan 9 11:22:17 2006
@@ -67,8 +67,11 @@
op.u.add_memtype.pfn = base;
op.u.add_memtype.nr_pfns = size;
op.u.add_memtype.type = type;
- if ((error = HYPERVISOR_dom0_op(&op)))
+ error = HYPERVISOR_dom0_op(&op);
+ if (error) {
+ BUG_ON(error > 0);
return error;
+ }
if (increment)
++usage_table[op.u.add_memtype.reg];
@@ -121,8 +124,12 @@
if (--usage_table[reg] < 1) {
op.cmd = DOM0_DEL_MEMTYPE;
op.u.del_memtype.handle = 0;
- op.u.add_memtype.reg = reg;
- (void)HYPERVISOR_dom0_op(&op);
+ op.u.del_memtype.reg = reg;
+ error = HYPERVISOR_dom0_op(&op);
+ if (error) {
+ BUG_ON(error > 0);
+ goto out;
+ }
}
error = reg;
out:
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Mon Jan 9 11:22:17 2006
@@ -76,9 +76,7 @@
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL(pm_idle);
-#ifdef CONFIG_ACPI_BOOT
EXPORT_SYMBOL(pm_power_off);
-#endif
EXPORT_SYMBOL(get_cmos_time);
EXPORT_SYMBOL(cpu_khz);
EXPORT_SYMBOL(apm_info);
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Mon Jan 9 11:22:17 2006
@@ -389,6 +389,30 @@
return -ENOSYS;
}
+static int __init
+gnttab_proc_init(void)
+{
+ /*
+ * /proc/xen/grant : used by libxc to access grant tables
+ */
+ if ((grant_pde = create_xen_proc_entry("grant", 0600)) == NULL) {
+ WPRINTK("Unable to create grant xen proc entry\n");
+ return -1;
+ }
+
+ grant_file_ops.read = grant_pde->proc_fops->read;
+ grant_file_ops.write = grant_pde->proc_fops->write;
+
+ grant_pde->proc_fops = &grant_file_ops;
+
+ grant_pde->read_proc = &grant_read;
+ grant_pde->write_proc = &grant_write;
+
+ return 0;
+}
+
+device_initcall(gnttab_proc_init);
+
#endif /* CONFIG_PROC_FS */
int
@@ -446,29 +470,11 @@
gnttab_free_count = NR_GRANT_ENTRIES - NR_RESERVED_ENTRIES;
gnttab_free_head = NR_RESERVED_ENTRIES;
-#ifdef CONFIG_PROC_FS
- /*
- * /proc/xen/grant : used by libxc to access grant tables
- */
- if ((grant_pde = create_xen_proc_entry("grant", 0600)) == NULL) {
- WPRINTK("Unable to create grant xen proc entry\n");
- return -1;
- }
-
- grant_file_ops.read = grant_pde->proc_fops->read;
- grant_file_ops.write = grant_pde->proc_fops->write;
-
- grant_pde->proc_fops = &grant_file_ops;
-
- grant_pde->read_proc = &grant_read;
- grant_pde->write_proc = &grant_write;
-#endif
-
printk("Grant table initialized\n");
return 0;
}
-__initcall(gnttab_init);
+core_initcall(gnttab_init);
/*
* Local variables:
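
The gnttab.c hunks above split initialisation in two: gnttab_init() is promoted from __initcall() (which runs at device-init time) to core_initcall() so the grant tables are ready before any driver initialises, while the /proc plumbing moves into its own device_initcall(). A minimal sketch of how Linux 2.6 orders these initcall levels; the function names here are illustrative only:

    #include <linux/init.h>

    static int __init grant_core_setup(void)
    {
        /* core_initcall: level 1, runs before driver initialisation */
        return 0;
    }

    static int __init grant_proc_setup(void)
    {
        /* device_initcall: level 6, runs once driver infrastructure is up */
        return 0;
    }

    core_initcall(grant_core_setup);
    device_initcall(grant_proc_setup);
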
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Mon Jan 9 11:22:17 2006
@@ -16,6 +16,13 @@
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <asm-xen/xencons.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+#endif
#define SHUTDOWN_INVALID -1
#define SHUTDOWN_POWEROFF 0
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c Mon Jan 9 11:22:17 2006
@@ -59,9 +59,7 @@
EXPORT_SYMBOL(probe_irq_mask);
EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL(pm_idle);
-#ifdef CONFIG_ACPI_BOOT
EXPORT_SYMBOL(pm_power_off);
-#endif
EXPORT_SYMBOL(get_cmos_time);
EXPORT_SYMBOL(__down_failed);
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Mon Jan 9 11:22:17 2006
@@ -540,6 +540,9 @@
pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) *
mmap_pages, GFP_KERNEL);
if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+ kfree(pending_reqs);
+ kfree(pending_grant_handles);
+ kfree(pending_vaddrs);
printk("%s: out of memory\n", __FUNCTION__);
return -1;
}
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Jan 9 11:22:17 2006
@@ -331,7 +331,12 @@
return;
}
- xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+ err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
+ info->xbdev->otherend);
+ return;
+ }
(void)xenbus_switch_state(info->xbdev, NULL, XenbusStateConnected);
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Mon Jan 9 11:22:17 2006
@@ -208,7 +208,7 @@
}
struct vm_operations_struct blktap_vm_ops = {
- nopage: blktap_nopage,
+ .nopage = blktap_nopage,
};
/******************************************************************
@@ -225,7 +225,7 @@
/* Allocate the fe ring. */
sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
if (sring == NULL)
- goto fail_nomem;
+ return -ENOMEM;
SetPageReserved(virt_to_page(sring));
@@ -233,9 +233,6 @@
FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
return 0;
-
- fail_nomem:
- return -ENOMEM;
}
static int blktap_release(struct inode *inode, struct file *filp)
@@ -391,12 +388,12 @@
}
static struct file_operations blktap_fops = {
- owner: THIS_MODULE,
- poll: blktap_poll,
- ioctl: blktap_ioctl,
- open: blktap_open,
- release: blktap_release,
- mmap: blktap_mmap,
+ .owner = THIS_MODULE,
+ .poll = blktap_poll,
+ .ioctl = blktap_ioctl,
+ .open = blktap_open,
+ .release = blktap_release,
+ .mmap = blktap_mmap,
};
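
The blktap.c hunks above convert GCC's legacy "label:" structure initialisers to standard C99 designated initialisers; the generated code is identical, only the spelling is portable. A self-contained sketch of the two forms, using a hypothetical struct rather than the kernel's file_operations:

    struct ops {
        int (*open)(void);
        int (*release)(void);
    };

    static int my_open(void)    { return 0; }
    static int my_release(void) { return 0; }

    /* legacy GCC extension, as removed by the patch: */
    static struct ops legacy = { open: my_open, release: my_release };

    /* standard C99 designated initialisers, as now used: */
    static struct ops modern = { .open = my_open, .release = my_release };
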
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Mon Jan 9 11:22:17 2006
@@ -314,39 +314,31 @@
{
int sent, sz, work_done = 0;
- if (xen_start_info->flags & SIF_INITDOMAIN) {
- if (x_char) {
+ if (x_char) {
+ if (xen_start_info->flags & SIF_INITDOMAIN)
kcons_write_dom0(NULL, &x_char, 1);
- x_char = 0;
- work_done = 1;
- }
-
- while (wc != wp) {
- sz = wp - wc;
- if (sz > (wbuf_size - WBUF_MASK(wc)))
- sz = wbuf_size - WBUF_MASK(wc);
+ else
+ while (x_char)
+ if (xencons_ring_send(&x_char, 1) == 1)
+ break;
+ x_char = 0;
+ work_done = 1;
+ }
+
+ while (wc != wp) {
+ sz = wp - wc;
+ if (sz > (wbuf_size - WBUF_MASK(wc)))
+ sz = wbuf_size - WBUF_MASK(wc);
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
wc += sz;
- work_done = 1;
- }
- } else {
- while (x_char) {
- if (xencons_ring_send(&x_char, 1) == 1) {
- x_char = 0;
- work_done = 1;
- }
- }
-
- while (wc != wp) {
- sz = wp - wc;
- if (sz > (wbuf_size - WBUF_MASK(wc)))
- sz = wbuf_size - WBUF_MASK(wc);
+ } else {
sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
if (sent == 0)
break;
wc += sent;
- work_done = 1;
}
+ work_done = 1;
}
if (work_done && (xencons_tty != NULL)) {
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jan 9 11:22:17 2006
@@ -82,7 +82,7 @@
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
void netif_creditlimit(netif_t *netif);
-int netif_disconnect(netif_t *netif);
+void netif_disconnect(netif_t *netif);
netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
void free_netif(netif_t *netif);
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Mon Jan 9 11:22:17 2006
@@ -183,7 +183,7 @@
int netif_map(netif_t *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn)
{
- int err;
+ int err = -ENOMEM;
netif_tx_sring_t *txs;
netif_rx_sring_t *rxs;
evtchn_op_t op = {
@@ -196,24 +196,19 @@
return 0;
netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+ if (netif->tx_comms_area == NULL)
+ return -ENOMEM;
netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (netif->tx_comms_area == NULL || netif->rx_comms_area == NULL)
- return -ENOMEM;
+ if (netif->rx_comms_area == NULL)
+ goto err_rx;
err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
- if (err) {
- free_vm_area(netif->tx_comms_area);
- free_vm_area(netif->rx_comms_area);
- return err;
- }
+ if (err)
+ goto err_map;
err = HYPERVISOR_event_channel_op(&op);
- if (err) {
- unmap_frontend_pages(netif);
- free_vm_area(netif->tx_comms_area);
- free_vm_area(netif->rx_comms_area);
- return err;
- }
+ if (err)
+ goto err_hypervisor;
netif->evtchn = op.u.bind_interdomain.local_port;
@@ -241,19 +236,22 @@
rtnl_unlock();
return 0;
+err_hypervisor:
+ unmap_frontend_pages(netif);
+err_map:
+ free_vm_area(netif->rx_comms_area);
+err_rx:
+ free_vm_area(netif->tx_comms_area);
+ return err;
}
static void free_netif_callback(void *arg)
{
netif_t *netif = (netif_t *)arg;
- /* Already disconnected? */
- if (!netif->irq)
- return;
-
- unbind_from_irqhandler(netif->irq, netif);
- netif->irq = 0;
-
+ if (netif->irq)
+ unbind_from_irqhandler(netif->irq, netif);
+
unregister_netdev(netif->dev);
if (netif->tx.sring) {
@@ -290,10 +288,10 @@
#endif
}
-int netif_disconnect(netif_t *netif)
-{
-
- if (netif->status == CONNECTED) {
+void netif_disconnect(netif_t *netif)
+{
+ switch (netif->status) {
+ case CONNECTED:
rtnl_lock();
netif->status = DISCONNECTING;
wmb();
@@ -301,10 +299,14 @@
__netif_down(netif);
rtnl_unlock();
netif_put(netif);
- return 0; /* Caller should not send response message. */
- }
-
- return 1;
+ break;
+ case DISCONNECTED:
+ BUG_ON(atomic_read(&netif->refcnt) != 0);
+ free_netif(netif);
+ break;
+ default:
+ BUG();
+ }
}
/*
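
The rewritten netif_map() above uses the standard kernel goto-unwind idiom: acquire resources in order and, on failure, jump to a label that releases everything acquired so far in reverse order, so each cleanup statement appears exactly once. A minimal sketch of the pattern, with plain malloc/free standing in for alloc_vm_area/free_vm_area:

    #include <stdlib.h>

    static int setup(void **pa, void **pb)
    {
        void *a, *b;

        a = malloc(64);
        if (a == NULL)
            return -1;      /* nothing to unwind yet */

        b = malloc(64);
        if (b == NULL)
            goto err_b;     /* only 'a' needs releasing */

        *pa = a;
        *pb = b;
        return 0;

    err_b:
        free(a);
        return -1;
    }
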
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Mon Jan 9 11:22:17 2006
@@ -14,6 +14,7 @@
#include <asm-xen/balloon.h>
#include <asm-xen/xen-public/memory.h>
+/*#define NETBE_DEBUG_INTERRUPT*/
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
@@ -727,6 +728,7 @@
return notify;
}
+#ifdef NETBE_DEBUG_INTERRUPT
static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
struct list_head *ent;
@@ -758,6 +760,7 @@
return IRQ_HANDLED;
}
+#endif
static int __init netback_init(void)
{
@@ -794,6 +797,7 @@
netif_xenbus_init();
+#ifdef NETBE_DEBUG_INTERRUPT
(void)bind_virq_to_irqhandler(
VIRQ_DEBUG,
0,
@@ -801,6 +805,7 @@
SA_SHIRQ,
"net-be-dbg",
&netif_be_dbg);
+#endif
return 0;
}
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Jan 9 11:22:17 2006
@@ -116,6 +116,8 @@
#define RX_MAX_TARGET NET_RX_RING_SIZE
int rx_min_target, rx_max_target, rx_target;
struct sk_buff_head rx_batch;
+
+ struct timer_list rx_refill_timer;
/*
* {tx,rx}_skbs store outstanding skbuffs. The first entry in each
@@ -517,6 +519,13 @@
}
+static void rx_refill_timeout(unsigned long data)
+{
+ struct net_device *dev = (struct net_device *)data;
+ netif_rx_schedule(dev);
+}
+
+
static void network_alloc_rx_buffers(struct net_device *dev)
{
unsigned short id;
@@ -534,7 +543,7 @@
* Allocate skbuffs greedily, even though we batch updates to the
* receive ring. This creates a less bursty demand on the memory
* allocator, so should reduce the chance of failed allocation requests
- * both for ourself and for other kernel subsystems.
+ * both for ourself and for other kernel subsystems.
*/
batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
@@ -545,8 +554,15 @@
skb = alloc_xen_skb(
((PAGE_SIZE - sizeof(struct skb_shared_info)) &
(-SKB_DATA_ALIGN(1))) - 16);
- if (skb == NULL)
- break;
+ if (skb == NULL) {
+ /* Any skbuffs queued for refill? Force them out. */
+ if (i != 0)
+ goto refill;
+ /* Could not allocate any skbuffs. Try again later. */
+ mod_timer(&np->rx_refill_timer,
+ jiffies + (HZ/10));
+ return;
+ }
__skb_queue_tail(&np->rx_batch, skb);
}
@@ -554,6 +570,12 @@
if (i < (np->rx_target/2))
return;
+ /* Adjust our fill target if we risked running out of buffers. */
+ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
+ ((np->rx_target *= 2) > np->rx_max_target))
+ np->rx_target = np->rx_max_target;
+
+ refill:
for (i = 0; ; i++) {
if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
break;
@@ -608,11 +630,6 @@
/* Above is a suitable barrier to ensure backend will see requests. */
np->rx.req_prod_pvt = req_prod + i;
RING_PUSH_REQUESTS(&np->rx);
-
- /* Adjust our fill target if we risked running out of buffers. */
- if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
- ((np->rx_target *= 2) > np->rx_max_target))
- np->rx_target = np->rx_max_target;
}
@@ -1077,6 +1094,10 @@
np->rx_min_target = RX_MIN_TARGET;
np->rx_max_target = RX_MAX_TARGET;
+ init_timer(&np->rx_refill_timer);
+ np->rx_refill_timer.data = (unsigned long)netdev;
+ np->rx_refill_timer.function = rx_refill_timeout;
+
/* Initialise {tx,rx}_skbs as a free chain containing every entry. */
for (i = 0; i <= NET_TX_RING_SIZE; i++) {
np->tx_skbs[i] = (void *)((unsigned long) i+1);
@@ -1188,29 +1209,26 @@
DPRINTK("%s\n", dev->nodename);
- netif_free(info);
- kfree(info);
+ netif_disconnect_backend(info);
+ free_netdev(info->netdev);
return 0;
}
-static void netif_free(struct netfront_info *info)
-{
- netif_disconnect_backend(info);
- close_netdev(info);
-}
-
-
static void close_netdev(struct netfront_info *info)
{
- if (info->netdev) {
+ spin_lock_irq(&info->netdev->xmit_lock);
+ netif_stop_queue(info->netdev);
+ spin_unlock_irq(&info->netdev->xmit_lock);
+
#ifdef CONFIG_PROC_FS
- xennet_proc_delif(info->netdev);
+ xennet_proc_delif(info->netdev);
#endif
- unregister_netdev(info->netdev);
- info->netdev = NULL;
- }
+
+ del_timer_sync(&info->rx_refill_timer);
+
+ unregister_netdev(info->netdev);
}
@@ -1219,21 +1237,28 @@
/* Stop old i/f to prevent errors whilst we rebuild the state. */
spin_lock_irq(&info->tx_lock);
spin_lock(&info->rx_lock);
- netif_stop_queue(info->netdev);
- /* info->backend_state = BEST_DISCONNECTED; */
+ info->backend_state = BEST_DISCONNECTED;
spin_unlock(&info->rx_lock);
spin_unlock_irq(&info->tx_lock);
-
+
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info->netdev);
+ info->evtchn = info->irq = 0;
+
end_access(info->tx_ring_ref, info->tx.sring);
end_access(info->rx_ring_ref, info->rx.sring);
info->tx_ring_ref = GRANT_INVALID_REF;
info->rx_ring_ref = GRANT_INVALID_REF;
info->tx.sring = NULL;
info->rx.sring = NULL;
-
- if (info->irq)
- unbind_from_irqhandler(info->irq, info->netdev);
- info->evtchn = info->irq = 0;
+}
+
+
+static void netif_free(struct netfront_info *info)
+{
+ close_netdev(info);
+ netif_disconnect_backend(info);
+ free_netdev(info->netdev);
}
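
When every skbuff allocation fails, the netfront changes above arm a one-shot kernel timer and retry roughly 100ms later rather than abandoning the refill. A sketch of that retry pattern against the Linux 2.6 timer API; the callback body is elided and the names are illustrative:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    static struct timer_list refill_timer;

    static void refill_timeout(unsigned long data)
    {
        /* re-schedule receive processing for the device cookie in 'data' */
    }

    static void arm_refill_retry(unsigned long dev_cookie)
    {
        init_timer(&refill_timer);
        refill_timer.data     = dev_cookie;
        refill_timer.function = refill_timeout;
        mod_timer(&refill_timer, jiffies + HZ / 10);    /* ~100ms */
    }
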
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Mon Jan 9 11:22:17 2006
@@ -111,7 +111,6 @@
struct xenbus_dev_data *u = filp->private_data;
struct xenbus_dev_transaction *trans;
void *reply;
- int err = 0;
if ((len + u->len) > sizeof(u->u.buffer))
return -EINVAL;
@@ -136,41 +135,36 @@
case XS_RM:
case XS_SET_PERMS:
reply = xenbus_dev_request_and_reply(&u->u.msg);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- } else {
- if (u->u.msg.type == XS_TRANSACTION_START) {
- trans = kmalloc(sizeof(*trans), GFP_KERNEL);
- trans->handle = (struct xenbus_transaction *)
- simple_strtoul(reply, NULL, 0);
- list_add(&trans->list, &u->transactions);
- } else if (u->u.msg.type == XS_TRANSACTION_END) {
- list_for_each_entry(trans, &u->transactions,
- list)
- if ((unsigned long)trans->handle ==
- (unsigned long)u->u.msg.tx_id)
- break;
- BUG_ON(&trans->list == &u->transactions);
- list_del(&trans->list);
- kfree(trans);
- }
- queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
- queue_reply(u, (char *)reply, u->u.msg.len);
- kfree(reply);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ if (u->u.msg.type == XS_TRANSACTION_START) {
+ trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+ if (!trans)
+ return -ENOMEM;
+ trans->handle = (struct xenbus_transaction *)
+ simple_strtoul(reply, NULL, 0);
+ list_add(&trans->list, &u->transactions);
+ } else if (u->u.msg.type == XS_TRANSACTION_END) {
+ list_for_each_entry(trans, &u->transactions, list)
+ if ((unsigned long)trans->handle ==
+ (unsigned long)u->u.msg.tx_id)
+ break;
+ BUG_ON(&trans->list == &u->transactions);
+ list_del(&trans->list);
+ kfree(trans);
}
+ queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
+ queue_reply(u, (char *)reply, u->u.msg.len);
+ kfree(reply);
break;
default:
- err = -EINVAL;
- break;
+ return -EINVAL;
}
- if (err == 0) {
- u->len = 0;
- err = len;
- }
-
- return err;
+ u->len = 0;
+ return len;
}
static int xenbus_dev_open(struct inode *inode, struct file *filp)
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Mon Jan 9 11:22:17 2006
@@ -542,14 +542,6 @@
const char *type,
const char *nodename)
{
-#define CHECK_FAIL \
- do { \
- if (err) \
- goto fail; \
- } \
- while (0) \
-
-
int err;
struct xenbus_device *xendev;
size_t stringlen;
@@ -584,19 +576,18 @@
xendev->dev.release = xenbus_dev_release;
err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
- CHECK_FAIL;
+ if (err)
+ goto fail;
/* Register with generic device framework. */
err = device_register(&xendev->dev);
- CHECK_FAIL;
+ if (err)
+ goto fail;
device_create_file(&xendev->dev, &dev_attr_nodename);
device_create_file(&xendev->dev, &dev_attr_devtype);
return 0;
-
-#undef CHECK_FAIL
-
fail:
xenbus_dev_free(xendev);
return err;
diff -r 25e3c8668f1f -r 8af1199488d3 linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Mon Jan 9 11:19:55 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Mon Jan 9 11:22:17 2006
@@ -372,7 +372,7 @@
int ret;
__asm__ __volatile__ ( ";; mov r14=%2 ; mov r15=%3 ; mov r2=%1 ; break 0x1000 ;; mov %0=r8 ;;"
: "=r" (ret)
- : "i" (__HYPERVISOR_console_io), "r"(cmd), "r"(arg)
+ : "i" (__HYPERVISOR_memory_op), "r"(cmd), "r"(arg)
: "r14","r15","r2","r8","memory" );
return ret;
}
diff -r 25e3c8668f1f -r 8af1199488d3 tools/Makefile
--- a/tools/Makefile Mon Jan 9 11:19:55 2006
+++ b/tools/Makefile Mon Jan 9 11:22:17 2006
@@ -12,6 +12,7 @@
SUBDIRS += security
SUBDIRS += console
SUBDIRS += xenmon
+SUBDIRS += guest-headers
ifeq ($(VTPM_TOOLS),y)
SUBDIRS += vtpm_manager
SUBDIRS += vtpm
diff -r 25e3c8668f1f -r 8af1199488d3 tools/Rules.mk
--- a/tools/Rules.mk Mon Jan 9 11:19:55 2006
+++ b/tools/Rules.mk Mon Jan 9 11:22:17 2006
@@ -35,6 +35,8 @@
mk-symlinks:
mkdir -p xen
( cd xen && ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . )
+ mkdir -p xen/hvm
+ ( cd xen/hvm && ln -sf ../../$(XEN_ROOT)/xen/include/public/hvm/*.h . )
mkdir -p xen/io
( cd xen/io && ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . )
mkdir -p xen/linux
diff -r 25e3c8668f1f -r 8af1199488d3 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c Mon Jan 9 11:19:55 2006
+++ b/tools/debugger/libxendebug/xendebug.c Mon Jan 9 11:22:17 2006
@@ -119,8 +119,8 @@
if ( !ctxt->valid[vcpu] )
{
- if ( (rc = xc_domain_get_vcpu_context(xc_handle, domid, vcpu,
- &ctxt->context[vcpu])) )
+ if ( (rc = xc_vcpu_getcontext(xc_handle, domid, vcpu,
+ &ctxt->context[vcpu])) )
return NULL;
ctxt->valid[vcpu] = true;
@@ -139,10 +139,10 @@
return -EINVAL;
op.interface_version = DOM0_INTERFACE_VERSION;
- op.cmd = DOM0_SETDOMAININFO;
- op.u.setdomaininfo.domain = ctxt->domid;
- op.u.setdomaininfo.vcpu = vcpu;
- op.u.setdomaininfo.ctxt = &ctxt->context[vcpu];
+ op.cmd = DOM0_SETVCPUCONTEXT;
+ op.u.setvcpucontext.domain = ctxt->domid;
+ op.u.setvcpucontext.vcpu = vcpu;
+ op.u.setvcpucontext.ctxt = &ctxt->context[vcpu];
if ( (rc = mlock(&ctxt->context[vcpu], sizeof(vcpu_guest_context_t))) )
return rc;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx Mon Jan 9 11:19:55 2006
+++ b/tools/examples/xmexample.vmx Mon Jan 9 11:22:17 2006
@@ -28,7 +28,13 @@
#-----------------------------------------------------------------------------
# the number of cpus guest platform has, default=1
-vcpus=1
+#vcpus=1
+
+# enable/disable vmx guest ACPI, default=0 (disabled)
+#acpi=0
+
+# enable/disable vmx guest APIC, default=0 (disabled)
+#apic=0
# List of which CPUS this domain is allowed to use, default Xen picks
#cpus = "" # leave to Xen to pick
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/Makefile
--- a/tools/firmware/vmxassist/Makefile Mon Jan 9 11:19:55 2006
+++ b/tools/firmware/vmxassist/Makefile Mon Jan 9 11:22:17 2006
@@ -24,7 +24,7 @@
# The emulator code lives in ROM space
TEXTADDR=0x000D0000
-DEFINES=-DDEBUG -D_ACPI_ -DTEXTADDR=$(TEXTADDR)
+DEFINES=-DDEBUG -DTEXTADDR=$(TEXTADDR)
XENINC=-I$(XEN_ROOT)/tools/libxc
LD = ld
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/acpi_madt.c
--- a/tools/firmware/vmxassist/acpi_madt.c Mon Jan 9 11:19:55 2006
+++ b/tools/firmware/vmxassist/acpi_madt.c Mon Jan 9 11:22:17 2006
@@ -17,30 +17,73 @@
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*/
+
#include "../acpi/acpi2_0.h"
#include "../acpi/acpi_madt.h"
+
+#include <xen/hvm/hvm_info_table.h>
#define NULL ((void*)0)
extern int puts(const char *s);
-#define VCPU_NR_PAGE 0x0009F000
-#define VCPU_NR_OFFSET 0x00000800
-#define VCPU_MAGIC 0x76637075 /* "vcpu" */
+static struct hvm_info_table *table = NULL;
-/* xc_vmx_builder wrote vcpu block at 0x9F800. Return it. */
-static int
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+ char signature[] = "HVM INFO";
+ uint8_t *ptr = (uint8_t *)t;
+ uint8_t sum = 0;
+ int i;
+
+ /* strncmp(t->signature, "HVM INFO", 8) */
+ for (i = 0; i < 8; i++) {
+ if (signature[i] != t->signature[i]) {
+ puts("Bad hvm info signature\n");
+ return 0;
+ }
+ }
+
+ for (i = 0; i < t->length; i++)
+ sum += ptr[i];
+
+ return (sum == 0);
+}
+
+/* xc_vmx_builder wrote hvm info at 0x9F800. Return it. */
+static struct hvm_info_table *
+get_hvm_info_table(void)
+{
+ struct hvm_info_table *t;
+ int i;
+
+ if (table != NULL)
+ return table;
+
+ t = (struct hvm_info_table *)HVM_INFO_PADDR;
+
+ if (!validate_hvm_info(t)) {
+ puts("Bad hvm info table\n");
+ return NULL;
+ }
+
+ table = t;
+
+ return table;
+}
+
+int
get_vcpu_nr(void)
{
- unsigned int *vcpus;
+ struct hvm_info_table *t = get_hvm_info_table();
+ return (t ? t->nr_vcpus : 1); /* default 1 vcpu */
+}
- vcpus = (unsigned int *)(VCPU_NR_PAGE + VCPU_NR_OFFSET);
- if (vcpus[0] != VCPU_MAGIC) {
- puts("Bad vcpus magic, set vcpu number to 1 by default.\n");
- return 1;
- }
-
- return vcpus[1];
+int
+get_acpi_enabled(void)
+{
+ struct hvm_info_table *t = get_hvm_info_table();
+ return (t ? t->acpi_enabled : 0); /* default no acpi */
}
static void *
@@ -74,10 +117,10 @@
return madt;
}
-static void
+static void
set_checksum(void *start, int checksum_offset, int len)
{
- unsigned char sum = 0;
+ unsigned char sum = 0;
unsigned char *ptr;
ptr = start;
@@ -89,9 +132,9 @@
ptr[checksum_offset] = -sum;
}
-static int
+static int
acpi_madt_set_local_apics(
- int nr_vcpu,
+ int nr_vcpu,
ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
{
int i;
@@ -104,14 +147,14 @@
madt->LocalApic[i].Length = sizeof (ACPI_LOCAL_APIC_STRUCTURE);
madt->LocalApic[i].AcpiProcessorId = i;
madt->LocalApic[i].ApicId = i;
- madt->LocalApic[i].Flags = 1;
+ madt->LocalApic[i].Flags = 1;
}
madt->Header.Header.Length =
- sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
+ sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
(MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
- return 0;
+ return 0;
}
#define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
@@ -133,7 +176,7 @@
madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
madt->Header.Header.Length);
- return 0;
+ return 0;
}
/*
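
Both ends of the new HVM info handshake (set_hvm_info() in libxc below, validate_hvm_info() above) rely on the ACPI-style rule that all bytes of the table, checksum included, sum to zero modulo 256. A standalone sketch of that convention, assuming only a byte buffer and the offset of its checksum field:

    #include <stdint.h>
    #include <stddef.h>

    static void set_checksum(uint8_t *table, size_t len, size_t csum_off)
    {
        uint8_t sum = 0;
        size_t i;

        table[csum_off] = 0;
        for (i = 0; i < len; i++)
            sum += table[i];
        table[csum_off] = (uint8_t)-sum;    /* bytes now sum to 0 */
    }

    static int checksum_ok(const uint8_t *table, size_t len)
    {
        uint8_t sum = 0;
        size_t i;

        for (i = 0; i < len; i++)
            sum += table[i];
        return sum == 0;
    }
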
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/vm86.h
--- a/tools/firmware/vmxassist/vm86.h Mon Jan 9 11:19:55 2006
+++ b/tools/firmware/vmxassist/vm86.h Mon Jan 9 11:22:17 2006
@@ -24,7 +24,7 @@
#include <stdint.h>
#endif
-#include <xen/vmx_assist.h>
+#include <xen/hvm/vmx_assist.h>
#define NR_EXCEPTION_HANDLER 32
#define NR_INTERRUPT_HANDLERS 16
diff -r 25e3c8668f1f -r 8af1199488d3 tools/firmware/vmxassist/vmxloader.c
--- a/tools/firmware/vmxassist/vmxloader.c Mon Jan 9 11:19:55 2006
+++ b/tools/firmware/vmxassist/vmxloader.c Mon Jan 9 11:22:17 2006
@@ -24,12 +24,10 @@
#include "machine.h"
#include "roms.h"
-#ifdef _ACPI_
#include "acpi.h"
#include "../acpi/acpi2_0.h" // for ACPI_PHYSICAL_ADDRESS
int acpi_madt_update(unsigned char* acpi_start);
-#endif
-
+int get_acpi_enabled(void);
/*
* C runtime start off
@@ -120,18 +118,17 @@
memcpy((void *)0xC0000,
vgabios_stdvga, sizeof(vgabios_stdvga));
}
-#ifdef _ACPI_
- puts("Loading ACPI ...\n");
- acpi_madt_update(acpi);
-
- if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
- /* make sure acpi table does not overlap rombios
- * currently acpi less than 8K will be OK.
- */
- memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
+ if (get_acpi_enabled() != 0) {
+ puts("Loading ACPI ...\n");
+ acpi_madt_update((unsigned char*)acpi);
+ if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
+ /* make sure acpi table does not overlap rombios
+ * currently acpi less than 8K will be OK.
+ */
+ memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
+ }
}
-#endif
puts("Loading VMXAssist ...\n");
memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist));
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8254.c
--- a/tools/ioemu/hw/i8254.c Mon Jan 9 11:19:55 2006
+++ b/tools/ioemu/hw/i8254.c Mon Jan 9 11:22:17 2006
@@ -23,7 +23,7 @@
*/
#include "vl.h"
#include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
//#define DEBUG_PIT
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8259.c
--- a/tools/ioemu/hw/i8259.c Mon Jan 9 11:19:55 2006
+++ b/tools/ioemu/hw/i8259.c Mon Jan 9 11:22:17 2006
@@ -23,7 +23,7 @@
*/
#include "vl.h"
#include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
/* debug PIC */
//#define DEBUG_PIC
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/hw/i8259_stub.c
--- a/tools/ioemu/hw/i8259_stub.c Mon Jan 9 11:19:55 2006
+++ b/tools/ioemu/hw/i8259_stub.c Mon Jan 9 11:22:17 2006
@@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include "xenctrl.h"
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
#include <stdio.h>
#include "cpu.h"
#include "cpu-all.h"
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Mon Jan 9 11:19:55 2006
+++ b/tools/ioemu/target-i386-dm/helper2.c Mon Jan 9 11:22:17 2006
@@ -48,7 +48,7 @@
#include <sys/ioctl.h>
#include <xenctrl.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
#include <xen/linux/evtchn.h>
#include "cpu.h"
diff -r 25e3c8668f1f -r 8af1199488d3 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Mon Jan 9 11:19:55 2006
+++ b/tools/ioemu/vl.c Mon Jan 9 11:22:17 2006
@@ -2948,6 +2948,7 @@
case QEMU_OPTION_vcpus:
vcpus = atoi(optarg);
fprintf(logfile, "qemu: the number of cpus is %d\n", vcpus);
+ break;
case QEMU_OPTION_pci:
pci_enabled = 1;
break;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_core.c Mon Jan 9 11:22:17 2006
@@ -55,7 +55,7 @@
}
for (i = 0; i < info.max_vcpu_id; i++)
- if (xc_domain_get_vcpu_context(xc_handle, domid,
+ if (xc_vcpu_getcontext(xc_handle, domid,
i, &ctxt[nr_vcpus]) == 0)
nr_vcpus++;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_domain.c Mon Jan 9 11:22:17 2006
@@ -58,16 +58,16 @@
return do_dom0_op(xc_handle, &op);
}
-int xc_domain_pincpu(int xc_handle,
- uint32_t domid,
- int vcpu,
- cpumap_t cpumap)
-{
- DECLARE_DOM0_OP;
- op.cmd = DOM0_PINCPUDOMAIN;
- op.u.pincpudomain.domain = (domid_t)domid;
- op.u.pincpudomain.vcpu = vcpu;
- op.u.pincpudomain.cpumap = cpumap;
+int xc_vcpu_setaffinity(int xc_handle,
+ uint32_t domid,
+ int vcpu,
+ cpumap_t cpumap)
+{
+ DECLARE_DOM0_OP;
+ op.cmd = DOM0_SETVCPUAFFINITY;
+ op.u.setvcpuaffinity.domain = (domid_t)domid;
+ op.u.setvcpuaffinity.vcpu = vcpu;
+ op.u.setvcpuaffinity.cpumap = cpumap;
return do_dom0_op(xc_handle, &op);
}
@@ -155,7 +155,7 @@
return ret;
}
-int xc_domain_get_vcpu_context(int xc_handle,
+int xc_vcpu_getcontext(int xc_handle,
uint32_t domid,
uint32_t vcpu,
vcpu_guest_context_t *ctxt)
@@ -345,10 +345,10 @@
return do_dom0_op(xc_handle, &op);
}
-int xc_domain_get_vcpu_info(int xc_handle,
- uint32_t domid,
- uint32_t vcpu,
- xc_vcpuinfo_t *info)
+int xc_vcpu_getinfo(int xc_handle,
+ uint32_t domid,
+ uint32_t vcpu,
+ xc_vcpuinfo_t *info)
{
int rc;
DECLARE_DOM0_OP;
@@ -380,18 +380,18 @@
return do_dom0_op(xc_handle, &op);
}
-int xc_domain_setinfo(int xc_handle,
- uint32_t domid,
- uint32_t vcpu,
- vcpu_guest_context_t *ctxt)
+int xc_vcpu_setcontext(int xc_handle,
+ uint32_t domid,
+ uint32_t vcpu,
+ vcpu_guest_context_t *ctxt)
{
dom0_op_t op;
int rc;
- op.cmd = DOM0_SETDOMAININFO;
- op.u.setdomaininfo.domain = domid;
- op.u.setdomaininfo.vcpu = vcpu;
- op.u.setdomaininfo.ctxt = ctxt;
+ op.cmd = DOM0_SETVCPUCONTEXT;
+ op.u.setvcpucontext.domain = domid;
+ op.u.setvcpucontext.vcpu = vcpu;
+ op.u.setvcpucontext.ctxt = ctxt;
if ( (rc = mlock(ctxt, sizeof(*ctxt))) != 0 )
return rc;
@@ -402,6 +402,38 @@
return rc;
+}
+
+int xc_domain_irq_permission(int xc_handle,
+ uint32_t domid,
+ uint8_t pirq,
+ uint8_t allow_access)
+{
+ dom0_op_t op;
+
+ op.cmd = DOM0_IRQ_PERMISSION;
+ op.u.irq_permission.domain = domid;
+ op.u.irq_permission.pirq = pirq;
+ op.u.irq_permission.allow_access = allow_access;
+
+ return do_dom0_op(xc_handle, &op);
+}
+
+int xc_domain_iomem_permission(int xc_handle,
+ uint32_t domid,
+ unsigned long first_pfn,
+ unsigned long nr_pfns,
+ uint8_t allow_access)
+{
+ dom0_op_t op;
+
+ op.cmd = DOM0_IOMEM_PERMISSION;
+ op.u.iomem_permission.domain = domid;
+ op.u.iomem_permission.first_pfn = first_pfn;
+ op.u.iomem_permission.nr_pfns = nr_pfns;
+ op.u.iomem_permission.allow_access = allow_access;
+
+ return do_dom0_op(xc_handle, &op);
}
/*
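
A hypothetical caller of the two permission wrappers added above, granting a driver domain one physical IRQ and a range of I/O memory pages; the IRQ number and pfn range are purely illustrative:

    #include <xenctrl.h>

    static int grant_device_access(int xc_handle, uint32_t domid)
    {
        /* allow the domain to receive physical IRQ 9 */
        if (xc_domain_irq_permission(xc_handle, domid, 9, 1) != 0)
            return -1;

        /* allow it to map 16 pages of I/O memory starting at pfn 0xf0000 */
        return xc_domain_iomem_permission(xc_handle, domid, 0xf0000, 16, 1);
    }
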
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_ia64_stubs.c Mon Jan 9 11:22:17 2006
@@ -5,7 +5,7 @@
#include <stdlib.h>
#include <zlib.h>
#include "xen/arch-ia64.h"
-#include <xen/io/ioreq.h>
+#include <xen/hvm/ioreq.h>
/* this is a very ugly way of getting FPSR_DEFAULT. struct ia64_fpreg is
* mysteriously declared in two places: /usr/include/asm/fpu.h and
@@ -627,6 +627,7 @@
unsigned int control_evtchn,
unsigned int lapic,
unsigned int vcpus,
+ unsigned int acpi,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
@@ -663,7 +664,7 @@
goto error_out;
}
- if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ){
+ if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) ){
PERROR("Could not get vcpu context");
goto error_out;
}
@@ -687,11 +688,11 @@
memset( &launch_op, 0, sizeof(launch_op) );
- launch_op.u.setdomaininfo.domain = (domid_t)domid;
- launch_op.u.setdomaininfo.vcpu = 0;
- launch_op.u.setdomaininfo.ctxt = ctxt;
-
- launch_op.cmd = DOM0_SETDOMAININFO;
+ launch_op.u.setvcpucontext.domain = (domid_t)domid;
+ launch_op.u.setvcpucontext.vcpu = 0;
+ launch_op.u.setvcpucontext.ctxt = ctxt;
+
+ launch_op.cmd = DOM0_SETVCPUCONTEXT;
rc = do_dom0_op(xc_handle, &launch_op);
return rc;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_linux_build.c Mon Jan 9 11:22:17 2006
@@ -393,10 +393,14 @@
start_info->store_evtchn = store_evtchn;
start_info->console_mfn = nr_pages - 1;
start_info->console_evtchn = console_evtchn;
+ start_info->nr_pages = nr_pages; // FIXME?: nr_pages - 2 ????
if ( initrd_len != 0 )
{
ctxt->initrd.start = vinitrd_start;
ctxt->initrd.size = initrd_len;
+ } else {
+ ctxt->initrd.start = 0;
+ ctxt->initrd.size = 0;
}
strncpy((char *)ctxt->cmdline, cmdline, IA64_COMMAND_LINE_SIZE);
ctxt->cmdline[IA64_COMMAND_LINE_SIZE-1] = '\0';
@@ -790,7 +794,7 @@
goto error_out;
}
- if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+ if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
{
PERROR("Could not get vcpu context");
goto error_out;
@@ -893,11 +897,11 @@
memset( &launch_op, 0, sizeof(launch_op) );
- launch_op.u.setdomaininfo.domain = (domid_t)domid;
- launch_op.u.setdomaininfo.vcpu = 0;
- launch_op.u.setdomaininfo.ctxt = ctxt;
-
- launch_op.cmd = DOM0_SETDOMAININFO;
+ launch_op.u.setvcpucontext.domain = (domid_t)domid;
+ launch_op.u.setvcpucontext.vcpu = 0;
+ launch_op.u.setvcpucontext.ctxt = ctxt;
+
+ launch_op.cmd = DOM0_SETVCPUCONTEXT;
rc = xc_dom0_op(xc_handle, &launch_op);
return rc;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_linux_restore.c Mon Jan 9 11:22:17 2006
@@ -171,7 +171,7 @@
/* Only have to worry about vcpu 0 even for SMP */
- if (xc_domain_get_vcpu_context( xc_handle, dom, 0, &ctxt)) {
+ if (xc_vcpu_getcontext( xc_handle, dom, 0, &ctxt)) {
ERR("Could not get vcpu context");
goto out;
}
@@ -735,10 +735,10 @@
DPRINTF("Domain ready to be built.\n");
- op.cmd = DOM0_SETDOMAININFO;
- op.u.setdomaininfo.domain = (domid_t)dom;
- op.u.setdomaininfo.vcpu = 0;
- op.u.setdomaininfo.ctxt = &ctxt;
+ op.cmd = DOM0_SETVCPUCONTEXT;
+ op.u.setvcpucontext.domain = (domid_t)dom;
+ op.u.setvcpucontext.vcpu = 0;
+ op.u.setvcpucontext.ctxt = &ctxt;
rc = xc_dom0_op(xc_handle, &op);
if (rc != 0) {
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_linux_save.c Mon Jan 9 11:22:17 2006
@@ -382,7 +382,7 @@
return -1;
}
- if ( xc_domain_get_vcpu_context(xc_handle, dom, 0 /* XXX */, ctxt))
+ if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt))
ERR("Could not get vcpu context");
@@ -643,7 +643,7 @@
}
/* Only have to worry about vcpu 0 even for SMP */
- if (xc_domain_get_vcpu_context(xc_handle, dom, 0, &ctxt)) {
+ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
ERR("Could not get vcpu context");
goto out;
}
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_pagetab.c
--- a/tools/libxc/xc_pagetab.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_pagetab.c Mon Jan 9 11:22:17 2006
@@ -74,7 +74,7 @@
#define pt_levels 4
#endif
- if (xc_domain_get_vcpu_context(xc_handle, dom, vcpu, &ctx) != 0) {
+ if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) {
fprintf(stderr, "failed to retreive vcpu context\n");
goto out;
}
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_ptrace.c Mon Jan 9 11:22:17 2006
@@ -33,7 +33,7 @@
if (online)
*online = 0;
if ( !(regs_valid & (1 << cpu)) ) {
- retval = xc_domain_get_vcpu_context(xc_handle, current_domid,
+ retval = xc_vcpu_getcontext(xc_handle, current_domid,
cpu, &ctxt[cpu]);
if ( retval )
goto done;
@@ -43,8 +43,7 @@
if ( online == NULL )
goto done;
- retval = xc_domain_get_vcpu_info(xc_handle, current_domid,
- cpu, &info);
+ retval = xc_vcpu_getinfo(xc_handle, current_domid, cpu, &info);
*online = info.online;
done:
@@ -395,7 +394,7 @@
case PTRACE_SETREGS:
SET_XC_REGS(((struct gdb_regs *)data), ctxt[cpu].user_regs);
- retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
+ retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
if (retval)
goto error_out;
break;
@@ -405,7 +404,7 @@
* during single-stepping - but that just seems retarded
*/
ctxt[cpu].user_regs.eflags |= PSL_T;
- retval = xc_domain_setinfo(xc_handle, current_domid, cpu, &ctxt[cpu]);
+ retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu, &ctxt[cpu]);
if ( retval )
{
perror("dom0 op failed");
@@ -423,8 +422,8 @@
/* Clear trace flag */
if ( ctxt[cpu].user_regs.eflags & PSL_T ) {
ctxt[cpu].user_regs.eflags &= ~PSL_T;
- retval = xc_domain_setinfo(xc_handle, current_domid,
- cpu, &ctxt[cpu]);
+ retval = xc_vcpu_setcontext(xc_handle, current_domid,
+ cpu, &ctxt[cpu]);
if ( retval ) {
perror("dom0 op failed");
goto error_out;
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xc_vmx_build.c Mon Jan 9 11:22:17 2006
@@ -9,7 +9,8 @@
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
-#include <xen/io/ioreq.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/ioreq.h>
#define VMX_LOADER_ENTR_ADDR 0x00100000
@@ -33,9 +34,6 @@
#define E820_MAP_NR_OFFSET 0x000001E8
#define E820_MAP_OFFSET 0x000002D0
-#define VCPU_NR_PAGE 0x0009F000
-#define VCPU_NR_OFFSET 0x00000800
-
struct e820entry {
uint64_t addr;
uint64_t size;
@@ -119,26 +117,50 @@
return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
}
+static void
+set_hvm_info_checksum(struct hvm_info_table *t)
+{
+ uint8_t *ptr = (uint8_t *)t, sum = 0;
+ unsigned int i;
+
+ t->checksum = 0;
+
+ for (i = 0; i < t->length; i++)
+ sum += *ptr++;
+
+ t->checksum = -sum;
+}
+
/*
- * Use E820 reserved memory 0x9F800 to pass number of vcpus to vmxloader
- * vmxloader will use it to config ACPI MADT table
+ * Use E820 reserved memory 0x9F800 to pass HVM info to vmxloader
+ * vmxloader will use this info to set BIOS accordingly
*/
-#define VCPU_MAGIC 0x76637075 /* "vcpu" */
-static int set_vcpu_nr(int xc_handle, uint32_t dom,
- unsigned long *pfn_list, unsigned int vcpus)
-{
- char *va_map;
- unsigned int *va_vcpus;
-
- va_map = xc_map_foreign_range(xc_handle, dom,
- PAGE_SIZE, PROT_READ|PROT_WRITE,
- pfn_list[VCPU_NR_PAGE >> PAGE_SHIFT]);
+static int set_hvm_info(int xc_handle, uint32_t dom,
+ unsigned long *pfn_list, unsigned int vcpus,
+ unsigned int acpi, unsigned int apic)
+{
+ char *va_map;
+ struct hvm_info_table *va_hvm;
+
+ va_map = xc_map_foreign_range(
+ xc_handle,
+ dom,
+ PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ pfn_list[HVM_INFO_PFN]);
+
if ( va_map == NULL )
return -1;
- va_vcpus = (unsigned int *)(va_map + VCPU_NR_OFFSET);
- va_vcpus[0] = VCPU_MAGIC;
- va_vcpus[1] = vcpus;
+ va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
+ memset(va_hvm, 0, sizeof(*va_hvm));
+ strncpy(va_hvm->signature, "HVM INFO", 8);
+ va_hvm->length = sizeof(struct hvm_info_table);
+ va_hvm->acpi_enabled = acpi;
+ va_hvm->apic_enabled = apic;
+ va_hvm->nr_vcpus = vcpus;
+
+ set_hvm_info_checksum(va_hvm);
munmap(va_map, PAGE_SIZE);
@@ -279,8 +301,9 @@
vcpu_guest_context_t *ctxt,
unsigned long shared_info_frame,
unsigned int control_evtchn,
- unsigned int lapic,
unsigned int vcpus,
+ unsigned int acpi,
+ unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
@@ -490,20 +513,14 @@
goto error_out;
}
- if (set_vcpu_nr(xc_handle, dom, page_array, vcpus)) {
- fprintf(stderr, "Couldn't set vcpu number for VMX guest.\n");
- goto error_out;
- }
-
- *store_mfn = page_array[(v_end-2) >> PAGE_SHIFT];
- if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
- goto error_out;
-
- shared_page_frame = (v_end - PAGE_SIZE) >> PAGE_SHIFT;
-
- if ((e820_page = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0)
+ if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
+ fprintf(stderr, "Couldn't set hvm info for VMX guest.\n");
+ goto error_out;
+ }
+
+ if ( (e820_page = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
goto error_out;
memset(e820_page, 0, PAGE_SIZE);
e820_map_nr = build_e820map(e820_page, v_end);
@@ -518,25 +535,29 @@
munmap(e820_page, PAGE_SIZE);
/* shared_info page starts its life empty. */
- if ((shared_info = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- shared_info_frame)) == 0)
+ if ( (shared_info = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ shared_info_frame)) == 0 )
goto error_out;
memset(shared_info, 0, sizeof(shared_info_t));
/* Mask all upcalls... */
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
-
munmap(shared_info, PAGE_SIZE);
/* Populate the event channel port in the shared page */
- if ((sp = (shared_iopage_t *) xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
- page_array[shared_page_frame])) == 0)
+ shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
+ if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ shared_page_frame)) == 0 )
goto error_out;
memset(sp, 0, PAGE_SIZE);
sp->sp_global.eport = control_evtchn;
munmap(sp, PAGE_SIZE);
+
+ *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
+ if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
+ goto error_out;
/* Send the page update requests down to the hypervisor. */
if ( xc_finish_mmu_updates(xc_handle, mmu) )
@@ -559,7 +580,7 @@
ctxt->user_regs.eax = 0;
ctxt->user_regs.esp = 0;
ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
- ctxt->user_regs.ecx = lapic;
+ ctxt->user_regs.ecx = 0;
ctxt->user_regs.esi = 0;
ctxt->user_regs.edi = 0;
ctxt->user_regs.ebp = 0;
@@ -572,29 +593,6 @@
free(mmu);
free(page_array);
return -1;
-}
-
-#define VMX_FEATURE_FLAG 0x20
-
-static int vmx_identify(void)
-{
- int eax, ecx;
-
- __asm__ __volatile__ (
-#if defined(__i386__)
- "push %%ebx; cpuid; pop %%ebx"
-#elif defined(__x86_64__)
- "push %%rbx; cpuid; pop %%rbx"
-#endif
- : "=a" (eax), "=c" (ecx)
- : "0" (1)
- : "dx");
-
- if (!(ecx & VMX_FEATURE_FLAG)) {
- return -1;
- }
-
- return 0;
}
int xc_vmx_build(int xc_handle,
@@ -602,8 +600,9 @@
int memsize,
const char *image_name,
unsigned int control_evtchn,
- unsigned int lapic,
unsigned int vcpus,
+ unsigned int acpi,
+ unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
@@ -613,10 +612,18 @@
unsigned long nr_pages;
char *image = NULL;
unsigned long image_size;
-
- if ( vmx_identify() < 0 )
- {
- PERROR("CPU doesn't support VMX Extensions");
+ xen_capabilities_info_t xen_caps;
+
+ if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
+ {
+ PERROR("Failed to get xen version info");
+ goto error_out;
+ }
+
+ if ( !strstr(xen_caps, "hvm") )
+ {
+ PERROR("CPU doesn't support VMX Extensions or "
+ "CPU VMX Extensions are not turned on");
goto error_out;
}
@@ -644,7 +651,7 @@
goto error_out;
}
- if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+ if ( xc_vcpu_getcontext(xc_handle, domid, 0, ctxt) )
{
PERROR("Could not get vcpu context");
goto error_out;
@@ -659,7 +666,7 @@
if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
ctxt, op.u.getdomaininfo.shared_info_frame,
control_evtchn,
- lapic, vcpus, store_evtchn, store_mfn) < 0)
+ vcpus, acpi, apic, store_evtchn, store_mfn) < 0)
{
ERROR("Error constructing guest OS");
goto error_out;
@@ -701,11 +708,11 @@
memset( &launch_op, 0, sizeof(launch_op) );
- launch_op.u.setdomaininfo.domain = (domid_t)domid;
- launch_op.u.setdomaininfo.vcpu = 0;
- launch_op.u.setdomaininfo.ctxt = ctxt;
-
- launch_op.cmd = DOM0_SETDOMAININFO;
+ launch_op.u.setvcpucontext.domain = (domid_t)domid;
+ launch_op.u.setvcpucontext.vcpu = 0;
+ launch_op.u.setvcpucontext.ctxt = ctxt;
+
+ launch_op.cmd = DOM0_SETVCPUCONTEXT;
rc = xc_dom0_op(xc_handle, &launch_op);
return rc;
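
xc_vmx_build() above stops probing CPUID locally (the deleted vmx_identify()) and instead asks the hypervisor, since CPUID only reports what the CPU could do, not whether Xen actually enabled VMX. A sketch of that check, mirroring the xc_version() call the patch adds:

    #include <string.h>
    #include <xenctrl.h>

    static int hvm_capable(int xc_handle)
    {
        xen_capabilities_info_t caps;

        if (xc_version(xc_handle, XENVER_capabilities, &caps) != 0)
            return 0;   /* treat a failed query as "no hvm" */
        return strstr(caps, "hvm") != NULL;
    }
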
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xenctrl.h Mon Jan 9 11:22:17 2006
@@ -181,10 +181,11 @@
*/
int xc_domain_destroy(int xc_handle,
uint32_t domid);
-int xc_domain_pincpu(int xc_handle,
- uint32_t domid,
- int vcpu,
- cpumap_t cpumap);
+
+int xc_vcpu_setaffinity(int xc_handle,
+ uint32_t domid,
+ int vcpu,
+ cpumap_t cpumap);
/**
* This function will return information about one or more domains. It is
@@ -208,7 +209,7 @@
/**
- * This function will set the vcpu context for the specified domain.
+ * This function will set the execution context for the specified vcpu.
*
* @parm xc_handle a handle to an open hypervisor interface
* @parm domid the domain to set the vcpu context for
@@ -216,10 +217,10 @@
* @parm ctxt pointer to the the cpu context with the values to set
* @return the number of domains enumerated or -1 on error
*/
-int xc_domain_setinfo(int xc_handle,
- uint32_t domid,
- uint32_t vcpu,
- vcpu_guest_context_t *ctxt);
+int xc_vcpu_setcontext(int xc_handle,
+ uint32_t domid,
+ uint32_t vcpu,
+ vcpu_guest_context_t *ctxt);
/**
* This function will return information about one or more domains, using a
* single hypercall. The domain information will be stored into the supplied
@@ -249,17 +250,16 @@
* domain
* @return 0 on success, -1 on failure
*/
-int xc_domain_get_vcpu_context(int xc_handle,
+int xc_vcpu_getcontext(int xc_handle,
uint32_t domid,
uint32_t vcpu,
vcpu_guest_context_t *ctxt);
typedef dom0_getvcpuinfo_t xc_vcpuinfo_t;
-int xc_domain_get_vcpu_info(int xc_handle,
- uint32_t domid,
- uint32_t vcpu,
- xc_vcpuinfo_t *info);
-
+int xc_vcpu_getinfo(int xc_handle,
+ uint32_t domid,
+ uint32_t vcpu,
+ xc_vcpuinfo_t *info);
int xc_domain_setcpuweight(int xc_handle,
uint32_t domid,
@@ -379,6 +379,17 @@
uint32_t first_port,
uint32_t nr_ports,
uint32_t allow_access);
+
+int xc_domain_irq_permission(int xc_handle,
+ uint32_t domid,
+ uint8_t pirq,
+ uint8_t allow_access);
+
+int xc_domain_iomem_permission(int xc_handle,
+ uint32_t domid,
+ unsigned long first_pfn,
+ unsigned long nr_pfns,
+ uint8_t allow_access);
unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
unsigned long mfn);
diff -r 25e3c8668f1f -r 8af1199488d3 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Mon Jan 9 11:19:55 2006
+++ b/tools/libxc/xenguest.h Mon Jan 9 11:22:17 2006
@@ -56,8 +56,9 @@
int memsize,
const char *image_name,
unsigned int control_evtchn,
- unsigned int lapic,
unsigned int vcpus,
+ unsigned int acpi,
+ unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn);
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jan 9 11:19:55 2006
+++ b/tools/python/xen/lowlevel/xc/xc.c Mon Jan 9 11:22:17 2006
@@ -135,9 +135,9 @@
}
-static PyObject *pyxc_domain_pincpu(XcObject *self,
- PyObject *args,
- PyObject *kwds)
+static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
+ PyObject *args,
+ PyObject *kwds)
{
uint32_t dom;
int vcpu = 0, i;
@@ -157,7 +157,7 @@
cpumap |= (cpumap_t)1 << PyInt_AsLong(PyList_GetItem(cpulist, i));
}
- if ( xc_domain_pincpu(self->xc_handle, dom, vcpu, cpumap) != 0 )
+ if ( xc_vcpu_setaffinity(self->xc_handle, dom, vcpu, cpumap) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
@@ -297,7 +297,7 @@
&dom, &vcpu) )
return NULL;
- rc = xc_domain_get_vcpu_info(self->xc_handle, dom, vcpu, &info);
+ rc = xc_vcpu_getinfo(self->xc_handle, dom, vcpu, &info);
if ( rc < 0 )
return PyErr_SetFromErrno(xc_error);
@@ -362,21 +362,23 @@
uint32_t dom;
char *image;
int control_evtchn, store_evtchn;
+ int memsize;
int vcpus = 1;
- int lapic = 0;
- int memsize;
+ int acpi = 0;
+ int apic = 0;
unsigned long store_mfn = 0;
static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
- "memsize", "image", "lapic", "vcpus", NULL };
-
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisii", kwd_list,
+ "memsize", "image", "vcpus", "acpi", "apic",
+ NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list,
&dom, &control_evtchn, &store_evtchn,
- &memsize, &image, &lapic, &vcpus) )
+ &memsize, &image, &vcpus, &acpi, &apic) )
return NULL;
if ( xc_vmx_build(self->xc_handle, dom, memsize, image, control_evtchn,
- lapic, vcpus, store_evtchn, &store_mfn) != 0 )
+ vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 )
return PyErr_SetFromErrno(xc_error);
return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
@@ -774,6 +776,52 @@
return zero;
}
+static PyObject *pyxc_domain_irq_permission(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ XcObject *xc = (XcObject *)self;
+ uint32_t dom;
+ int pirq, allow_access, ret;
+
+ static char *kwd_list[] = { "dom", "pirq", "allow_access", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iii", kwd_list,
+ &dom, &pirq, &allow_access) )
+ return NULL;
+
+ ret = xc_domain_irq_permission(
+ xc->xc_handle, dom, pirq, allow_access);
+ if ( ret != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
+}
+
+static PyObject *pyxc_domain_iomem_permission(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ XcObject *xc = (XcObject *)self;
+ uint32_t dom;
+ unsigned long first_pfn, nr_pfns, allow_access, ret;
+
+ static char *kwd_list[] = { "dom", "first_pfn", "nr_pfns", "allow_access", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "illi", kwd_list,
+ &dom, &first_pfn, &nr_pfns, &allow_access) )
+ return NULL;
+
+ ret = xc_domain_iomem_permission(
+ xc->xc_handle, dom, first_pfn, nr_pfns, allow_access);
+ if ( ret != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
+}
+
static PyObject *dom_op(XcObject *self, PyObject *args,
int (*fn)(int, uint32_t))
@@ -842,8 +890,8 @@
" dom [int]: Identifier of domain to be destroyed.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
- { "domain_pincpu",
- (PyCFunction)pyxc_domain_pincpu,
+ { "vcpu_setaffinity",
+ (PyCFunction)pyxc_vcpu_setaffinity,
METH_VARARGS | METH_KEYWORDS, "\n"
"Pin a VCPU to a specified set CPUs.\n"
" dom [int]: Identifier of domain to which VCPU belongs.\n"
@@ -1067,6 +1115,25 @@
" dom [int]: Identifier of domain to be allowed access.\n"
" first_port [int]: First IO port\n"
" nr_ports [int]: Number of IO ports\n"
+ " allow_access [int]: Non-zero means enable access; else disable
access\n\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
+ { "domain_irq_permission",
+ (PyCFunction)pyxc_domain_irq_permission,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "Allow a domain access to a physical IRQ\n"
+ " dom [int]: Identifier of domain to be allowed access.\n"
+ " pirq [int]: The Physical IRQ\n"
+ " allow_access [int]: Non-zero means enable access; else disable
access\n\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
+ { "domain_iomem_permission",
+ (PyCFunction)pyxc_domain_iomem_permission,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "Allow a domain access to a range of IO memory pages\n"
+ " dom [int]: Identifier of domain to be allowed access.\n"
+ " first_pfn [long]: First page of I/O Memory\n"
+ " nr_pfns [long]: Number of pages of I/O Memory (>0)\n"
" allow_access [int]: Non-zero means enable access; else disable
access\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Mon Jan 9 11:19:55 2006
+++ b/tools/python/xen/xend/XendDomain.py Mon Jan 9 11:22:17 2006
@@ -443,7 +443,7 @@
cpumap = map(lambda x: int(x),
cpumap.replace("[", "").replace("]", "").split(","))
try:
- return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap)
+ return xc.vcpu_setaffinity(dominfo.getDomid(), vcpu, cpumap)
except Exception, ex:
raise XendError(str(ex))
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Mon Jan 9 11:19:55 2006
+++ b/tools/python/xen/xend/XendDomainInfo.py Mon Jan 9 11:22:17 2006
@@ -1179,7 +1179,7 @@
for v in range(0, self.info['max_vcpu_id']+1):
# pincpu takes a list of ints
cpu = [ int( cpus[v % len(cpus)] ) ]
- xc.domain_pincpu(self.domid, v, cpu)
+ xc.vcpu_setaffinity(self.domid, v, cpu)
m = self.image.getDomainMemory(self.info['memory'] * 1024)
balloon.free(m)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Mon Jan 9 11:19:55 2006
+++ b/tools/python/xen/xend/image.py Mon Jan 9 11:22:17 2006
@@ -189,11 +189,16 @@
def configure(self, imageConfig, deviceConfig):
ImageHandler.configure(self, imageConfig, deviceConfig)
+ info = xc.xeninfo()
+ if not 'hvm' in info['xen_caps']:
+ raise VmError("vmx: not an Intel VT platform, we stop creating!")
+
self.dmargs = self.parseDeviceModelArgs(imageConfig, deviceConfig)
self.device_model = sxp.child_value(imageConfig, 'device_model')
if not self.device_model:
raise VmError("vmx: missing device model")
self.display = sxp.child_value(imageConfig, 'display')
+ self.xauthority = sxp.child_value(imageConfig, 'xauthority')
self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
("image/device-model", self.device_model),
@@ -204,10 +209,8 @@
self.dmargs += self.configVNC(imageConfig)
- self.lapic = 0
- lapic = sxp.child_value(imageConfig, 'lapic')
- if not lapic is None:
- self.lapic = int(lapic)
+ self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0))
+ self.apic = int(sxp.child_value(imageConfig, 'apic', 0))
def buildDomain(self):
# Create an event channel
@@ -222,17 +225,18 @@
log.debug("control_evtchn = %d", self.device_channel)
log.debug("store_evtchn = %d", store_evtchn)
log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024)
- log.debug("lapic = %d", self.lapic)
log.debug("vcpus = %d", self.vm.getVCpuCount())
+ log.debug("acpi = %d", self.acpi)
+ log.debug("apic = %d", self.apic)
return xc.vmx_build(dom = self.vm.getDomid(),
image = self.kernel,
control_evtchn = self.device_channel,
store_evtchn = store_evtchn,
memsize = self.vm.getMemoryTarget() / 1024,
- lapic = self.lapic,
- vcpus = self.vm.getVCpuCount())
-
+ vcpus = self.vm.getVCpuCount(),
+ acpi = self.acpi,
+ apic = self.apic)
# Return a list of cmd line args to the device models based on the
# xm config file
@@ -264,44 +268,44 @@
nics = 0
for (name, info) in deviceConfig:
if name == 'vbd':
- uname = sxp.child_value(info, 'uname')
- typedev = sxp.child_value(info, 'dev')
- (_, vbdparam) = string.split(uname, ':', 1)
- if re.match('^ioemu:', typedev):
- (emtype, vbddev) = string.split(typedev, ':', 1)
- else:
- emtype = 'vbd'
- vbddev = typedev
- if emtype != 'ioemu':
- continue;
- vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
- if vbddev not in vbddev_list:
- raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
- ret.append("-%s" % vbddev)
- ret.append("%s" % vbdparam)
+ uname = sxp.child_value(info, 'uname')
+ typedev = sxp.child_value(info, 'dev')
+ (_, vbdparam) = string.split(uname, ':', 1)
+ if 'ioemu:' in typedev:
+ (emtype, vbddev) = string.split(typedev, ':', 1)
+ else:
+ emtype = 'vbd'
+ vbddev = typedev
+ if emtype == 'vbd':
+ continue;
+ vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
+ if vbddev not in vbddev_list:
+ raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
+ ret.append("-%s" % vbddev)
+ ret.append("%s" % vbdparam)
if name == 'vif':
- type = sxp.child_value(info, 'type')
- if type != 'ioemu':
- continue
- nics += 1
- if mac != None:
- continue
- mac = sxp.child_value(info, 'mac')
- bridge = sxp.child_value(info, 'bridge')
- if mac == None:
- mac = randomMAC()
- if bridge == None:
- bridge = 'xenbr0'
- ret.append("-macaddr")
- ret.append("%s" % mac)
- ret.append("-bridge")
- ret.append("%s" % bridge)
+ type = sxp.child_value(info, 'type')
+ if type != 'ioemu':
+ continue
+ nics += 1
+ if mac != None:
+ continue
+ mac = sxp.child_value(info, 'mac')
+ bridge = sxp.child_value(info, 'bridge')
+ if mac == None:
+ mac = randomMAC()
+ if bridge == None:
+ bridge = 'xenbr0'
+ ret.append("-macaddr")
+ ret.append("%s" % mac)
+ ret.append("-bridge")
+ ret.append("%s" % bridge)
if name == 'vtpm':
- instance = sxp.child_value(info, 'pref_instance')
- ret.append("-instance")
- ret.append("%s" % instance)
+ instance = sxp.child_value(info, 'pref_instance')
+ ret.append("-instance")
+ ret.append("%s" % instance)
ret.append("-nics")
- ret.append("%d" % nics)
+ ret.append("%d" % nics)
return ret
def configVNC(self, config):
@@ -340,6 +344,8 @@
env = dict(os.environ)
if self.display:
env['DISPLAY'] = self.display
+ if self.xauthority:
+ env['XAUTHORITY'] = self.xauthority
log.info("spawning device models: %s %s", self.device_model, args)
self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
log.info("device model pid: %d", self.pid)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Mon Jan 9 11:19:55 2006
+++ b/tools/python/xen/xend/server/blkif.py Mon Jan 9 11:22:17 2006
@@ -31,7 +31,7 @@
"""Block device interface controller. Handles all block devices
for a domain.
"""
-
+
def __init__(self, vm):
"""Create a block device controller.
"""
@@ -40,9 +40,9 @@
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
-
+
dev = sxp.child_value(config, 'dev')
- if re.match('^ioemu:', dev):
+ if 'ioemu:' in dev:
return (None,{},{})
devid = blkif.blkdev_name_to_number(dev)
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Mon Jan 9 11:19:55 2006
+++ b/tools/python/xen/xm/create.py Mon Jan 9 11:22:17 2006
@@ -160,9 +160,13 @@
fn=set_int, default=None,
use="CPUS to run the domain on.")
-gopts.var('lapic', val='LAPIC',
+gopts.var('acpi', val='ACPI',
fn=set_int, default=0,
- use="Disable or enable local APIC of VMX domain.")
+ use="Disable or enable ACPI of VMX domain.")
+
+gopts.var('apic', val='APIC',
+ fn=set_int, default=0,
+ use="Disable or enable APIC of VMX domain.")
gopts.var('vcpus', val='VCPUS',
fn=set_int, default=1,
@@ -387,6 +391,10 @@
gopts.var('display', val='DISPLAY',
fn=set_value, default=None,
use="X11 display to use")
+
+gopts.var('xauthority', val='XAUTHORITY',
+ fn=set_value, default=None,
+ use="X11 Authority to use")
def err(msg):
@@ -526,7 +534,8 @@
"""
args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio',
- 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic']
+ 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'acpi', 'apic',
+ 'xauthority' ]
for a in args:
if (vals.__dict__[a]):
config_image.append([a, vals.__dict__[a]])
@@ -801,6 +810,9 @@
if not gopts.vals.display:
gopts.vals.display = os.getenv("DISPLAY")
+ if not gopts.vals.xauthority:
+ gopts.vals.xauthority = os.getenv("XAUTHORITY")
+
# Process remaining args as config variables.
for arg in args:
if '=' in arg:
diff -r 25e3c8668f1f -r 8af1199488d3 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Mon Jan 9 11:19:55 2006
+++ b/tools/python/xen/xm/main.py Mon Jan 9 11:22:17 2006
@@ -390,7 +390,6 @@
def xm_vcpu_list(args):
- print 'Name                              ID  VCPU  CPU  State  Time(s)  CPU Affinity'
from xen.xend.XendClient import server
if args:
@@ -400,6 +399,8 @@
dominfo = map(
lambda x: server.xend_domain_vcpuinfo(sxp.child_value(x, 'name')),
doms)
+
+ print 'Name                              ID  VCPU  CPU  State  Time(s)  CPU Affinity'
for dom in dominfo:
def get_info(n):
@@ -625,6 +626,8 @@
server.xend_domain_cpu_sedf_set(dom, *v)
def xm_info(args):
+ arg_check(args, "info", 0)
+
from xen.xend.XendClient import server
info = server.xend_node()
@@ -645,9 +648,12 @@
def xm_top(args):
+ arg_check(args, "top", 0)
+
os.execvp('xentop', ['xentop'])
def xm_dmesg(args):
+ arg_check(args, "dmesg", 0)
gopts = Opts(use="""[-c|--clear]
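xm_info, xm_top and xm_dmesg now call arg_check before doing anything, so
stray arguments fail fast instead of being silently ignored. arg_check itself
is not shown in this hunk; a hypothetical reconstruction of the behaviour
these call sites imply:

    def arg_check(args, name, expected):
        # Reject subcommands invoked with the wrong argument count
        # (all three call sites above expect zero arguments).
        if len(args) != expected:
            raise SystemExit("Error: 'xm %s' takes %d argument(s), got %d"
                             % (name, expected, len(args)))

    arg_check([], "info", 0)          # passes
    # arg_check(["x"], "top", 0)      # would exit with a usage error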
diff -r 25e3c8668f1f -r 8af1199488d3 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c Mon Jan 9 11:19:55 2006
+++ b/tools/tests/test_x86_emulator.c Mon Jan 9 11:22:17 2006
@@ -92,7 +92,7 @@
regs.ecx = 0x12345678;
cr2 = (unsigned long)&res;
res = 0x7FFFFFFF;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x92345677) ||
(regs.eflags != 0xa94) ||
@@ -110,7 +110,7 @@
regs.ecx = 0x12345678UL;
#endif
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x92345677) ||
(regs.ecx != 0x8000000FUL) ||
@@ -125,7 +125,7 @@
regs.eax = 0x92345677UL;
regs.ecx = 0xAA;
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x923456AA) ||
(regs.eflags != 0x244) ||
@@ -141,7 +141,7 @@
regs.eax = 0xAABBCC77UL;
regs.ecx = 0xFF;
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x923456AA) ||
((regs.eflags&0x240) != 0x200) ||
@@ -157,7 +157,7 @@
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x12345678) ||
(regs.eflags != 0x200) ||
@@ -174,7 +174,7 @@
regs.eax = 0x923456AAUL;
regs.ecx = 0xDDEEFF00L;
cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0xDDEEFF00) ||
(regs.eflags != 0x244) ||
@@ -193,7 +193,7 @@
regs.edi = (unsigned long)&res + 2;
regs.error_code = 0; /* read fault */
cr2 = regs.esi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x44554455) ||
(regs.eflags != 0x200) ||
@@ -211,7 +211,7 @@
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)&res;
cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x2233445D) ||
((regs.eflags&0x201) != 0x201) ||
@@ -229,7 +229,7 @@
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)cmpxchg8b_res;
cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(cmpxchg8b_res[0] != 0x9999AAAA) ||
(cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -243,7 +243,7 @@
regs.eip = (unsigned long)&instr[0];
regs.edi = (unsigned long)cmpxchg8b_res;
cr2 = regs.edi;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(cmpxchg8b_res[0] != 0x9999AAAA) ||
(cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -260,7 +260,7 @@
regs.ecx = 0x12345678;
cr2 = (unsigned long)&res;
res = 0x82;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x82) ||
(regs.ecx != 0xFFFFFF82) ||
@@ -275,7 +275,7 @@
regs.ecx = 0x12345678;
cr2 = (unsigned long)&res;
res = 0x1234aa82;
- rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
if ( (rc != 0) ||
(res != 0x1234aa82) ||
(regs.ecx != 0xaa82) ||
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/README
--- a/tools/vtpm_manager/README Mon Jan 9 11:19:55 2006
+++ b/tools/vtpm_manager/README Mon Jan 9 11:22:17 2006
@@ -53,11 +53,6 @@
MANUAL_DM_LAUNCH -> Must manually launch & kill VTPMs
-WELL_KNOWN_SRK_AUTH   -> Rather than randomly generating the password for the SRK,
-                         use a well known value. This is necessary for sharing use
-                         of the SRK across applications. Such as VTPM and Dom0
-                         measurement software.
-
WELL_KNOWN_OWNER_AUTH -> Rather than randomly generating the password for the owner,
                         use a well known value. This is useful for debugging and for
                         poor bios which do not support clearing TPM if OwnerAuth is
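The removed README entry described the option this changeset retires: the SRK
password is now always the well-known value rather than a build-time choice.
The convention, sketched below; the constant matches the SRK_AUTH definition
in vtpm_manager.c later in this patch:

    # A fixed, publicly known 20-byte TPM_AUTHDATA takes the place of a
    # random secret so the VTPM manager and Dom0 measurement software can
    # share the SRK.
    WELL_KNOWN_AUTH = b"\xff" * 20
    assert len(WELL_KNOWN_AUTH) == 20   # sizeof(TPM_AUTHDATA)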
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/Rules.mk
--- a/tools/vtpm_manager/Rules.mk Mon Jan 9 11:19:55 2006
+++ b/tools/vtpm_manager/Rules.mk Mon Jan 9 11:22:17 2006
@@ -56,8 +56,7 @@
# Do not have manager launch DMs.
#CFLAGS += -DMANUAL_DM_LAUNCH
-# Fixed SRK
-CFLAGS += -DWELL_KNOWN_SRK_AUTH
+# Fixed OwnerAuth
#CFLAGS += -DWELL_KNOWN_OWNER_AUTH
# TPM Hardware Device or TPM Simulator
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/securestorage.c
--- a/tools/vtpm_manager/manager/securestorage.c Mon Jan 9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/securestorage.c Mon Jan 9 11:22:17 2006
@@ -65,7 +65,7 @@
UINT32 i;
struct pack_constbuf_t symkey_cipher32, data_cipher32;
- vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping[%d]: 0x", buffer_len(inbuf));
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping Input[%d]: 0x",
buffer_len(inbuf));
for (i=0; i< buffer_len(inbuf); i++)
vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
@@ -94,6 +94,12 @@
BSG_TPM_SIZE32_DATA, &data_cipher32);
vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of
E(data)\n", buffer_len(&symkey_cipher), buffer_len(&data_cipher));
+
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Enveloping Output[%d]: 0x",
buffer_len(sealed_data));
+ for (i=0; i< buffer_len(sealed_data); i++)
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_data->bytes[i]);
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
goto egress;
abort_egress:
@@ -125,7 +131,7 @@
memset(&symkey, 0, sizeof(symkey_t));
- vtpmloginfo(VTPM_LOG_VTPM_DEEP, "envelope decrypting[%ld]: 0x", cipher_size);
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Envelope Decrypt Input[%ld]: 0x",
cipher_size);
for (i=0; i< cipher_size; i++)
vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cipher[i]);
vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
@@ -155,6 +161,11 @@
// Decrypt State
TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &data_cipher,
unsealed_data) );
+
+ vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Envelope Decrypte Output[%d]: 0x",
buffer_len(unsealed_data));
+ for (i=0; i< buffer_len(unsealed_data); i++)
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", unsealed_data->bytes[i]);
+ vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
goto egress;
@@ -291,124 +302,175 @@
return status;
}
+
TPM_RESULT VTPM_SaveService(void) {
TPM_RESULT status=TPM_SUCCESS;
int fh, dmis=-1;
-
- BYTE *flat_global;
- int flat_global_size, bytes_written;
+
+ BYTE *flat_boot_key, *flat_dmis, *flat_enc;
+ buffer_t clear_flat_global, enc_flat_global;
UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap);
+ UINT32 bootKeySize = buffer_len(&vtpm_globals->bootKeyWrap);
struct pack_buf_t storage_key_pack = {storageKeySize, vtpm_globals->storageKeyWrap.bytes};
-
+ struct pack_buf_t boot_key_pack = {bootKeySize, vtpm_globals->bootKeyWrap.bytes};
+
struct hashtable_itr *dmi_itr;
VTPM_DMI_RESOURCE *dmi_res;
-
- UINT32 flat_global_full_size;
-
- // Global Values needing to be saved
- flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths
- sizeof(UINT32) + // storagekeysize
- storageKeySize + // storage key
- hashtable_count(vtpm_globals->dmi_map) * // num DMIS
- (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
-
-
- flat_global = (BYTE *) malloc( flat_global_full_size);
-
- flat_global_size = BSG_PackList(flat_global, 4,
- BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
- BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
- BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth,
- BSG_TPM_SIZE32_DATA, &storage_key_pack);
-
+
+ UINT32 boot_key_size, flat_dmis_size;
+
+ // Initially fill these with buffer sizes for each data type. Later fill
+ // in actual size, once flattened.
+ boot_key_size = sizeof(UINT32) + // bootkeysize
+ bootKeySize; // boot key
+
+ TPMTRYRETURN(buffer_init(&clear_flat_global, 3*sizeof(TPM_DIGEST) + // Auths
+ sizeof(UINT32) + // storagekeysize
+ storageKeySize, NULL) ); // storage key
+
+ flat_dmis_size = (hashtable_count(vtpm_globals->dmi_map) - 1) * // num DMIS (-1 for Dom0)
+ (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
+
+ flat_boot_key = (BYTE *) malloc( boot_key_size );
+ flat_enc = (BYTE *) malloc( sizeof(UINT32) );
+ flat_dmis = (BYTE *) malloc( flat_dmis_size );
+
+ boot_key_size = BSG_PackList(flat_boot_key, 1,
+ BSG_TPM_SIZE32_DATA, &boot_key_pack);
+
+ BSG_PackList(clear_flat_global.bytes, 3,
+ BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+ BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth,
+ BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
+ TPMTRYRETURN(envelope_encrypt(&clear_flat_global,
+ &vtpm_globals->bootKey,
+ &enc_flat_global) );
+
+ BSG_PackConst(buffer_len(&enc_flat_global), 4, flat_enc);
+
// Per DMI values to be saved
if (hashtable_count(vtpm_globals->dmi_map) > 0) {
-
+
dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
do {
dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
dmis++;
// No need to save dmi0.
- if (dmi_res->dmi_id == 0)
- continue;
-
-
- flat_global_size += BSG_PackList( flat_global + flat_global_size, 3,
- BSG_TYPE_UINT32, &dmi_res->dmi_id,
- BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
- BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-
+ if (dmi_res->dmi_id == 0)
+ continue;
+
+
+ flat_dmis_size += BSG_PackList( flat_dmis + flat_dmis_size, 3,
+ BSG_TYPE_UINT32, &dmi_res->dmi_id,
+ BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+ BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+
} while (hashtable_iterator_advance(dmi_itr));
}
-
- //FIXME: Once we have a way to protect a TPM key, we should use it to
- // encrypt this blob. BUT, unless there is a way to ensure the key is
- // not used by other apps, this encryption is useless.
+
fh = open(STATE_FILE, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
if (fh == -1) {
vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n",
STATE_FILE);
status = TPM_IOERROR;
goto abort_egress;
}
-
- if ( (bytes_written = write(fh, flat_global, flat_global_size)) != flat_global_size ) {
- vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. %d/%d bytes written.\n", bytes_written, flat_global_size);
- status = TPM_IOERROR;
- goto abort_egress;
- }
- vtpm_globals->DMI_table_dirty = FALSE;
-
+
+ if ( ( write(fh, flat_boot_key, boot_key_size) != boot_key_size ) ||
+ ( write(fh, flat_enc, sizeof(UINT32)) != sizeof(UINT32) ) ||
+ ( write(fh, enc_flat_global.bytes, buffer_len(&enc_flat_global)) != buffer_len(&enc_flat_global) ) ||
+ ( write(fh, flat_dmis, flat_dmis_size) != flat_dmis_size ) ) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to completely write service data.\n");
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+
+ vtpm_globals->DMI_table_dirty = FALSE;
+
goto egress;
-
+
abort_egress:
egress:
-
- free(flat_global);
+
+ free(flat_boot_key);
+ free(flat_enc);
+ buffer_free(&enc_flat_global);
+ free(flat_dmis);
close(fh);
-
+
vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis =
%d)\n", (int) status, dmis);
return status;
}
TPM_RESULT VTPM_LoadService(void) {
-
+
TPM_RESULT status=TPM_SUCCESS;
int fh, stat_ret, dmis=0;
long fh_size = 0, step_size;
- BYTE *flat_global=NULL;
- struct pack_buf_t storage_key_pack;
- UINT32 *dmi_id_key;
-
+ BYTE *flat_table=NULL;
+ buffer_t unsealed_data;
+ struct pack_buf_t storage_key_pack, boot_key_pack;
+ UINT32 *dmi_id_key, enc_size;
+
VTPM_DMI_RESOURCE *dmi_res;
struct stat file_stat;
-
+
+ TPM_HANDLE boot_key_handle;
+ TPM_AUTHDATA boot_usage_auth;
+ memset(&boot_usage_auth, 0, sizeof(TPM_AUTHDATA));
+
fh = open(STATE_FILE, O_RDONLY );
stat_ret = fstat(fh, &file_stat);
- if (stat_ret == 0)
+ if (stat_ret == 0)
fh_size = file_stat.st_size;
else {
status = TPM_IOERROR;
goto abort_egress;
}
-
- flat_global = (BYTE *) malloc(fh_size);
-
- if ((long) read(fh, flat_global, fh_size) != fh_size ) {
- status = TPM_IOERROR;
- goto abort_egress;
- }
-
+
+ flat_table = (BYTE *) malloc(fh_size);
+
+ if ((long) read(fh, flat_table, fh_size) != fh_size ) {
+ status = TPM_IOERROR;
+ goto abort_egress;
+ }
+
+ // Read Boot Key
+ step_size = BSG_UnpackList( flat_table, 2,
+ BSG_TPM_SIZE32_DATA, &boot_key_pack,
+ BSG_TYPE_UINT32, &enc_size);
+
+ TPMTRYRETURN(buffer_init(&vtpm_globals->bootKeyWrap, 0, 0) );
+ TPMTRYRETURN(buffer_append_raw(&vtpm_globals->bootKeyWrap, boot_key_pack.size, boot_key_pack.data) );
+
+ //Load Boot Key
+ TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+ TPM_SRK_KEYHANDLE,
+ &vtpm_globals->bootKeyWrap,
+ &SRK_AUTH,
+ &boot_key_handle,
+ &vtpm_globals->keyAuth,
+ &vtpm_globals->bootKey,
+ FALSE) );
+
+ TPMTRYRETURN( envelope_decrypt(enc_size,
+ flat_table + step_size,
+ vtpm_globals->manager_tcs_handle,
+ boot_key_handle,
+ (const TPM_AUTHDATA*) &boot_usage_auth,
+ &unsealed_data) );
+ step_size += enc_size;
+
// Global Values needing to be saved
- step_size = BSG_UnpackList( flat_global, 4,
- BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
- BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
- BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth,
- BSG_TPM_SIZE32_DATA, &storage_key_pack);
-
+ BSG_UnpackList( unsealed_data.bytes, 3,
+ BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+ BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth,
+ BSG_TPM_SIZE32_DATA, &storage_key_pack);
+
TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) );
TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap, storage_key_pack.size, storage_key_pack.data) );
-
+
// Per DMI values to be saved
while ( step_size < fh_size ){
if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) {
@@ -417,35 +479,38 @@
} else {
dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE));
dmis++;
-
+
dmi_res->connected = FALSE;
-
- step_size += BSG_UnpackList(flat_global + step_size, 3,
- BSG_TYPE_UINT32, &dmi_res->dmi_id,
- BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
- BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-
+
+ step_size += BSG_UnpackList(flat_table + step_size, 3,
+ BSG_TYPE_UINT32, &dmi_res->dmi_id,
+ BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+ BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+
// install into map
dmi_id_key = (UINT32 *) malloc (sizeof(UINT32));
*dmi_id_key = dmi_res->dmi_id;
if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) {
- status = TPM_FAIL;
- goto abort_egress;
+ status = TPM_FAIL;
+ goto abort_egress;
}
-
+
}
-
- }
-
+
+ }
+
vtpmloginfo(VTPM_LOG_VTPM, "Loaded saved state (dmis = %d).\n", dmis);
goto egress;
-
+
abort_egress:
vtpmlogerror(VTPM_LOG_VTPM, "Failed to load service data with error = %s\n",
tpm_get_error_name(status));
egress:
-
- free(flat_global);
+
+ free(flat_table);
close(fh);
-
+
+ // TODO: Could be nice and evict BootKey. (Need to add EvictKey to VTSP.)
+
return status;
}
+
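After this rewrite the state file holds four regions in order: a
length-prefixed boot key blob, a four-byte length for the encrypted global
block, the encrypted globals themselves, and one 44-byte record per DMI
(a UINT32 id plus two 20-byte digests). A parsing sketch of that layout;
big-endian integers are an assumption based on TPM wire format:

    import struct

    def parse_vtpm_state(blob):
        off = 0
        (boot_key_len,) = struct.unpack_from(">I", blob, off); off += 4
        boot_key = blob[off:off + boot_key_len]; off += boot_key_len
        (enc_len,) = struct.unpack_from(">I", blob, off); off += 4
        enc_globals = blob[off:off + enc_len]; off += enc_len
        dmis = []
        while off + 44 <= len(blob):          # 4 + 20 + 20 bytes per DMI
            (dmi_id,) = struct.unpack_from(">I", blob, off)
            dmis.append((dmi_id, blob[off+4:off+24], blob[off+24:off+44]))
            off += 44
        return boot_key, enc_globals, dmis

    # Round-trip a fabricated blob: 3-byte key, 2-byte ciphertext, one DMI.
    fake = (struct.pack(">I", 3) + b"KEY" + struct.pack(">I", 2) + b"EG"
            + struct.pack(">I", 1) + b"\x00" * 40)
    print(parse_vtpm_state(fake))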
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtpm_manager.c
--- a/tools/vtpm_manager/manager/vtpm_manager.c Mon Jan 9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtpm_manager.c Mon Jan 9 11:22:17 2006
@@ -74,16 +74,15 @@
#endif
// --------------------------- Well Known Auths --------------------------
-#ifdef WELL_KNOWN_SRK_AUTH
-static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+const TPM_AUTHDATA SRK_AUTH = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-#endif
#ifdef WELL_KNOWN_OWNER_AUTH
static BYTE FIXED_OWNER_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
#endif
-
+
+
// -------------------------- Hash table functions --------------------
static unsigned int hashfunc32(void *ky) {
@@ -100,13 +99,7 @@
TPM_RESULT status = TPM_SUCCESS;
- // Generate Auth's for SRK & Owner
-#ifdef WELL_KNOWN_SRK_AUTH
- memcpy(vtpm_globals->srk_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA));
-#else
- Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) );
-#endif
-
+ // Generate Auth for Owner
#ifdef WELL_KNOWN_OWNER_AUTH
memcpy(vtpm_globals->owner_usage_auth, FIXED_OWNER_AUTH, sizeof(TPM_AUTHDATA));
#else
@@ -116,14 +109,14 @@
// Take Owership of TPM
CRYPTO_INFO ek_cryptoInfo;
- vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n");
status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo);
// If we can read PubEK then there is no owner and we should take it.
if (status == TPM_SUCCESS) {
+ vtpmloginfo(VTPM_LOG_VTPM, "Failed to readEK meaning TPM has an owner.
Creating Keys off existing SRK.\n");
TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle,
(const TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,
- (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &SRK_AUTH,
&ek_cryptoInfo,
&vtpm_globals->keyAuth));
@@ -142,7 +135,7 @@
TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
TPM_ET_KEYHANDLE,
TPM_SRK_KEYHANDLE,
- (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &SRK_AUTH,
&sharedsecret,
&osap) );
@@ -157,8 +150,43 @@
&vtpm_globals->storageKeyWrap,
&osap) );
- vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
-
+ // Generate boot key's auth
+ Crypto_GetRandom( &vtpm_globals->storage_key_usage_auth,
+ sizeof(TPM_AUTHDATA) );
+
+ TPM_AUTHDATA bootKeyWrapAuth;
+ memset(&bootKeyWrapAuth, 0, sizeof(bootKeyWrapAuth));
+
+ TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
+ TPM_ET_KEYHANDLE,
+ TPM_SRK_KEYHANDLE,
+ &SRK_AUTH,
+ &sharedsecret,
+ &osap) );
+
+ osap.fContinueAuthSession = FALSE;
+
+ // FIXME: This key protects the global secrets on disk. It should use TPM
+ // PCR bindings to limit its use to legit configurations.
+ // Current binds are open, implying a Trusted VM contains this code.
+ // If this VM is not Trusted, use measurement and PCR bindings.
+ TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle,
+ TPM_KEY_BIND,
+ (const TPM_AUTHDATA*)&bootKeyWrapAuth,
+ TPM_SRK_KEYHANDLE,
+ (const TPM_AUTHDATA*)&sharedsecret,
+ &vtpm_globals->bootKeyWrap,
+ &osap) );
+
+ // Populate CRYPTO_INFO vtpm_globals->bootKey. This does not load it into the TPM.
+ TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+ TPM_SRK_KEYHANDLE,
+ &vtpm_globals->bootKeyWrap,
+ NULL,
+ NULL,
+ NULL,
+ &vtpm_globals->bootKey,
+ TRUE ) );
goto egress;
abort_egress:
@@ -278,24 +306,26 @@
#endif
// Check status of rx_fh. If necessary attempt to re-open it.
+ char* s = NULL;
if (*rx_fh < 0) {
#ifdef VTPM_MULTI_VM
- *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+ s = VTPM_BE_DEV;
#else
if (threadType == BE_LISTENER_THREAD)
#ifdef DUMMY_BACKEND
- *rx_fh = open("/tmp/in.fifo", O_RDWR);
+ s = "/tmp/in.fifo";
#else
- *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+ s = VTPM_BE_DEV;
#endif
else // DMI Listener
- *rx_fh = open(VTPM_RX_FIFO, O_RDWR);
+ s = VTPM_RX_FIFO;
+ *rx_fh = open(s, O_RDWR);
#endif
}
// Respond to failures to open rx_fh
if (*rx_fh < 0) {
- vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n");
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh for %s.\n", s);
#ifdef VTPM_MULTI_VM
return TPM_IOERROR;
#else
@@ -713,7 +743,7 @@
///////////////////////////////////////////////////////////////////////////////
TPM_RESULT VTPM_Init_Service() {
- TPM_RESULT status = TPM_FAIL;
+ TPM_RESULT status = TPM_FAIL, serviceStatus;
BYTE *randomsead;
UINT32 randomsize;
@@ -737,7 +767,7 @@
// Create new TCS Object
vtpm_globals->manager_tcs_handle = 0;
-
+
TPMTRYRETURN(TCS_create());
// Create TCS Context for service
@@ -756,17 +786,24 @@
vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
// If failed, create new Service.
- if (VTPM_LoadService() != TPM_SUCCESS)
+ serviceStatus = VTPM_LoadService();
+ if (serviceStatus == TPM_IOERROR) {
+ vtpmloginfo(VTPM_LOG_VTPM, "Failed to read service file. Assuming first
time initialization.\n");
TPMTRYRETURN( VTPM_Create_Service() );
+ } else if (serviceStatus != TPM_SUCCESS) {
+ vtpmlogerror(VTPM_LOG_VTPM, "Failed to read existing service file");
+ exit(1);
+ }
//Load Storage Key
TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
TPM_SRK_KEYHANDLE,
&vtpm_globals->storageKeyWrap,
- (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+ &SRK_AUTH,
&vtpm_globals->storageKeyHandle,
&vtpm_globals->keyAuth,
- &vtpm_globals->storageKey) );
+ &vtpm_globals->storageKey,
+ FALSE ) );
// Create entry for Dom0 for control messages
TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) );
@@ -797,12 +834,11 @@
free (dmi_itr);
}
-
+ if ( (vtpm_globals->DMI_table_dirty) && (VTPM_SaveService() != TPM_SUCCESS) )
+ vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+
TCS_CloseContext(vtpm_globals->manager_tcs_handle);
-
- if ( (vtpm_globals->DMI_table_dirty) &&
- (VTPM_SaveService() != TPM_SUCCESS) )
- vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+ TCS_destroy();
hashtable_destroy(vtpm_globals->dmi_map, 1);
free(vtpm_globals);
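VTPM_Init_Service now separates two failure modes of VTPM_LoadService: a
missing or unreadable file means first-time initialization and triggers
VTPM_Create_Service, while any other error aborts rather than silently
regenerating keys over existing state. The policy, sketched with
illustrative status codes:

    TPM_SUCCESS, TPM_IOERROR, TPM_FAIL = 0, 30, 9   # illustrative values only

    def init_service(load_service, create_service):
        status = load_service()
        if status == TPM_IOERROR:
            create_service()        # no state file: assume first boot
        elif status != TPM_SUCCESS:
            # state exists but cannot be read back: refuse to run
            raise SystemExit("Failed to read existing service file")

    init_service(lambda: TPM_IOERROR, lambda: None)   # first-boot path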
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtpmpriv.h
--- a/tools/vtpm_manager/manager/vtpmpriv.h Mon Jan 9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtpmpriv.h Mon Jan 9 11:22:17 2006
@@ -108,6 +108,7 @@
TCS_CONTEXT_HANDLE manager_tcs_handle; // TCS Handle used by manager
TPM_HANDLE storageKeyHandle; // Key used by persistent store
CRYPTO_INFO storageKey; // For software encryption
+ CRYPTO_INFO bootKey; // For saving table
TCS_AUTH keyAuth; // OIAP session for storageKey
BOOL DMI_table_dirty; // Indicates that a command
// has updated the DMI table
@@ -115,15 +116,17 @@
// Persistent Data
TPM_AUTHDATA owner_usage_auth; // OwnerAuth of real TPM
- TPM_AUTHDATA srk_usage_auth; // SRK Auth of real TPM
buffer_t storageKeyWrap; // Wrapped copy of storageKey
+ TPM_AUTHDATA srk_usage_auth;
+ TPM_AUTHDATA storage_key_usage_auth;
- TPM_AUTHDATA storage_key_usage_auth;
-
+ buffer_t bootKeyWrap; // Wrapped copy of boot key
+
}VTPM_GLOBALS;
-//Global dmi map
-extern VTPM_GLOBALS *vtpm_globals;
+// --------------------------- Global Values --------------------------
+extern VTPM_GLOBALS *vtpm_globals; // Key info and DMI states
+extern const TPM_AUTHDATA SRK_AUTH; // SRK Well Known Auth Value
// ********************** Command Handler Prototypes ***********************
TPM_RESULT VTPM_Handle_Load_NVM( VTPM_DMI_RESOURCE *myDMI,
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtsp.c
--- a/tools/vtpm_manager/manager/vtsp.c Mon Jan 9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtsp.c Mon Jan 9 11:22:17 2006
@@ -144,7 +144,10 @@
if (memcmp (&hm, &(auth->HMAC), sizeof(TPM_DIGEST)) == 0) // 0 indicates equality
return (TPM_SUCCESS);
else {
- VTSP_OIAP( hContext, auth);
+ // If specified, reconnect the OIAP session.
+ // NOTE: This only works for TCS's that never have a 0 context.
+ if (hContext)
+ VTSP_OIAP( hContext, auth);
return (TPM_AUTHFAIL);
}
}
@@ -157,6 +160,10 @@
TPMTRYRETURN( TCSP_OIAP(hContext,
&auth->AuthHandle,
&auth->NonceEven) );
+
+ memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
+ auth->fContinueAuthSession = FALSE;
+
goto egress;
abort_egress:
@@ -195,6 +202,9 @@
BSG_TPM_NONCE, &nonceOddOSAP);
Crypto_HMAC(sharedSecretText, sizeof(sharedSecretText), (BYTE *) usageAuth, TPM_DIGEST_SIZE, (BYTE *) sharedSecret);
+
+ memset(&auth->HMAC, 0, sizeof(TPM_DIGEST));
+ auth->fContinueAuthSession = FALSE;
goto egress;
@@ -287,9 +297,6 @@
srkKeyInfo.parms = (BYTE *) &srkRSAkeyInfo;
struct pack_buf_t srkText;
-
- // GenerateAuth new nonceOdd
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
//These values are accurate for an enc(AuthData).
struct pack_buf_t encOwnerAuth, encSrkAuth;
@@ -383,9 +390,6 @@
BYTE *paramText; // Digest to make Auth.
UINT32 paramTextSize;
- // Generate HMAC
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-
paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
paramTextSize = BSG_PackList(paramText, 1,
@@ -504,9 +508,6 @@
newKeyText.data = flatKey;
newKeyText.size = flatKeySize;
- // GenerateAuth new nonceOdd
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-
// Generate HMAC
paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
@@ -563,63 +564,66 @@
const TPM_AUTHDATA *parentAuth,
TPM_HANDLE *newKeyHandle,
TCS_AUTH *auth,
- CRYPTO_INFO *cryptoinfo /*= NULL*/) {
-
-
- vtpmloginfo(VTPM_LOG_VTSP, "Loading Key.\n%s","");
+ CRYPTO_INFO *cryptoinfo,
+ const BOOL skipTPMLoad) {
+
+
+ vtpmloginfo(VTPM_LOG_VTSP, "Loading Key %s.\n", (!skipTPMLoad ? "into TPM" :
"only into memory"));
TPM_RESULT status = TPM_SUCCESS;
TPM_COMMAND_CODE command = TPM_ORD_LoadKey;
-
- BYTE *paramText; // Digest to make Auth.
+
+ BYTE *paramText=NULL; // Digest to make Auth.
UINT32 paramTextSize;
-
- if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) ||
- (newKeyHandle==NULL) || (auth==NULL)) {
- status = TPM_BAD_PARAMETER;
- goto abort_egress;
- }
-
- // Generate Extra TCS Parameters
- TPM_HANDLE phKeyHMAC;
-
- // Generate HMAC
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
-
- paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
-
- paramTextSize = BSG_PackList(paramText, 1,
- BSG_TPM_COMMAND_CODE, &command);
-
- memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, buffer_len(rgbWrappedKeyBlob));
- paramTextSize += buffer_len(rgbWrappedKeyBlob);
-
- TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+
+ // SkipTPMLoad stops key from being loaded into TPM, but still generates CRYPTO_INFO for it
+ if (! skipTPMLoad) {
+
+ if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) ||
+ (newKeyHandle==NULL) || (auth==NULL)) {
+ status = TPM_BAD_PARAMETER;
+ goto abort_egress;
+ }
+
+ // Generate Extra TCS Parameters
+ TPM_HANDLE phKeyHMAC;
+
+ paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
+
+ paramTextSize = BSG_PackList(paramText, 1,
+ BSG_TPM_COMMAND_CODE, &command);
+
+ memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, buffer_len(rgbWrappedKeyBlob));
+ paramTextSize += buffer_len(rgbWrappedKeyBlob);
+
+ TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
parentAuth, auth) );
- // Call TCS
- TPMTRYRETURN( TCSP_LoadKeyByBlob( hContext,
- hUnwrappingKey,
- buffer_len(rgbWrappedKeyBlob),
- rgbWrappedKeyBlob->bytes,
- auth,
- newKeyHandle,
- &phKeyHMAC) );
-
- // Verify Auth
- paramTextSize = BSG_PackList(paramText, 3,
- BSG_TPM_RESULT, &status,
- BSG_TPM_COMMAND_CODE, &command,
- BSG_TPM_HANDLE, newKeyHandle);
-
- TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
- parentAuth, auth,
- hContext) );
-
- // Unpack/return key structure
+ // Call TCS
+ TPMTRYRETURN( TCSP_LoadKeyByBlob( hContext,
+ hUnwrappingKey,
+ buffer_len(rgbWrappedKeyBlob),
+ rgbWrappedKeyBlob->bytes,
+ auth,
+ newKeyHandle,
+ &phKeyHMAC) );
+
+ // Verify Auth
+ paramTextSize = BSG_PackList(paramText, 3,
+ BSG_TPM_RESULT, &status,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_HANDLE, newKeyHandle);
+
+ TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+ parentAuth, auth,
+ hContext) );
+ }
+
+ // Build cryptoinfo structure for software crypto function.
if (cryptoinfo != NULL) {
TPM_KEY newKey;
+ // Unpack/return key structure
BSG_Unpack(BSG_TPM_KEY, rgbWrappedKeyBlob->bytes , &newKey);
TPM_RSA_KEY_PARMS rsaKeyParms;
@@ -669,9 +673,6 @@
struct pack_buf_t clear_data32;
BYTE *clear_data_text;
UINT32 clear_data_size;
-
- // Generate HMAC
- Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) );
struct pack_buf_t bound_data32 = {bound_data->size, bound_data->bytes};
@@ -781,6 +782,196 @@
return TPM_SUCCESS;
}
+TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE hContext,
+ const TPM_KEY_HANDLE keyHandle,
+ const TPM_AUTHDATA *sealDataAuth,
+ const TPM_PCR_COMPOSITE *pcrComp,
+ const buffer_t *inData,
+ TPM_STORED_DATA *sealedData,
+ const TPM_SECRET *osapSharedSecret,
+ TCS_AUTH *auth) {
+
+ TPM_RESULT status = TPM_SUCCESS;
+ TPM_COMMAND_CODE command = TPM_ORD_Seal;
+
+ BYTE *paramText; // Digest to make Auth.
+ UINT32 paramTextSize;
+
+ // Generate PCR_Info Struct from Comp
+ TPM_PCR_INFO pcrInfo;
+ UINT32 pcrInfoSize, flatpcrSize;
+ BYTE flatpcr[3 + // PCR_Select = 3 1 byte banks
+ sizeof(UINT16) + // 2 byte UINT16
+ sizeof(UINT32) + // PCR_Comp = 4 byte UINT32
+ 24 * sizeof(TPM_PCRVALUE) ]; // up to 24 PCRs
+
+ if (pcrComp != NULL) {
+ //printf("\n\tBinding to PCRs: ");
+ //for(int i = 0 ; i < pcrComp->select.sizeOfSelect ; i++)
+ //printf("%2.2x", pcrComp->select.pcrSelect[i]);
+
+ memcpy(&pcrInfo.pcrSelection, &pcrComp->select, sizeof(TPM_PCR_SELECTION));
+
+ flatpcrSize = BSG_Pack(BSG_TPM_PCR_COMPOSITE, (BYTE *) pcrComp, flatpcr);
+ Crypto_SHA1Full((BYTE *) flatpcr, flatpcrSize, (BYTE *) &(pcrInfo.digestAtRelease));
+ memset(&(pcrInfo.digestAtCreation), 0, sizeof(TPM_DIGEST));
+ pcrInfoSize = BSG_Pack(BSG_TPM_PCR_INFO, (BYTE *) &pcrInfo, flatpcr);
+ } else {
+ //printf("\n\tBinding to no PCRS.");
+ pcrInfoSize = 0;
+ }
+
+ // Calculate encUsageAuth
+ BYTE XORbuffer[sizeof(TPM_SECRET) + sizeof(TPM_NONCE)];
+ UINT32 XORbufferSize = sizeof(XORbuffer);
+ TPM_DIGEST XORKey;
+ TPM_ENCAUTH encAuth;
+
+ BSG_PackList( XORbuffer, 2,
+ BSG_TPM_SECRET, osapSharedSecret,
+ BSG_TPM_NONCE, &auth->NonceEven );
+
+ Crypto_SHA1Full(XORbuffer, XORbufferSize, (BYTE *) &XORKey);
+
+ int i;
+ for (i=0; i < TPM_DIGEST_SIZE; i++)
+ ((BYTE *) &encAuth)[i] = ((BYTE *) &XORKey)[i] ^ ((BYTE *) sealDataAuth)[i];
+
+ // Generate Extra TCS Parameters
+ UINT32 inDataSize = buffer_len(inData);
+ struct pack_buf_t inData_pack = {inDataSize, inData->bytes};
+ struct pack_buf_t pcrInfo_pack = {pcrInfoSize, flatpcr};
+
+ UINT32 sealedDataSize;
+ BYTE *flatSealedData=NULL;
+
+ paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
+
+ paramTextSize = BSG_PackList(paramText, 4,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_ENCAUTH, &encAuth,
+ BSG_TPM_SIZE32_DATA, &pcrInfo_pack,
+ BSG_TPM_SIZE32_DATA, &inData_pack);
+
+ TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+ osapSharedSecret, auth) );
+
+ // Call TCS
+ TPMTRYRETURN( TCSP_Seal( hContext,
+ keyHandle,
+ encAuth,
+ pcrInfoSize,
+ flatpcr,
+ inDataSize,
+ inData->bytes,
+ auth,
+ &sealedDataSize,
+ &flatSealedData) );
+
+ // Unpack/return key structure
+ BSG_Unpack( BSG_TPM_STORED_DATA, flatSealedData, sealedData );
+
+ paramTextSize = BSG_PackList(paramText, 3,
+ BSG_TPM_RESULT, &status,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_STORED_DATA, sealedData);
+
+ TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+ osapSharedSecret, auth,
+ 0) );
+
+
+ goto egress;
+
+ abort_egress:
+ egress:
+
+ if (flatSealedData)
+ TCS_FreeMemory( hContext, flatSealedData);
+
+ free(paramText);
+ return status;
+}
+
+
+TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE hContext,
+ const TPM_KEY_HANDLE keyHandle,
+ const TPM_STORED_DATA *sealedData,
+ const TPM_AUTHDATA *key_usage_auth,
+ const TPM_AUTHDATA *data_usage_auth,
+ buffer_t *outData,
+ TCS_AUTH *auth,
+ TCS_AUTH *dataAuth) {
+
+ TPM_RESULT status = TPM_SUCCESS;
+ TPM_COMMAND_CODE command = TPM_ORD_Unseal;
+
+ BYTE *paramText; // Digest to make Auth.
+ UINT32 paramTextSize;
+
+ // Generate Extra TCS Parameters
+ UINT32 sealDataSize, clearDataSize;
+ BYTE *flatSealedData= (BYTE *) malloc(sizeof(TPM_VERSION) +
+ 2 * sizeof(UINT32) +
+ sealedData->sealInfoSize +
+ sealedData->encDataSize),
+ *clearData=NULL;
+
+ sealDataSize = BSG_Pack(BSG_TPM_STORED_DATA, sealedData, flatSealedData );
+
+ paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH);
+
+ paramTextSize = BSG_PackList(paramText, 2,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_STORED_DATA, sealedData);
+
+ TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+ key_usage_auth, auth) );
+
+ TPMTRYRETURN( GenerateAuth( paramText, paramTextSize,
+ data_usage_auth, dataAuth) );
+ // Call TCS
+ TPMTRYRETURN( TCSP_Unseal( hContext,
+ keyHandle,
+ sealDataSize,
+ flatSealedData,
+ auth,
+ dataAuth,
+ &clearDataSize,
+ &clearData) );
+
+ // Verify Auth
+ struct pack_buf_t clearData_pack = {clearDataSize, clearData};
+
+ paramTextSize = BSG_PackList(paramText, 3,
+ BSG_TPM_RESULT, &status,
+ BSG_TPM_COMMAND_CODE, &command,
+ BSG_TPM_SIZE32_DATA, &clearData_pack);
+
+ TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+ key_usage_auth, auth,
+ hContext) );
+
+ TPMTRYRETURN( VerifyAuth( paramText, paramTextSize,
+ data_usage_auth, dataAuth,
+ hContext) );
+
+ // Unpack/return key structure
+ TPMTRYRETURN( buffer_init(outData, clearDataSize, clearData) );
+
+ goto egress;
+
+ abort_egress:
+ egress:
+
+ if (flatSealedData)
+ TCS_FreeMemory( hContext, clearData);
+
+ free(paramText);
+ return status;
+}
+
+
// Function Reaches into unsupported TCS command, beware.
TPM_RESULT VTSP_RawTransmit(const TCS_CONTEXT_HANDLE hContext,
const buffer_t *inbuf,
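VTSP_Seal derives the encrypted usage auth the TPM 1.2 way: XORKey =
SHA1(osapSharedSecret || nonceEven), then encAuth = XORKey XOR sealDataAuth,
byte by byte. The same computation in Python; all input values below are
placeholders:

    import hashlib

    def seal_enc_auth(osap_shared_secret, nonce_even, seal_data_auth):
        # SHA1 over sharedSecret || nonceEven, XORed with the new auth,
        # mirroring the loop over TPM_DIGEST_SIZE bytes in VTSP_Seal above.
        assert len(osap_shared_secret) == len(nonce_even) == 20
        assert len(seal_data_auth) == 20
        xor_key = hashlib.sha1(osap_shared_secret + nonce_even).digest()
        return bytes(bytearray(k ^ a for k, a in
                               zip(bytearray(xor_key),
                                   bytearray(seal_data_auth))))

    enc = seal_enc_auth(b"\x11" * 20, b"\x22" * 20, b"\x33" * 20)
    assert len(enc) == 20   # a TPM_ENCAUTH is one digest wide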
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/manager/vtsp.h
--- a/tools/vtpm_manager/manager/vtsp.h Mon Jan 9 11:19:55 2006
+++ b/tools/vtpm_manager/manager/vtsp.h Mon Jan 9 11:22:17 2006
@@ -86,7 +86,8 @@
const TPM_AUTHDATA *parentAuth,
TPM_HANDLE *newKeyHandle,
TCS_AUTH *pAuth,
- CRYPTO_INFO *cryptoinfo);
+ CRYPTO_INFO *cryptoinfo,
+ const BOOL skipTPMLoad);
TPM_RESULT VTSP_Unbind( const TCS_CONTEXT_HANDLE hContext,
const TPM_KEY_HANDLE key_handle,
@@ -99,4 +100,22 @@
const buffer_t *inData,
buffer_t *outData);
+TPM_RESULT VTSP_Seal(const TCS_CONTEXT_HANDLE hContext,
+ const TPM_KEY_HANDLE keyHandle,
+ const TPM_AUTHDATA *sealDataAuth,
+ const TPM_PCR_COMPOSITE *pcrComp,
+ const buffer_t *inData,
+ TPM_STORED_DATA *sealedData,
+ const TPM_SECRET *osapSharedSecret,
+ TCS_AUTH *auth);
+
+TPM_RESULT VTSP_Unseal(const TCS_CONTEXT_HANDLE hContext,
+ const TPM_KEY_HANDLE keyHandle,
+ const TPM_STORED_DATA *sealedData,
+ const TPM_AUTHDATA *key_usage_auth,
+ const TPM_AUTHDATA *data_usage_auth,
+ buffer_t *outData,
+ TCS_AUTH *auth,
+ TCS_AUTH *dataAuth);
+
#endif //_VTSP_H_
diff -r 25e3c8668f1f -r 8af1199488d3 tools/vtpm_manager/tcs/tcs.c
--- a/tools/vtpm_manager/tcs/tcs.c Mon Jan 9 11:19:55 2006
+++ b/tools/vtpm_manager/tcs/tcs.c Mon Jan 9 11:22:17 2006
@@ -636,7 +636,7 @@
TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH;
// check input params
- if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || *SealedData == NULL)
+ if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || SealedData == NULL)
return TPM_BAD_PARAMETER;
// Convert Byte Input parameter in the input byte stream InBuf
diff -r 25e3c8668f1f -r 8af1199488d3 tools/xentrace/Makefile
--- a/tools/xentrace/Makefile Mon Jan 9 11:19:55 2006
+++ b/tools/xentrace/Makefile Mon Jan 9 11:22:17 2006
@@ -15,24 +15,32 @@
OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
BIN = xentrace tbctl setsize
-LIBBIN = xenctx
+LIBBIN =
SCRIPTS = xentrace_format
MAN1 = $(wildcard *.1)
MAN8 = $(wildcard *.8)
+
+ifeq ($(XEN_TARGET_ARCH),x86_32)
+LIBBIN += xenctx
+endif
+
+ifeq ($(XEN_TARGET_ARCH),x86_64)
+LIBBIN += xenctx
+endif
all: build
build: $(BIN) $(LIBBIN)
install: build
[ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
- [ -d $(DESTDIR)/usr/$(LIBDIR)/xen/bin ] || \
+ [ -z "$(LIBBIN)"] || [ -d $(DESTDIR)/usr/$(LIBDIR)/xen/bin ] || \
$(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
[ -d $(DESTDIR)/usr/share/man/man1 ] || \
$(INSTALL_DIR) $(DESTDIR)/usr/share/man/man1
[ -d $(DESTDIR)/usr/share/man/man8 ] || \
$(INSTALL_DIR) $(DESTDIR)/usr/share/man/man8
$(INSTALL_PROG) $(BIN) $(SCRIPTS) $(DESTDIR)/usr/bin
- $(INSTALL_PROG) $(LIBBIN) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
+ [ -z "$(LIBBIN)"] || $(INSTALL_PROG) $(LIBBIN)
$(DESTDIR)/usr/$(LIBDIR)/xen/bin
$(INSTALL_DATA) $(MAN1) $(DESTDIR)/usr/share/man/man1
$(INSTALL_DATA) $(MAN8) $(DESTDIR)/usr/share/man/man8
diff -r 25e3c8668f1f -r 8af1199488d3 tools/xentrace/xenctx.c
--- a/tools/xentrace/xenctx.c Mon Jan 9 11:19:55 2006
+++ b/tools/xentrace/xenctx.c Mon Jan 9 11:22:17 2006
@@ -380,10 +380,10 @@
exit(-1);
}
- ret = xc_domain_get_vcpu_context(xc_handle, domid, vcpu, &ctx);
+ ret = xc_vcpu_getcontext(xc_handle, domid, vcpu, &ctx);
if (ret < 0) {
xc_domain_unpause(xc_handle, domid);
- perror("xc_domain_get_vcpu_context");
+ perror("xc_vcpu_getcontext");
exit(-1);
}
diff -r 25e3c8668f1f -r 8af1199488d3 tools/xm-test/tests/network-attach/Makefile.am
--- a/tools/xm-test/tests/network-attach/Makefile.am Mon Jan 9 11:19:55 2006
+++ b/tools/xm-test/tests/network-attach/Makefile.am Mon Jan 9 11:22:17 2006
@@ -6,7 +6,7 @@
03_network_attach_detach_multiple_pos.test \
04_network_attach_baddomain_neg.test
-XFAIL_TESTS = 03_network_attach_detach_multiple_pos.test
+XFAIL_TESTS =
EXTRA_DIST = $(TESTS) $(XFAIL_TESTS) network_utils.py
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/Makefile Mon Jan 9 11:22:17 2006
@@ -23,6 +23,13 @@
__divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o
+# xen stack unwinder
+# unwind_decoder.c is included in unwind.c
+OBJS += unwind.o
+#unwind.o: CFLAGS += -DUNW_DEBUG=4
+
+OBJS += process-linux-xen.o
+
# perfmon.o
# unwind.o needed for kernel unwinding (rare)
@@ -31,11 +38,26 @@
# remove following line if not privifying in memory
# OBJS += privify.o
-default: $(OBJS) head.o xen.lds.s
- $(LD) -r -o arch.o $(OBJS)
+default: $(TARGET)
+
+$(CURDIR)/arch.o: $(OBJS)
+ $(LD) -r -o $@ $(OBJS)
+
+$(TARGET)-syms: $(ALL_OBJS) head.o xen.lds.s
$(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
- -Map map.out head.o $(ALL_OBJS) -o $(TARGET)-syms
- $(OBJCOPY) -R .note -R .comment -S $(TARGET)-syms $(TARGET)
+ -Map map.out head.o $(ALL_OBJS) -o $@
+ $(NM) -n $@ | $(BASEDIR)/tools/symbols > $(BASEDIR)/xen-syms.S
+ $(MAKE) $(BASEDIR)/xen-syms.o
+ $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
+ -Map map.out head.o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
+ $(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
+ $(MAKE) $(BASEDIR)/xen-syms.o
+ $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
+ -Map map.out head.o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
+ rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
+
+$(TARGET): $(TARGET)-syms
+ $(OBJCOPY) -R .note -R .comment -S $(TARGET)-syms $@
$(NM) -n $(TARGET)-syms | grep -v '\( [aUw] \)\|\(__crc_\)\|\( \$[adt]\)'\
> $(BASEDIR)/System.map
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/entry.S
--- a/xen/arch/ia64/linux-xen/entry.S Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/entry.S Mon Jan 9 11:22:17 2006
@@ -1417,7 +1417,6 @@
br.cond.sptk.many rp // goes to ia64_leave_kernel
END(ia64_prepare_handle_unaligned)
-#ifndef XEN
//
// unw_init_running(void (*callback)(info, arg), void *arg)
//
@@ -1463,6 +1462,7 @@
br.ret.sptk.many rp
END(unw_init_running)
+#ifndef XEN
.rodata
.align 8
.globl sys_call_table
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/mmio.c
--- a/xen/arch/ia64/vmx/mmio.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/mmio.c Mon Jan 9 11:22:17 2006
@@ -29,7 +29,7 @@
#include <asm/vmx_vcpu.h>
#include <asm/privop.h>
#include <asm/types.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/mm.h>
#include <asm/vmx.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vlsapic.c
--- a/xen/arch/ia64/vmx/vlsapic.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vlsapic.c Mon Jan 9 11:22:17 2006
@@ -218,7 +218,7 @@
*/
void vtm_domain_out(VCPU *vcpu)
{
- if(!is_idle_task(vcpu->domain))
+ if(!is_idle_domain(vcpu->domain))
rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
}
@@ -230,7 +230,7 @@
{
vtime_t *vtm;
- if(!is_idle_task(vcpu->domain)) {
+ if(!is_idle_domain(vcpu->domain)) {
vtm=&(vcpu->arch.arch_vmx.vtm);
vtm_interruption_update(vcpu, vtm);
}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_init.c
--- a/xen/arch/ia64/vmx/vmx_init.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_init.c Mon Jan 9 11:22:17 2006
@@ -42,7 +42,7 @@
#include <xen/lib.h>
#include <asm/vmmu.h>
#include <public/arch-ia64.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx_phy_mode.h>
#include <asm/processor.h>
#include <asm/vmx.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_process.c
--- a/xen/arch/ia64/vmx/vmx_process.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_process.c Mon Jan 9 11:22:17 2006
@@ -53,6 +53,7 @@
#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
extern void rnat_consumption (VCPU *vcpu);
#define DOMN_PAL_REQUEST 0x110000
@@ -185,8 +186,11 @@
}else if(iim == DOMN_PAL_REQUEST){
pal_emul(current);
vmx_vcpu_increment_iip(current);
- } else
+ } else {
+ if (iim == 0)
+ die_if_kernel("bug check", regs, iim);
vmx_reflect_interruption(ifa,isr,iim,11,regs);
+ }
}
@@ -227,7 +231,7 @@
struct domain *d = current->domain;
struct vcpu *v = current;
// FIXME: Will this work properly if doing an RFI???
- if (!is_idle_task(d) ) { // always comes from guest
+ if (!is_idle_domain(d) ) { // always comes from guest
extern void vmx_dorfirfi(void);
struct pt_regs *user_regs = vcpu_regs(current);
if (local_softirq_pending())
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/vmx/vmx_support.c
--- a/xen/arch/ia64/vmx/vmx_support.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/vmx/vmx_support.c Mon Jan 9 11:22:17 2006
@@ -21,7 +21,7 @@
*/
#include <xen/config.h>
#include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx.h>
#include <asm/vmx_vcpu.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/dom_fw.c
--- a/xen/arch/ia64/xen/dom_fw.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/dom_fw.c Mon Jan 9 11:22:17 2006
@@ -861,12 +861,16 @@
bp->console_info.orig_x = 0;
bp->console_info.orig_y = 24;
bp->fpswa = 0;
- bp->initrd_start = (dom0_start+dom0_size) -
- (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
- bp->initrd_size = ia64_boot_param->initrd_size;
- printf(" initrd start %0xlx", bp->initrd_start);
- printf(" initrd size %0xlx", bp->initrd_size);
-
-
+ if (d == dom0) {
+ bp->initrd_start = (dom0_start+dom0_size) -
+ (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
+ bp->initrd_size = ia64_boot_param->initrd_size;
+ }
+ else {
+ bp->initrd_start = d->arch.initrd_start;
+ bp->initrd_size = d->arch.initrd_len;
+ }
+ printf(" initrd start %0xlx", bp->initrd_start);
+ printf(" initrd size %0xlx", bp->initrd_size);
return bp;
}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/domain.c Mon Jan 9 11:22:17 2006
@@ -19,6 +19,7 @@
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/mm.h>
+#include <xen/iocap.h>
#include <asm/ptrace.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -45,7 +46,7 @@
#include <asm/vmx_vcpu.h>
#include <asm/vmx_vpd.h>
#include <asm/pal.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#define CONFIG_DOMAIN0_CONTIGUOUS
unsigned long dom0_start = -1L;
@@ -181,7 +182,7 @@
memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
}
-void arch_do_createdomain(struct vcpu *v)
+int arch_do_createdomain(struct vcpu *v)
{
struct domain *d = v->domain;
struct thread_info *ti = alloc_thread_info(v);
@@ -248,7 +249,9 @@
}
} else
d->arch.mm = NULL;
- printf ("arch_do_create_domain: domain=%p\n", d);
+ printf ("arch_do_create_domain: domain=%p\n", d);
+
+ return 0;
}
void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
@@ -291,16 +294,7 @@
d->arch.cmdline = c->cmdline;
new_thread(v, regs->cr_iip, 0, 0);
-#ifdef CONFIG_IA64_SPLIT_CACHE
- /* Sync d/i cache conservatively */
- if (!running_on_sim) {
- ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
- if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
- printk("PAL CACHE FLUSH failed for dom0.\n");
- else
- printk("Sync i/d cache for guest SUCC\n");
- }
-#endif
+ sync_split_caches();
v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
if ( c->vcpu.privregs && copy_from_user(v->arch.privregs,
c->vcpu.privregs, sizeof(mapped_regs_t))) {
@@ -428,7 +422,7 @@
{
p = alloc_domheap_page(d);
// zero out pages for security reasons
- memset(__va(page_to_phys(p)),0,PAGE_SIZE);
+ if (p) memset(__va(page_to_phys(p)),0,PAGE_SIZE);
}
if (unlikely(!p)) {
printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
@@ -763,7 +757,10 @@
*/
void physdev_init_dom0(struct domain *d)
{
- set_bit(_DOMF_physdev_access, &d->domain_flags);
+ if (iomem_permit_access(d, 0UL, ~0UL))
+ BUG();
+ if (irqs_permit_access(d, 0, NR_PIRQS-1))
+ BUG();
}
unsigned int vmx_dom0 = 0;
@@ -912,9 +909,9 @@
memset(si, 0, PAGE_SIZE);
d->shared_info->arch.start_info_pfn = __pa(si) >> PAGE_SHIFT;
sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION);
+ si->nr_pages = d->tot_pages;
#if 0
- si->nr_pages = d->tot_pages;
si->shared_info = virt_to_phys(d->shared_info);
si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
//si->pt_base = vpt_start;
@@ -959,16 +956,7 @@
new_thread(v, pkern_entry, 0, 0);
physdev_init_dom0(d);
-#ifdef CONFIG_IA64_SPLIT_CACHE
- /* Sync d/i cache conservatively */
- if (!running_on_sim) {
- ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
- if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
- printk("PAL CACHE FLUSH failed for dom0.\n");
- else
- printk("Sync i/d cache for guest SUCC\n");
- }
-#endif
+ sync_split_caches();
// FIXME: Hack for keyboard input
#ifdef CLONE_DOMAIN0
@@ -1027,16 +1015,7 @@
#endif
new_thread(v, pkern_entry, 0, 0);
printk("new_thread returns\n");
-#ifdef CONFIG_IA64_SPLIT_CACHE
- /* Sync d/i cache conservatively */
- if (!running_on_sim) {
- ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
- if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
- printk("PAL CACHE FLUSH failed for dom0.\n");
- else
- printk("Sync i/d cache for guest SUCC\n");
- }
-#endif
+ sync_split_caches();
__set_bit(0x30, VCPU(v, delivery_mask));
return 0;
@@ -1050,16 +1029,7 @@
v->domain->domain_id);
loaddomainelfimage(v->domain,v->domain->arch.image_start);
new_thread(v, v->domain->arch.entry, 0, 0);
-#ifdef CONFIG_IA64_SPLIT_CACHE
- /* Sync d/i cache conservatively */
- if (!running_on_sim) {
- ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
- if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
- printk("PAL CACHE FLUSH failed for dom0.\n");
- else
- printk("Sync i/d cache for guest SUCC\n");
- }
-#endif
+ sync_split_caches();
}
#endif
@@ -1098,15 +1068,6 @@
void domain_pend_keyboard_interrupt(int irq)
{
vcpu_pend_interrupt(dom0->vcpu[0],irq);
-}
-
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
- if ( v->processor == newcpu )
- return;
-
- set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
- v->processor = newcpu;
}
void sync_vcpu_execstate(struct vcpu *v)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/hyperprivop.S Mon Jan 9 11:22:17 2006
@@ -543,6 +543,13 @@
extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
cmp.ne p7,p0=r21,r0 ;;
(p7) br.spnt.few dispatch_break_fault ;;
+ movl r20=IA64_PSR_CPL ;;
+ and r22=r20,r30 ;;
+ cmp.ne p7,p0=r22,r0
+(p7) br.spnt.many 1f ;;
+ cmp.eq p7,p0=r17,r0
+(p7) br.spnt.few dispatch_break_fault ;;
+1:
#if 1 /* special handling in case running on simulator */
movl r20=first_break;;
ld4 r23=[r20];;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/irq.c Mon Jan 9 11:22:17 2006
@@ -1377,9 +1377,6 @@
irq_guest_action_t *action;
unsigned long flags;
int rc = 0;
-
- if ( !IS_CAPABLE_PHYSDEV(d->domain) )
- return -EPERM;
spin_lock_irqsave(&desc->lock, flags);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/ivt.S Mon Jan 9 11:22:17 2006
@@ -839,6 +839,8 @@
mov r17=cr.iim
mov r31=pr
;;
+ cmp.eq p7,p0=r17,r0
+(p7) br.spnt.few dispatch_break_fault ;;
movl r18=XSI_PSR_IC
;;
ld8 r19=[r18]
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/process.c
--- a/xen/arch/ia64/xen/process.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/process.c Mon Jan 9 11:22:17 2006
@@ -33,6 +33,7 @@
#include <xen/multicall.h>
extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64);
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
extern unsigned long dom0_start, dom0_size;
@@ -64,26 +65,16 @@
extern struct schedule_data schedule_data[NR_CPUS];
-void schedule_tail(struct vcpu *next)
-{
- unsigned long rr7;
- //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
- //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
-
- // TG: Real HACK FIXME.
- // This is currently necessary because when a new domain is started,
- // the context_switch function of xen/common/schedule.c(__enter_scheduler)
- // never returns. Therefore, the lock must be released.
- // schedule_tail is only called when a domain is started.
- spin_unlock_irq(&schedule_data[current->processor].schedule_lock);
-
- /* rr7 will be postponed to last point when resuming back to guest */
- if(VMX_DOMAIN(current)){
- vmx_load_all_rr(current);
- }else{
- load_region_regs(current);
- vcpu_load_kernel_regs(current);
- }
+void schedule_tail(struct vcpu *prev)
+{
+ context_saved(prev);
+
+ if (VMX_DOMAIN(current)) {
+ vmx_load_all_rr(current);
+ } else {
+ load_region_regs(current);
+ vcpu_load_kernel_regs(current);
+ }
}
void tdpfoo(void) { }
@@ -251,7 +242,7 @@
struct domain *d = current->domain;
struct vcpu *v = current;
// FIXME: Will this work properly if doing an RFI???
- if (!is_idle_task(d) && user_mode(regs)) {
+ if (!is_idle_domain(d) && user_mode(regs)) {
//vcpu_poke_timer(v);
if (vcpu_deliverable_interrupts(v))
reflect_extint(regs);
@@ -686,6 +677,8 @@
vcpu_increment_iip(current);
}
else {
+ if (iim == 0)
+ die_if_kernel("bug check", regs, iim);
PSCB(v,iim) = iim;
reflect_interruption(isr,regs,IA64_BREAK_VECTOR);
}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/vcpu.c Mon Jan 9 11:22:17 2006
@@ -1085,7 +1085,7 @@
/* gloss over the wraparound problem for now... we know it exists
* but it doesn't matter right now */
- if (is_idle_task(vcpu->domain)) {
+ if (is_idle_domain(vcpu->domain)) {
// printf("****** vcpu_set_next_timer called during idle!!\n");
vcpu_safe_set_itm(s);
return;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xenmisc.c Mon Jan 9 11:22:17 2006
@@ -25,7 +25,6 @@
int phys_proc_id[NR_CPUS];
unsigned long loops_per_jiffy = (1<<12); // from linux/init/main.c
-void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n"); }
void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check abort handling)\n"); }
void ia64_mca_cpu_init(void *x) { }
void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { }
@@ -180,11 +179,6 @@
// from arch/ia64/traps.c
///////////////////////////////
-void show_registers(struct pt_regs *regs)
-{
- printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n");
-}
-
int is_kernel_text(unsigned long addr)
{
extern char _stext[], _etext[];
@@ -236,7 +230,13 @@
void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */
{
- printk("die_if_kernel: called, not implemented\n");
+ if (user_mode(regs))
+ return;
+
+ printk("%s: %s %ld\n", __func__, str, err);
+ debugtrace_dump();
+ show_registers(regs);
+ domain_crash_synchronous();
}
long
@@ -320,18 +320,15 @@
ia64_set_iva(&ia64_ivt);
ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
VHPT_ENABLED);
- if (!is_idle_task(current->domain)) {
+ if (!is_idle_domain(current->domain)) {
load_region_regs(current);
vcpu_load_kernel_regs(current);
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
}
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
}
-}
-
-void context_switch_finalise(struct vcpu *next)
-{
- /* nothing to do */
+
+ context_saved(prev);
}
void continue_running(struct vcpu *same)
@@ -368,3 +365,23 @@
goto loop;
}
}
+
+/* FIXME: for the foreseeable future, all cpu's that enable VTi have split
+ * caches and all cpu's that have split caches enable VTi. This may
+ * eventually be untrue though. */
+#define cpu_has_split_cache vmx_enabled
+extern unsigned int vmx_enabled;
+
+void sync_split_caches(void)
+{
+ unsigned long ret, progress = 0;
+
+ if (cpu_has_split_cache) {
+ /* Sync d/i cache conservatively */
+ ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
+ if ((ret!=PAL_STATUS_SUCCESS)&& (ret!=PAL_STATUS_UNIMPLEMENTED))
+ printk("PAL CACHE FLUSH failed\n");
+ else printk("Sync i/d cache for guest SUCC\n");
+ }
+ else printk("sync_split_caches ignored for CPU with no split cache\n");
+}
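sync_split_caches() is needed whenever freshly written memory is about to be executed, since a VTi-capable part may not keep its instruction cache coherent with stores. A hypothetical caller sketching the intended usage (load_guest_image() and its signature are illustrative, not part of this patch):

    #include <string.h>

    /* Illustrative only: copy a guest text image into place, then force
     * I/D coherency before the guest can fetch from it. */
    static void load_guest_image(void *dst, const void *src, size_t len)
    {
        memcpy(dst, src, len);
        sync_split_caches();
    }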
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xensetup.c Mon Jan 9 11:22:17 2006
@@ -26,7 +26,7 @@
char saved_command_line[COMMAND_LINE_SIZE];
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+struct vcpu *idle_domain[NR_CPUS] = { &idle0_vcpu };
cpumask_t cpu_present_map;
@@ -382,8 +382,7 @@
panic("Could not set up DOM0 guest OS\n");
/* PIN domain0 on CPU 0. */
- dom0->vcpu[0]->cpumap=1;
- set_bit(_VCPUF_cpu_pinned, &dom0->vcpu[0]->vcpu_flags);
+ dom0->vcpu[0]->cpu_affinity = cpumask_of_cpu(0);
#ifdef CLONE_DOMAIN0
{
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/xen/xentime.c
--- a/xen/arch/ia64/xen/xentime.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/xen/xentime.c Mon Jan 9 11:22:17 2006
@@ -127,7 +127,7 @@
vcpu_wake(dom0->vcpu[0]);
}
}
- if (!is_idle_task(current->domain)) {
+ if (!is_idle_domain(current->domain)) {
if (vcpu_timer_expired(current)) {
vcpu_pend_timer(current);
// ensure another timer interrupt happens even if domain doesn't
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/Makefile Mon Jan 9 11:22:17 2006
@@ -29,6 +29,7 @@
endif
OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
+OBJS := $(subst $(TARGET_SUBARCH)/xen.lds.o,,$(OBJS))
ifneq ($(crash_debug),y)
OBJS := $(patsubst cdb%.o,,$(OBJS))
@@ -43,21 +44,24 @@
$(CURDIR)/arch.o: $(OBJS)
$(LD) $(LDFLAGS) -r -o $@ $(OBJS)
-$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds
- $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds
+ $(LD) $(LDFLAGS) -T xen.lds -N \
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@
$(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
$(MAKE) $(BASEDIR)/xen-syms.o
- $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+ $(LD) $(LDFLAGS) -T xen.lds -N \
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
$(NM) -n $@ | $(BASEDIR)/tools/symbols >$(BASEDIR)/xen-syms.S
$(MAKE) $(BASEDIR)/xen-syms.o
- $(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
+ $(LD) $(LDFLAGS) -T xen.lds -N \
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(BASEDIR)/xen-syms.o -o $@
rm -f $(BASEDIR)/xen-syms.S $(BASEDIR)/xen-syms.o
asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
$(CC) $(CFLAGS) -S -o $@ $<
+
+xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS)
+ $(CC) $(CFLAGS) -P -E -Ui386 -D__ASSEMBLY__ -o $@ $<
boot/mkelf32: boot/mkelf32.c
$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
@@ -73,5 +77,6 @@
rm -f dm/*.o dm/*~ dm/core
rm -f genapic/*.o genapic/*~ genapic/core
rm -f cpu/*.o cpu/*~ cpu/core
+ rm -f xen.lds
.PHONY: default clean
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/boot/x86_32.S Mon Jan 9 11:22:17 2006
@@ -1,5 +1,6 @@
#include <xen/config.h>
#include <public/xen.h>
+#include <asm/asm_defns.h>
#include <asm/desc.h>
#include <asm/page.h>
#include <asm/msr.h>
@@ -53,6 +54,7 @@
mov %ecx,%gs
ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
1: lss stack_start-__PAGE_OFFSET,%esp
+ add $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
/* Reset EFLAGS (subsumes CLI and CLD). */
pushl $0
@@ -98,7 +100,7 @@
1: stosl /* low mappings cover as much physmem as possible */
add $4,%edi
add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
+ cmp $HYPERVISOR_VIRT_START+0xe3,%eax
jne 1b
#else
/* Initialize low and high mappings of all memory with 4MB pages */
@@ -111,7 +113,7 @@
jne 1b
1: stosl /* low mappings cover as much physmem as possible */
add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
+ cmp $HYPERVISOR_VIRT_START+0xe3,%eax
jne 1b
#endif
@@ -189,7 +191,7 @@
/*** STACK LOCATION ***/
ENTRY(stack_start)
- .long cpu0_stack + STACK_SIZE - 200 - __PAGE_OFFSET
+ .long cpu0_stack
.long __HYPERVISOR_DS
/*** DESCRIPTOR TABLES ***/
@@ -256,10 +258,6 @@
.fill 1*PAGE_SIZE,1,0
#endif
-#if (STACK_ORDER == 0)
-.section ".bss.page_aligned","w"
-#else
-.section ".bss.twopage_aligned","w"
-#endif
+.section ".bss.stack_aligned","w"
ENTRY(cpu0_stack)
.fill STACK_SIZE,1,0
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/boot/x86_64.S Mon Jan 9 11:22:17 2006
@@ -1,5 +1,6 @@
#include <xen/config.h>
#include <public/xen.h>
+#include <asm/asm_defns.h>
#include <asm/desc.h>
#include <asm/page.h>
#include <asm/msr.h>
@@ -121,7 +122,8 @@
mov %rcx,%cr4
mov stack_start(%rip),%rsp
-
+ or $(STACK_SIZE-CPUINFO_sizeof),%rsp
+
/* Reset EFLAGS (subsumes CLI and CLD). */
pushq $0
popf
@@ -140,7 +142,7 @@
mov %ecx,%ss
lidt idt_descr(%rip)
-
+
cmp $(SECONDARY_CPU_FLAG),%ebx
je start_secondary
@@ -219,7 +221,7 @@
.quad idt_table
ENTRY(stack_start)
- .quad cpu0_stack + STACK_SIZE - 200
+ .quad cpu0_stack
high_start:
.quad __high_start
@@ -265,10 +267,6 @@
.org 0x4000 + PAGE_SIZE
.code64
-#if (STACK_ORDER == 0)
-.section ".bss.page_aligned","w"
-#else
-.section ".bss.twopage_aligned","w"
-#endif
+.section ".bss.stack_aligned","w"
ENTRY(cpu0_stack)
.fill STACK_SIZE,1,0
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dm/i8259.c
--- a/xen/arch/x86/dm/i8259.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/dm/i8259.c Mon Jan 9 11:22:17 2006
@@ -29,7 +29,7 @@
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx.h>
#include <asm/vmx_vpic.h>
#include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dm/vmx_vioapic.c
--- a/xen/arch/x86/dm/vmx_vioapic.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/dm/vmx_vioapic.c Mon Jan 9 11:22:17 2006
@@ -37,7 +37,7 @@
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx.h>
#include <asm/vmx_vpic.h>
#include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/dom0_ops.c Mon Jan 9 11:22:17 2006
@@ -17,6 +17,7 @@
#include <asm/msr.h>
#include <xen/trace.h>
#include <xen/console.h>
+#include <xen/iocap.h>
#include <asm/shadow.h>
#include <asm/irq.h>
#include <asm/processor.h>
@@ -35,13 +36,13 @@
static void write_msr_for(void *unused)
{
- if ( ((1 << current->processor) & msr_cpu_mask) )
+ if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
(void)wrmsr_user(msr_addr, msr_lo, msr_hi);
}
static void read_msr_for(void *unused)
{
- if ( ((1 << current->processor) & msr_cpu_mask) )
+ if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
(void)rdmsr_user(msr_addr, msr_lo, msr_hi);
}
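Both helpers run on every online CPU via a cross-call, so the mask test must name the physical CPU actually executing; smp_processor_id() does that regardless of which vcpu is current, whereas current->processor named the vcpu's home CPU. A sketch of the presumed dispatch (the smp_call_function() arguments here are an assumption, the call itself is not shown in this hunk):

    /* Presumed caller pattern: broadcast to the other cpus, then run
     * locally, so every cpu whose bit is set in msr_cpu_mask acts. */
    static void msr_write_all(void)
    {
        smp_call_function(write_msr_for, NULL, 1, 1);  /* remote cpus */
        write_msr_for(NULL);                           /* this cpu */
    }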
@@ -102,12 +103,27 @@
op->u.add_memtype.nr_pfns,
op->u.add_memtype.type,
1);
+ if (ret > 0)
+ {
+ (void)__put_user(0, &u_dom0_op->u.add_memtype.handle);
+ (void)__put_user(ret, &u_dom0_op->u.add_memtype.reg);
+ ret = 0;
+ }
}
break;
case DOM0_DEL_MEMTYPE:
{
- ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
+ if (op->u.del_memtype.handle == 0
+ /* mtrr/main.c otherwise does a lookup */
+ && (int)op->u.del_memtype.reg >= 0)
+ {
+ ret = mtrr_del_page(op->u.del_memtype.reg, 0, 0);
+ if (ret > 0)
+ ret = 0;
+ }
+ else
+ ret = -EINVAL;
}
break;
@@ -141,7 +157,6 @@
struct domain *d;
unsigned int fp = op->u.ioport_permission.first_port;
unsigned int np = op->u.ioport_permission.nr_ports;
- unsigned int p;
ret = -EINVAL;
if ( (fp + np) > 65536 )
@@ -152,26 +167,12 @@
op->u.ioport_permission.domain)) == NULL) )
break;
- ret = -ENOMEM;
- if ( d->arch.iobmp_mask != NULL )
- {
- if ( (d->arch.iobmp_mask = xmalloc_array(
- u8, IOBMP_BYTES)) == NULL )
- {
- put_domain(d);
- break;
- }
- memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
- }
-
- ret = 0;
- for ( p = fp; p < (fp + np); p++ )
- {
- if ( op->u.ioport_permission.allow_access )
- clear_bit(p, d->arch.iobmp_mask);
- else
- set_bit(p, d->arch.iobmp_mask);
- }
+ if ( np == 0 )
+ ret = 0;
+ else if ( op->u.ioport_permission.allow_access )
+ ret = ioports_permit_access(d, fp, fp + np - 1);
+ else
+ ret = ioports_deny_access(d, fp, fp + np - 1);
put_domain(d);
}
@@ -193,7 +194,7 @@
memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
ret = 0;
if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
- ret = -EFAULT;
+ ret = -EFAULT;
}
break;
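With these changes DOM0_ADD_MEMTYPE hands the MTRR registration index back to the caller, and DOM0_DEL_MEMTYPE only accepts the handle/reg pair it issued. A hypothetical dom0-side round trip, assuming a do_dom0_op() wrapper over the privcmd interface (the wrapper and xc_handle are assumptions):

    /* Illustrative caller: register a write-combining range, then
     * release it using the reg value Xen wrote back. */
    static int set_then_clear_wc(int xc_handle, unsigned long pfn,
                                 unsigned long nr)
    {
        dom0_op_t op = { .cmd = DOM0_ADD_MEMTYPE };
        uint32_t reg;

        op.u.add_memtype.pfn     = pfn;
        op.u.add_memtype.nr_pfns = nr;
        op.u.add_memtype.type    = 1;            /* MTRR write-combining */
        if ( do_dom0_op(xc_handle, &op) != 0 )
            return -1;
        reg = op.u.add_memtype.reg;              /* written back by Xen */

        op.cmd = DOM0_DEL_MEMTYPE;
        op.u.del_memtype.handle = 0;             /* must be 0 here */
        op.u.del_memtype.reg    = reg;
        return do_dom0_op(xc_handle, &op);
    }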
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/domain.c Mon Jan 9 11:22:17 2006
@@ -20,6 +20,7 @@
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/grant_table.h>
+#include <xen/iocap.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
@@ -35,9 +36,7 @@
#include <xen/console.h>
#include <xen/elf.h>
#include <asm/vmx.h>
-#include <asm/vmx_vmcs.h>
#include <asm/msr.h>
-#include <asm/physdev.h>
#include <xen/kernel.h>
#include <xen/multicall.h>
@@ -47,17 +46,16 @@
struct percpu_ctxt {
struct vcpu *curr_vcpu;
- unsigned int context_not_finalised;
unsigned int dirty_segment_mask;
} __cacheline_aligned;
static struct percpu_ctxt percpu_ctxt[NR_CPUS];
-static void continue_idle_task(struct vcpu *v)
+static void continue_idle_domain(struct vcpu *v)
{
reset_stack_and_jump(idle_loop);
}
-static void continue_nonidle_task(struct vcpu *v)
+static void continue_nonidle_domain(struct vcpu *v)
{
reset_stack_and_jump(ret_from_intr);
}
@@ -93,12 +91,13 @@
{
struct vcpu *v = current;
- ASSERT(is_idle_task(v->domain));
+ ASSERT(is_idle_domain(v->domain));
percpu_ctxt[smp_processor_id()].curr_vcpu = v;
- cpu_set(smp_processor_id(), v->domain->cpumask);
- v->arch.schedule_tail = continue_idle_task;
-
- idle_loop();
+ cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
+ cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
+ v->arch.schedule_tail = continue_idle_domain;
+
+ reset_stack_and_jump(idle_loop);
}
static long no_idt[2];
@@ -185,11 +184,17 @@
{
struct pfn_info *page;
- if ( d->tot_pages < 10 )
+ printk("Memory pages belonging to domain %u:\n", d->domain_id);
+
+ if ( d->tot_pages >= 10 )
+ {
+ printk(" DomPage list too long to display\n");
+ }
+ else
{
list_for_each_entry ( page, &d->page_list, list )
{
- printk("Page %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
+ printk(" DomPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_phys(page)), _p(page_to_pfn(page)),
page->count_info, page->u.inuse.type_info);
}
@@ -197,15 +202,10 @@
list_for_each_entry ( page, &d->xenpage_list, list )
{
- printk("XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
+ printk(" XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_phys(page)), _p(page_to_pfn(page)),
page->count_info, page->u.inuse.type_info);
}
-
- page = virt_to_page(d->shared_info);
- printk("Shared_info@%p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
- _p(page_to_phys(page)), _p(page_to_pfn(page)), page->count_info,
- page->u.inuse.type_info);
}
struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
@@ -250,24 +250,36 @@
#endif
}
-void arch_do_createdomain(struct vcpu *v)
+int arch_do_createdomain(struct vcpu *v)
{
struct domain *d = v->domain;
l1_pgentry_t gdt_l1e;
- int vcpuid, pdpt_order;
+ int vcpuid, pdpt_order, rc;
#ifdef __x86_64__
int i;
#endif
- if ( is_idle_task(d) )
- return;
-
- v->arch.schedule_tail = continue_nonidle_task;
-
- d->shared_info = alloc_xenheap_page();
+ if ( is_idle_domain(d) )
+ return 0;
+
+ d->arch.ioport_caps =
+ rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+ if ( d->arch.ioport_caps == NULL )
+ return -ENOMEM;
+
+ if ( (d->shared_info = alloc_xenheap_page()) == NULL )
+ return -ENOMEM;
+
+ if ( (rc = ptwr_init(d)) != 0 )
+ {
+ free_xenheap_page(d->shared_info);
+ return rc;
+ }
+
+ v->arch.schedule_tail = continue_nonidle_domain;
+
memset(d->shared_info, 0, PAGE_SIZE);
v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id];
- v->cpumap = CPUMAP_RUNANYWHERE;
SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
@@ -308,25 +320,10 @@
__PAGE_HYPERVISOR);
#endif
- (void)ptwr_init(d);
-
shadow_lock_init(d);
INIT_LIST_HEAD(&d->arch.free_shadow_frames);
-}
-
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
- if ( v->processor == newcpu )
- return;
-
- set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
- v->processor = newcpu;
-
- if ( VMX_DOMAIN(v) )
- {
- __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
- v->arch.schedule_tail = arch_vmx_do_relaunch;
- }
+
+ return 0;
}
/* This is called by arch_final_setup_guest and do_boot_vcpu */
@@ -348,6 +345,8 @@
((c->user_regs.ss & 3) == 0) )
return -EINVAL;
}
+ else if ( !hvm_enabled )
+ return -EINVAL;
clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
if ( c->flags & VGCF_I387_VALID )
@@ -690,7 +689,7 @@
struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
struct vcpu *n = current;
- if ( !is_idle_task(p->domain) )
+ if ( !is_idle_domain(p->domain) )
{
memcpy(&p->arch.guest_context.user_regs,
stack_regs,
@@ -699,7 +698,7 @@
save_segments(p);
}
- if ( !is_idle_task(n->domain) )
+ if ( !is_idle_domain(n->domain) )
{
memcpy(stack_regs,
&n->arch.guest_context.user_regs,
@@ -725,7 +724,8 @@
}
if ( p->domain != n->domain )
- cpu_set(cpu, n->domain->cpumask);
+ cpu_set(cpu, n->domain->domain_dirty_cpumask);
+ cpu_set(cpu, n->vcpu_dirty_cpumask);
write_ptbase(n);
@@ -738,7 +738,8 @@
}
if ( p->domain != n->domain )
- cpu_clear(cpu, p->domain->cpumask);
+ cpu_clear(cpu, p->domain->domain_dirty_cpumask);
+ cpu_clear(cpu, p->vcpu_dirty_cpumask);
percpu_ctxt[cpu].curr_vcpu = n;
}
@@ -748,28 +749,24 @@
{
unsigned int cpu = smp_processor_id();
- ASSERT(!local_irq_is_enabled());
+ ASSERT(local_irq_is_enabled());
set_current(next);
- if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
- {
+ if ( (percpu_ctxt[cpu].curr_vcpu != next) &&
+ !is_idle_domain(next->domain) )
+ {
+ /* This may happen if next has been migrated by the scheduler. */
+ if ( unlikely(!cpus_empty(next->vcpu_dirty_cpumask)) )
+ {
+ ASSERT(!cpu_isset(cpu, next->vcpu_dirty_cpumask));
+ sync_vcpu_execstate(next);
+ ASSERT(cpus_empty(next->vcpu_dirty_cpumask));
+ }
+
+ local_irq_disable();
__context_switch();
- percpu_ctxt[cpu].context_not_finalised = 1;
- }
-}
-
-void context_switch_finalise(struct vcpu *next)
-{
- unsigned int cpu = smp_processor_id();
-
- ASSERT(local_irq_is_enabled());
-
- if ( percpu_ctxt[cpu].context_not_finalised )
- {
- percpu_ctxt[cpu].context_not_finalised = 0;
-
- BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+ local_irq_enable();
if ( VMX_DOMAIN(next) )
{
@@ -783,6 +780,8 @@
}
}
+ context_saved(prev);
+
schedule_tail(next);
BUG();
}
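The context_not_finalised two-phase dance is gone: the switch path now syncs a migrated vcpu's stale state up front, performs __context_switch() with interrupts briefly disabled, and calls context_saved(prev) itself. A toy model of the dirty-tracking invariant being relied on, with two pcpus and an int standing in for real register state (a sketch, not the Xen code):

    #include <assert.h>

    struct toy_vcpu { int dirty_on[2]; };      /* ~ vcpu_dirty_cpumask */

    static void toy_sync_execstate(struct toy_vcpu *v)
    {
        for ( int cpu = 0; cpu < 2; cpu++ )
            v->dirty_on[cpu] = 0;              /* remote save and clear */
    }

    static void toy_switch_in(struct toy_vcpu *next, int cpu)
    {
        /* A migrated vcpu may still have live state on another pcpu. */
        if ( next->dirty_on[1 - cpu] )
            toy_sync_execstate(next);
        assert(!next->dirty_on[1 - cpu]);
        next->dirty_on[cpu] = 1;               /* its state lives here now */
    }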
@@ -812,20 +811,11 @@
void sync_vcpu_execstate(struct vcpu *v)
{
- unsigned int cpu = v->processor;
-
- if ( !cpu_isset(cpu, v->domain->cpumask) )
- return;
-
- if ( cpu == smp_processor_id() )
- {
+ if ( cpu_isset(smp_processor_id(), v->vcpu_dirty_cpumask) )
(void)__sync_lazy_execstate();
- }
- else
- {
- /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
- flush_tlb_mask(cpumask_of_cpu(cpu));
- }
+
+ /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
+ flush_tlb_mask(v->vcpu_dirty_cpumask);
}
unsigned long __hypercall_create_continuation(
@@ -951,9 +941,7 @@
struct vcpu *v;
unsigned long pfn;
- BUG_ON(!cpus_empty(d->cpumask));
-
- physdev_destroy_state(d);
+ BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
ptwr_destroy(d);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/domain_build.c Mon Jan 9 11:22:17 2006
@@ -16,13 +16,13 @@
#include <xen/kernel.h>
#include <xen/domain.h>
#include <xen/compile.h>
+#include <xen/iocap.h>
#include <asm/regs.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/i387.h>
-#include <asm/physdev.h>
#include <asm/shadow.h>
static long dom0_nrpages;
@@ -94,9 +94,9 @@
return page;
}
-static void process_dom0_ioports_disable()
+static void process_dom0_ioports_disable(void)
{
- unsigned long io_from, io_to, io_nr;
+ unsigned long io_from, io_to;
char *t, *u, *s = opt_dom0_ioports_disable;
if ( *s == '\0' )
@@ -126,8 +126,8 @@
printk("Disabling dom0 access to ioport range %04lx-%04lx\n",
io_from, io_to);
- io_nr = io_to - io_from + 1;
- physdev_modify_ioport_access_range(dom0, 0, io_from, io_nr);
+ if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
+ BUG();
}
}
@@ -183,7 +183,6 @@
/* Machine address of next candidate page-table page. */
unsigned long mpt_alloc;
- extern void physdev_init_dom0(struct domain *);
extern void translate_l2pgtable(
struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn);
@@ -692,9 +691,6 @@
zap_low_mappings(l2start);
zap_low_mappings(idle_pg_table_l2);
#endif
-
- /* DOM0 gets access to everything. */
- physdev_init_dom0(d);
init_domain_time(d);
@@ -746,19 +742,28 @@
printk("dom0: shadow setup done\n");
}
+ i = 0;
+
+ /* DOM0 is permitted full I/O capabilities. */
+ i |= ioports_permit_access(dom0, 0, 0xFFFF);
+ i |= iomem_permit_access(dom0, 0UL, ~0UL);
+ i |= irqs_permit_access(dom0, 0, NR_PIRQS-1);
+
/*
* Modify I/O port access permissions.
*/
/* Master Interrupt Controller (PIC). */
- physdev_modify_ioport_access_range(dom0, 0, 0x20, 2);
+ i |= ioports_deny_access(dom0, 0x20, 0x21);
/* Slave Interrupt Controller (PIC). */
- physdev_modify_ioport_access_range(dom0, 0, 0xA0, 2);
+ i |= ioports_deny_access(dom0, 0xA0, 0xA1);
/* Interval Timer (PIT). */
- physdev_modify_ioport_access_range(dom0, 0, 0x40, 4);
+ i |= ioports_deny_access(dom0, 0x40, 0x43);
/* PIT Channel 2 / PC Speaker Control. */
- physdev_modify_ioport_access_range(dom0, 0, 0x61, 1);
- /* Command-line passed i/o ranges */
+ i |= ioports_deny_access(dom0, 0x61, 0x61);
+ /* Command-line I/O ranges. */
process_dom0_ioports_disable();
+
+ BUG_ON(i != 0);
return 0;
}
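The builder now grants dom0 blanket I/O capability and then punches out the ranges Xen itself drives, OR-ing the return codes together and insisting the whole batch succeeded. The same accumulate-then-check idiom in isolation, with a made-up range (a sketch, not part of the patch):

    /* Illustrative only: grant everything, carve out one reserved range;
     * any single failure (a negative errno) makes the final check trip. */
    static void setup_guest_ioports(struct domain *d)
    {
        int rc = 0;
        rc |= ioports_permit_access(d, 0, 0xFFFF);
        rc |= ioports_deny_access(d, 0x70, 0x71);   /* e.g. the RTC ports */
        BUG_ON(rc != 0);
    }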
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/idle0_task.c
--- a/xen/arch/x86/idle0_task.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/idle0_task.c Mon Jan 9 11:22:17 2006
@@ -11,6 +11,7 @@
struct vcpu idle0_vcpu = {
processor: 0,
+ cpu_affinity:CPU_MASK_CPU0,
domain: &idle0_domain
};
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/io_apic.c Mon Jan 9 11:22:17 2006
@@ -1807,3 +1807,47 @@
return 0;
}
+
+void dump_ioapic_irq_info(void)
+{
+ struct irq_pin_list *entry;
+ struct IO_APIC_route_entry rte;
+ unsigned int irq, pin, printed = 0;
+ unsigned long flags;
+
+ for ( irq = 0; irq < NR_IRQS; irq++ )
+ {
+ entry = &irq_2_pin[irq];
+ if ( entry->pin == -1 )
+ continue;
+
+ if ( !printed++ )
+ printk("IO-APIC interrupt information:\n");
+
+ printk(" IRQ%3d Vec%3d:\n", irq, irq_to_vector(irq));
+
+ for ( ; ; )
+ {
+ pin = entry->pin;
+
+ printk(" Apic 0x%02x, Pin %2d: ", entry->apic, pin);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&rte) + 0) = io_apic_read(entry->apic, 0x10 + 2 * pin);
+ *(((int *)&rte) + 1) = io_apic_read(entry->apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("vector=%u, delivery_mode=%u, dest_mode=%s, "
+ "delivery_status=%d, polarity=%d, irr=%d, "
+ "trigger=%s, mask=%d\n",
+ rte.vector, rte.delivery_mode,
+ rte.dest_mode ? "logical" : "physical",
+ rte.delivery_status, rte.polarity, rte.irr,
+ rte.trigger ? "level" : "edge", rte.mask);
+
+ if ( entry->next == 0 )
+ break;
+ entry = &irq_2_pin[entry->next];
+ }
+ }
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/irq.c Mon Jan 9 11:22:17 2006
@@ -12,6 +12,7 @@
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/sched.h>
+#include <xen/keyhandler.h>
#include <asm/current.h>
#include <asm/smpboot.h>
@@ -198,19 +199,21 @@
int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
{
- unsigned int vector = irq_to_vector(irq);
- struct domain *d = v->domain;
- irq_desc_t *desc = &irq_desc[vector];
+ unsigned int vector;
+ irq_desc_t *desc;
irq_guest_action_t *action;
unsigned long flags;
int rc = 0;
cpumask_t cpumask = CPU_MASK_NONE;
- if ( !IS_CAPABLE_PHYSDEV(d) )
- return -EPERM;
-
+ if ( (irq < 0) || (irq >= NR_IRQS) )
+ return -EINVAL;
+
+ vector = irq_to_vector(irq);
if ( vector == 0 )
- return -EBUSY;
+ return -EINVAL;
+
+ desc = &irq_desc[vector];
spin_lock_irqsave(&desc->lock, flags);
@@ -309,3 +312,71 @@
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
+
+extern void dump_ioapic_irq_info(void);
+
+static void dump_irqs(unsigned char key)
+{
+ int i, irq, vector;
+ irq_desc_t *desc;
+ irq_guest_action_t *action;
+ struct domain *d;
+ unsigned long flags;
+
+ printk("Guest interrupt information:\n");
+
+ for ( irq = 0; irq < NR_IRQS; irq++ )
+ {
+ vector = irq_to_vector(irq);
+ if ( vector == 0 )
+ continue;
+
+ desc = &irq_desc[vector];
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ if ( desc->status & IRQ_GUEST )
+ {
+ action = (irq_guest_action_t *)desc->action;
+
+ printk(" IRQ%3d Vec%3d: type=%-15s status=%08x "
+ "in-flight=%d domain-list=",
+ irq, vector, desc->handler->typename,
+ desc->status, action->in_flight);
+
+ for ( i = 0; i < action->nr_guests; i++ )
+ {
+ d = action->guest[i];
+ printk("%u(%c%c%c%c)",
+ d->domain_id,
+ (test_bit(d->pirq_to_evtchn[irq],
+ &d->shared_info->evtchn_pending[0]) ?
+ 'P' : '-'),
+ (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_LONG,
+ &d->shared_info->vcpu_info[0].
+ evtchn_pending_sel) ?
+ 'S' : '-'),
+ (test_bit(d->pirq_to_evtchn[irq],
+ &d->shared_info->evtchn_mask[0]) ?
+ 'M' : '-'),
+ (test_bit(irq, &d->pirq_mask) ?
+ 'M' : '-'));
+ if ( i != action->nr_guests )
+ printk(",");
+ }
+
+ printk("\n");
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ dump_ioapic_irq_info();
+}
+
+static int __init setup_dump_irqs(void)
+{
+ register_keyhandler('i', dump_irqs, "dump interrupt bindings");
+ return 0;
+}
+__initcall(setup_dump_irqs);
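register_keyhandler() binds a debug-console key to a callback, and the __initcall installs the binding at boot. The same three-step pattern for a hypothetical extra handler (the name and key are made up):

    static void dump_foo(unsigned char key)
    {
        printk("'%c' pressed -> dumping foo state\n", key);
    }

    static int __init setup_dump_foo(void)
    {
        register_keyhandler('f', dump_foo, "dump foo state");
        return 0;
    }
    __initcall(setup_dump_foo);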
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/mm.c Mon Jan 9 11:22:17 2006
@@ -96,6 +96,7 @@
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/event.h>
+#include <xen/iocap.h>
#include <asm/shadow.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
@@ -437,7 +438,6 @@
unsigned long mfn = l1e_get_pfn(l1e);
struct pfn_info *page = pfn_to_page(mfn);
int okay;
- extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
return 1;
@@ -455,8 +455,7 @@
if ( d == dom_io )
d = current->domain;
- if ( (!IS_PRIV(d)) &&
- (!IS_CAPABLE_PHYSDEV(d) || !domain_iomem_in_pfn(d, mfn)) )
+ if ( !iomem_access_permitted(d, mfn, mfn) )
{
MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
return 0;
@@ -1458,7 +1457,8 @@
* was GDT/LDT) but those circumstances should be
* very rare.
*/
- cpumask_t mask = page_get_owner(page)->cpumask;
+ cpumask_t mask =
+ page_get_owner(page)->domain_dirty_cpumask;
tlbflush_filter(mask, page->tlbflush_timestamp);
if ( unlikely(!cpus_empty(mask)) )
@@ -1620,7 +1620,7 @@
if ( shadow_mode_enabled(d) )
shadow_sync_all(d);
if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
- flush_tlb_mask(d->cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
else
local_flush_tlb();
}
@@ -1692,7 +1692,7 @@
struct domain *d, unsigned long vmask)
{
unsigned int vcpu_id;
- cpumask_t pmask;
+ cpumask_t pmask = CPU_MASK_NONE;
struct vcpu *v;
while ( vmask != 0 )
@@ -1701,7 +1701,7 @@
vmask &= ~(1UL << vcpu_id);
if ( (vcpu_id < MAX_VIRT_CPUS) &&
((v = d->vcpu[vcpu_id]) != NULL) )
- cpu_set(v->processor, pmask);
+ cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
}
return pmask;
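Since the per-domain cpumask intersection is gone, the translation now ORs each selected vcpu's dirty mask, yielding exactly the physical CPUs that may hold stale state for those vcpus. A self-contained model with 8-bit masks and made-up values (the hypervisor uses cpumask_t):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* vcpu0 dirty on pcpu1; vcpu2 dirty on pcpus 2 and 3 */
        uint8_t vcpu_dirty[4] = { 0x02, 0x00, 0x0c, 0x00 };
        uint8_t vmask = 0x05;              /* caller picked vcpu0 and vcpu2 */
        uint8_t pmask = 0;

        for ( int v = 0; v < 4; v++ )
            if ( vmask & (1u << v) )
                pmask |= vcpu_dirty[v];

        printf("pmask = 0x%02x\n", pmask); /* 0x0e: flush pcpus 1, 2, 3 */
        return 0;
    }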
@@ -1870,7 +1870,6 @@
break;
}
pmask = vcpumask_to_pcpumask(d, vmask);
- cpus_and(pmask, pmask, d->cpumask);
if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
flush_tlb_mask(pmask);
else
@@ -1879,15 +1878,15 @@
}
case MMUEXT_TLB_FLUSH_ALL:
- flush_tlb_mask(d->cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
break;
case MMUEXT_INVLPG_ALL:
- flush_tlb_one_mask(d->cpumask, op.arg1.linear_addr);
+ flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
break;
case MMUEXT_FLUSH_CACHE:
- if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
+ if ( unlikely(!cache_flush_permitted(d)) )
{
MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
okay = 0;
@@ -2498,7 +2497,7 @@
l1_pgentry_t val = l1e_from_intpte(val64);
struct vcpu *v = current;
struct domain *d = v->domain;
- unsigned int cpu = v->processor;
+ unsigned int cpu = smp_processor_id();
unsigned long vmask, bmap_ptr;
cpumask_t pmask;
int rc = 0;
@@ -2549,13 +2548,12 @@
local_flush_tlb();
break;
case UVMF_ALL:
- flush_tlb_mask(d->cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
break;
default:
if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
rc = -EFAULT;
pmask = vcpumask_to_pcpumask(d, vmask);
- cpus_and(pmask, pmask, d->cpumask);
flush_tlb_mask(pmask);
break;
}
@@ -2570,13 +2568,12 @@
local_flush_tlb_one(va);
break;
case UVMF_ALL:
- flush_tlb_one_mask(d->cpumask, va);
+ flush_tlb_one_mask(d->domain_dirty_cpumask, va);
break;
default:
if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
rc = -EFAULT;
pmask = vcpumask_to_pcpumask(d, vmask);
- cpus_and(pmask, pmask, d->cpumask);
flush_tlb_one_mask(pmask, va);
break;
}
@@ -3019,7 +3016,7 @@
/* Ensure that there are no stale writable mappings in any TLB. */
/* NB. INVLPG is a serialising instruction: flushes pending updates. */
- flush_tlb_one_mask(d->cpumask, l1va);
+ flush_tlb_one_mask(d->domain_dirty_cpumask, l1va);
PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
PTWR_PRINT_WHICH, ptep, pte.l1);
@@ -3343,7 +3340,7 @@
if ( which == PTWR_PT_ACTIVE )
{
l2e_remove_flags(*pl2e, _PAGE_PRESENT);
- flush_tlb_mask(d->cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
}
/* Temporarily map the L1 page, and make a copy of it. */
@@ -3370,7 +3367,7 @@
emulate:
if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
- &ptwr_mem_emulator, BITS_PER_LONG/8) )
+ &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
return 0;
perfc_incrc(ptwr_emulations);
return EXCRET_fault_fixed;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/physdev.c Mon Jan 9 11:22:17 2006
@@ -13,27 +13,6 @@
extern int ioapic_guest_read(int apicid, int address, u32 *pval);
extern int ioapic_guest_write(int apicid, int address, u32 pval);
-
-void physdev_modify_ioport_access_range(
- struct domain *d, int enable, int port, int num)
-{
- int i;
- for ( i = port; i < (port + num); i++ )
- (enable ? clear_bit : set_bit)(i, d->arch.iobmp_mask);
-}
-
-void physdev_destroy_state(struct domain *d)
-{
- xfree(d->arch.iobmp_mask);
- d->arch.iobmp_mask = NULL;
-}
-
-/* Check if a domain controls a device with IO memory within frame @pfn.
- * Returns: 1 if the domain should be allowed to map @pfn, 0 otherwise. */
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn)
-{
- return 0;
-}
/*
* Demuxing hypercall.
@@ -120,18 +99,6 @@
return ret;
}
-/* Domain 0 has read access to all devices. */
-void physdev_init_dom0(struct domain *d)
-{
- /* Access to all I/O ports. */
- d->arch.iobmp_mask = xmalloc_array(u8, IOBMP_BYTES);
- BUG_ON(d->arch.iobmp_mask == NULL);
- memset(d->arch.iobmp_mask, 0, IOBMP_BYTES);
-
- set_bit(_DOMF_physdev_access, &d->domain_flags);
-}
-
-
/*
* Local variables:
* mode: C
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/setup.c Mon Jan 9 11:22:17 2006
@@ -92,7 +92,7 @@
#endif
EXPORT_SYMBOL(mmu_cr4_features);
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+struct vcpu *idle_domain[NR_CPUS] = { &idle0_vcpu };
int acpi_disabled;
@@ -138,131 +138,19 @@
(*call)();
}
-static void __init start_of_day(void)
-{
- int i;
- unsigned long vgdt, gdt_pfn;
-
- early_cpu_init();
-
- paging_init();
-
- /* Unmap the first page of CPU0's stack. */
- memguard_guard_stack(cpu0_stack);
-
- open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
-
- if ( opt_watchdog )
- nmi_watchdog = NMI_LOCAL_APIC;
-
- sort_exception_tables();
-
- arch_do_createdomain(current);
-
- /*
- * Map default GDT into its final positions in the idle page table. As
- * noted in arch_do_createdomain(), we must map for every possible VCPU#.
- */
- vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
- gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- {
- map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
- vgdt += 1 << PDPT_VCPU_VA_SHIFT;
- }
-
- find_smp_config();
-
- smp_alloc_memory();
-
- dmi_scan_machine();
-
- generic_apic_probe();
-
- acpi_boot_table_init();
- acpi_boot_init();
-
- if ( smp_found_config )
- get_smp_config();
-
- init_apic_mappings();
-
- init_IRQ();
-
- trap_init();
-
- ac_timer_init();
-
- early_time_init();
-
- arch_init_memory();
-
- scheduler_init();
-
- identify_cpu(&boot_cpu_data);
- if ( cpu_has_fxsr )
- set_in_cr4(X86_CR4_OSFXSR);
- if ( cpu_has_xmm )
- set_in_cr4(X86_CR4_OSXMMEXCPT);
-
- if ( opt_nosmp )
- {
- max_cpus = 0;
- smp_num_siblings = 1;
- boot_cpu_data.x86_num_cores = 1;
- }
-
- smp_prepare_cpus(max_cpus);
-
- /* We aren't hotplug-capable yet. */
- BUG_ON(!cpus_empty(cpu_present_map));
- for_each_cpu ( i )
- cpu_set(i, cpu_present_map);
-
- /*
- * Initialise higher-level timer functions. We do this fairly late
- * (post-SMP) because the time bases and scale factors need to be updated
- * regularly, and SMP initialisation can cause a long delay with
- * interrupts not yet enabled.
- */
- init_xen_time();
-
- initialize_keytable();
-
- serial_init_postirq();
-
- BUG_ON(!local_irq_is_enabled());
-
- for_each_present_cpu ( i )
- {
- if ( num_online_cpus() >= max_cpus )
- break;
- if ( !cpu_online(i) )
- __cpu_up(i);
- }
-
- printk("Brought up %ld CPUs\n", (long)num_online_cpus());
- smp_cpus_done(max_cpus);
-
- do_initcalls();
-
- schedulers_start();
-
- watchdog_enable();
-}
-
#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
static struct e820entry e820_raw[E820MAX];
void __init __start_xen(multiboot_info_t *mbi)
{
+ unsigned long vgdt, gdt_pfn;
char *cmdline;
+ unsigned long _initrd_start = 0, _initrd_len = 0;
+ unsigned int initrdidx = 1;
module_t *mod = (module_t *)__va(mbi->mods_addr);
unsigned long nr_pages, modules_length;
unsigned long initial_images_start, initial_images_end;
- unsigned long _initrd_start = 0, _initrd_len = 0;
- unsigned int initrdidx = 1;
physaddr_t s, e;
int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
struct ns16550_defaults ns16550 = {
@@ -455,6 +343,12 @@
BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
BUG_ON(sizeof(vcpu_info_t) != 64);
+ /* __foo are defined in public headers. Check they match internal defs. */
+ BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
+#ifdef HYPERVISOR_VIRT_END
+ BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
+#endif
+
init_frametable();
end_boot_allocator();
@@ -486,7 +380,113 @@
early_boot = 0;
- start_of_day();
+ early_cpu_init();
+
+ paging_init();
+
+ /* Unmap the first page of CPU0's stack. */
+ memguard_guard_stack(cpu0_stack);
+
+ open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
+
+ if ( opt_watchdog )
+ nmi_watchdog = NMI_LOCAL_APIC;
+
+ sort_exception_tables();
+
+ if ( arch_do_createdomain(current) != 0 )
+ BUG();
+
+ /*
+ * Map default GDT into its final positions in the idle page table. As
+ * noted in arch_do_createdomain(), we must map for every possible VCPU#.
+ */
+ vgdt = GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE;
+ gdt_pfn = virt_to_phys(gdt_table) >> PAGE_SHIFT;
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ {
+ map_pages_to_xen(vgdt, gdt_pfn, 1, PAGE_HYPERVISOR);
+ vgdt += 1 << PDPT_VCPU_VA_SHIFT;
+ }
+
+ find_smp_config();
+
+ smp_alloc_memory();
+
+ dmi_scan_machine();
+
+ generic_apic_probe();
+
+ acpi_boot_table_init();
+ acpi_boot_init();
+
+ if ( smp_found_config )
+ get_smp_config();
+
+ init_apic_mappings();
+
+ init_IRQ();
+
+ trap_init();
+
+ ac_timer_init();
+
+ early_time_init();
+
+ arch_init_memory();
+
+ scheduler_init();
+
+ identify_cpu(&boot_cpu_data);
+ if ( cpu_has_fxsr )
+ set_in_cr4(X86_CR4_OSFXSR);
+ if ( cpu_has_xmm )
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
+
+ if ( opt_nosmp )
+ {
+ max_cpus = 0;
+ smp_num_siblings = 1;
+ boot_cpu_data.x86_num_cores = 1;
+ }
+
+ smp_prepare_cpus(max_cpus);
+
+ /* We aren't hotplug-capable yet. */
+ BUG_ON(!cpus_empty(cpu_present_map));
+ for_each_cpu ( i )
+ cpu_set(i, cpu_present_map);
+
+ /*
+ * Initialise higher-level timer functions. We do this fairly late
+ * (post-SMP) because the time bases and scale factors need to be updated
+ * regularly, and SMP initialisation can cause a long delay with
+ * interrupts not yet enabled.
+ */
+ init_xen_time();
+
+ initialize_keytable();
+
+ serial_init_postirq();
+
+ BUG_ON(!local_irq_is_enabled());
+
+ for_each_present_cpu ( i )
+ {
+ if ( num_online_cpus() >= max_cpus )
+ break;
+ if ( !cpu_online(i) )
+ __cpu_up(i);
+ }
+
+ printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+ smp_cpus_done(max_cpus);
+
+ do_initcalls();
+
+ schedulers_start();
+
+ watchdog_enable();
shadow_mode_init();
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/shadow.c Mon Jan 9 11:22:17 2006
@@ -1800,7 +1800,7 @@
}
/* Other VCPUs mustn't use the revoked writable mappings. */
- other_vcpus_mask = d->cpumask;
+ other_vcpus_mask = d->domain_dirty_cpumask;
cpu_clear(smp_processor_id(), other_vcpus_mask);
flush_tlb_mask(other_vcpus_mask);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/shadow32.c Mon Jan 9 11:22:17 2006
@@ -2586,7 +2586,7 @@
}
/* Other VCPUs mustn't use the revoked writable mappings. */
- other_vcpus_mask = d->cpumask;
+ other_vcpus_mask = d->domain_dirty_cpumask;
cpu_clear(smp_processor_id(), other_vcpus_mask);
flush_tlb_mask(other_vcpus_mask);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/smpboot.c Mon Jan 9 11:22:17 2006
@@ -435,7 +435,7 @@
extern void percpu_traps_init(void);
- set_current(idle_task[cpu]);
+ set_current(idle_domain[cpu]);
set_processor_id(cpu);
percpu_traps_init();
@@ -763,7 +763,6 @@
{
struct domain *idle;
struct vcpu *v;
- void *stack;
unsigned long boot_error;
int timeout, cpu;
unsigned long start_eip;
@@ -774,7 +773,7 @@
if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
panic("failed 'createdomain' for CPU %d", cpu);
- v = idle_task[cpu] = idle->vcpu[0];
+ v = idle_domain[cpu] = idle->vcpu[0];
set_bit(_DOMF_idle_domain, &idle->domain_flags);
@@ -786,16 +785,10 @@
/* So we see what's up */
printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
- stack = alloc_xenheap_pages(STACK_ORDER);
-#if defined(__i386__)
- stack_start.esp = (void *)__pa(stack);
-#elif defined(__x86_64__)
- stack_start.esp = stack;
-#endif
- stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
+ stack_start.esp = alloc_xenheap_pages(STACK_ORDER);
/* Debug build: detect stack overflow by setting up a guard page. */
- memguard_guard_stack(stack);
+ memguard_guard_stack(stack_start.esp);
/*
* This grunge runs the startup process for
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/traps.c Mon Jan 9 11:22:17 2006
@@ -41,6 +41,7 @@
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/symbols.h>
+#include <xen/iocap.h>
#include <asm/shadow.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -192,7 +193,8 @@
/* Bounds for range of valid frame pointer. */
low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
- high = (low & ~(STACK_SIZE - 1)) + (STACK_SIZE - sizeof(struct cpu_info));
+ high = (low & ~(STACK_SIZE - 1)) +
+ (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
/* The initial frame pointer. */
next = regs->ebp;
@@ -200,14 +202,14 @@
for ( ; ; )
{
/* Valid frame pointer? */
- if ( (next < low) || (next > high) )
+ if ( (next < low) || (next >= high) )
{
/*
* Exception stack frames have a different layout, denoted by an
* inverted frame pointer.
*/
next = ~next;
- if ( (next < low) || (next > high) )
+ if ( (next < low) || (next >= high) )
break;
frame = (unsigned long *)next;
next = frame[0];
@@ -621,17 +623,7 @@
unsigned int port, unsigned int bytes,
struct vcpu *v, struct cpu_user_regs *regs)
{
- struct domain *d = v->domain;
- u16 x;
-
- if ( d->arch.iobmp_mask != NULL )
- {
- x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
- if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
- return 1;
- }
-
- return 0;
+ return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
/* Check admin limits. Silently fail the access if it is disallowed. */
@@ -871,7 +863,7 @@
case 0x09: /* WBINVD */
/* Ignore the instruction if unprivileged. */
- if ( !IS_CAPABLE_PHYSDEV(v->domain) )
+ if ( !cache_flush_permitted(v->domain) )
DPRINTK("Non-physdev domain attempted WBINVD.\n");
else
wbinvd();
@@ -885,7 +877,8 @@
switch ( modrm_reg )
{
case 0: /* Read CR0 */
- *reg = v->arch.guest_context.ctrlreg[0];
+ *reg = (read_cr0() & ~X86_CR0_TS) |
+ v->arch.guest_context.ctrlreg[0];
break;
case 2: /* Read CR2 */
@@ -927,6 +920,11 @@
switch ( modrm_reg )
{
case 0: /* Write CR0 */
+ if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
+ {
+ DPRINTK("Attempt to change unmodifiable CR0 flags.\n");
+ goto fail;
+ }
(void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
break;
@@ -939,6 +937,14 @@
LOCK_BIGLOCK(v->domain);
(void)new_guest_cr3(*reg);
UNLOCK_BIGLOCK(v->domain);
+ break;
+
+ case 4:
+ if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
+ {
+ DPRINTK("Attempt to change CR4 flags.\n");
+ goto fail;
+ }
break;
default:
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/vmx.c Mon Jan 9 11:22:17 2006
@@ -42,7 +42,7 @@
#include <asm/shadow_64.h>
#endif
#include <public/sched.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <asm/vmx_vpic.h>
#include <asm/vmx_vlapic.h>
@@ -53,7 +53,7 @@
integer_param("vmx_debug", opt_vmx_debug_level);
static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
+#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
static int vmx_switch_on;
@@ -65,11 +65,6 @@
{
struct domain *d = v->domain;
struct vcpu *vc;
-
- d->arch.vmx_platform.lapic_enable = v->arch.guest_context.user_regs.ecx;
- v->arch.guest_context.user_regs.ecx = 0;
- VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n",
- d->arch.vmx_platform.lapic_enable);
/* Initialize monitor page table */
for_each_vcpu(d, vc)
@@ -95,7 +90,7 @@
void vmx_relinquish_resources(struct vcpu *v)
{
struct vmx_virpit *vpit;
-
+
if ( !VMX_DOMAIN(v) )
return;
@@ -1955,9 +1950,12 @@
asmlinkage void trace_vmentry (void)
{
- TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0],
- trace_values[current->processor][1],trace_values[current->processor][2],
- trace_values[current->processor][3],trace_values[current->processor][4]);
+ TRACE_5D(TRC_VMENTRY,
+ trace_values[smp_processor_id()][0],
+ trace_values[smp_processor_id()][1],
+ trace_values[smp_processor_id()][2],
+ trace_values[smp_processor_id()][3],
+ trace_values[smp_processor_id()][4]);
TRACE_VMEXIT(0,9);
TRACE_VMEXIT(1,9);
TRACE_VMEXIT(2,9);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/vmx_intercept.c Mon Jan 9 11:22:17 2006
@@ -24,7 +24,7 @@
#include <asm/vmx_vpit.h>
#include <asm/vmx_intercept.h>
#include <asm/vmx_vlapic.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/vmx_io.c Mon Jan 9 11:22:17 2006
@@ -37,7 +37,7 @@
#include <asm/shadow.h>
#include <asm/vmx_vpic.h>
#include <asm/vmx_vlapic.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#ifdef CONFIG_VMX
#if defined (__i386__)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/vmx_platform.c Mon Jan 9 11:22:17 2006
@@ -27,7 +27,7 @@
#include <xen/trace.h>
#include <asm/vmx.h>
#include <asm/vmx_platform.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#include <xen/lib.h>
#include <xen/sched.h>
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_vlapic.c
--- a/xen/arch/x86/vmx_vlapic.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/vmx_vlapic.c Mon Jan 9 11:22:17 2006
@@ -32,7 +32,7 @@
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#ifdef CONFIG_VMX
@@ -62,7 +62,7 @@
int vmx_apic_support(struct domain *d)
{
- return d->arch.vmx_platform.lapic_enable;
+ return d->arch.vmx_platform.apic_enabled;
}
s_time_t get_apictime_scheduled(struct vcpu *v)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/vmx_vmcs.c Mon Jan 9 11:22:17 2006
@@ -32,7 +32,7 @@
#include <asm/flushtlb.h>
#include <xen/event.h>
#include <xen/kernel.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/hvm_info_table.h>
#if CONFIG_PAGING_LEVELS >= 4
#include <asm/shadow_64.h>
#endif
@@ -206,35 +206,55 @@
&d->shared_info->evtchn_mask[0]);
}
-#define VCPU_NR_PAGE 0x0009F000
-#define VCPU_NR_OFFSET 0x00000800
-#define VCPU_MAGIC 0x76637075 /* "vcpu" */
-
-static void vmx_set_vcpu_nr(struct domain *d)
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+ char signature[] = "HVM INFO";
+ uint8_t *ptr = (uint8_t *)t;
+ uint8_t sum = 0;
+ int i;
+
+ /* strncmp(t->signature, "HVM INFO", 8) */
+ for ( i = 0; i < 8; i++ ) {
+ if ( signature[i] != t->signature[i] ) {
+ printk("Bad hvm info signature\n");
+ return 0;
+ }
+ }
+
+ for ( i = 0; i < t->length; i++ )
+ sum += ptr[i];
+
+ return (sum == 0);
+}
+
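validate_hvm_info() accepts the table only if every byte, summed modulo 256, comes out to zero, so the table builder must store a compensating byte. A sketch of that producer side, assuming the table reserves a checksum byte that is cleared before summing (the field is an assumption, not shown in this patch):

    #include <stdint.h>

    /* Sketch: compute the byte that makes the whole table sum to zero
     * (mod 256); run with the checksum byte itself still cleared. */
    static uint8_t hvm_info_checksum(const uint8_t *p, unsigned int len)
    {
        uint8_t sum = 0;
        unsigned int i;

        for ( i = 0; i < len; i++ )
            sum += p[i];
        return (uint8_t)-sum;              /* sum + (-sum) == 0 mod 256 */
    }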
+static void vmx_get_hvm_info(struct domain *d)
{
unsigned char *p;
unsigned long mpfn;
- unsigned int *vcpus;
-
- mpfn = get_mfn_from_pfn(VCPU_NR_PAGE >> PAGE_SHIFT);
- if (mpfn == INVALID_MFN) {
- printk("Can not get vcpu number page mfn for VMX domain.\n");
+ struct hvm_info_table *t;
+
+ mpfn = get_mfn_from_pfn(HVM_INFO_PFN);
+ if ( mpfn == INVALID_MFN ) {
+ printk("Can not get hvm info page mfn for VMX domain.\n");
domain_crash_synchronous();
}
p = map_domain_page(mpfn);
- if (p == NULL) {
- printk("Can not map vcpu number page for VMX domain.\n");
- domain_crash_synchronous();
- }
-
- vcpus = (unsigned int *)(p + VCPU_NR_OFFSET);
- if (vcpus[0] != VCPU_MAGIC) {
- printk("Bad vcpus magic, set vcpu number to 1 by default.\n");
- d->arch.vmx_platform.nr_vcpu = 1;
- }
-
- d->arch.vmx_platform.nr_vcpu = vcpus[1];
+ if ( p == NULL ) {
+ printk("Can not map hvm info page for VMX domain.\n");
+ domain_crash_synchronous();
+ }
+
+ t = (struct hvm_info_table *)(p + HVM_INFO_OFFSET);
+
+ if ( validate_hvm_info(t) ) {
+ d->arch.vmx_platform.nr_vcpus = t->nr_vcpus;
+ d->arch.vmx_platform.apic_enabled = t->apic_enabled;
+ } else {
+ printk("Bad hvm info table\n");
+ d->arch.vmx_platform.nr_vcpus = 1;
+ d->arch.vmx_platform.apic_enabled = 0;
+ }
unmap_domain_page(p);
}
@@ -244,10 +264,10 @@
struct vmx_platform *platform;
vmx_map_io_shared_page(d);
- vmx_set_vcpu_nr(d);
+ vmx_get_hvm_info(d);
platform = &d->arch.vmx_platform;
- pic_init(&platform->vmx_pic, pic_irq_request,
+ pic_init(&platform->vmx_pic, pic_irq_request,
&platform->interrupt_request);
register_pic_io_hook();
@@ -335,6 +355,8 @@
__vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
v->arch.schedule_tail = arch_vmx_do_resume;
+ v->arch.arch_vmx.launch_cpu = smp_processor_id();
+
/* init guest tsc to start from 0 */
rdtscll(host_tsc);
v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
@@ -617,11 +639,21 @@
void arch_vmx_do_resume(struct vcpu *v)
{
- u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
- load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
- vmx_do_resume(v);
- reset_stack_and_jump(vmx_asm_do_resume);
+ if ( v->arch.arch_vmx.launch_cpu == smp_processor_id() )
+ {
+ load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
+ vmx_do_resume(v);
+ reset_stack_and_jump(vmx_asm_do_resume);
+ }
+ else
+ {
+ __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
+ load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
+ vmx_do_resume(v);
+ vmx_set_host_env(v);
+ v->arch.arch_vmx.launch_cpu = smp_processor_id();
+ reset_stack_and_jump(vmx_asm_do_relaunch);
+ }
}
void arch_vmx_do_launch(struct vcpu *v)
@@ -641,18 +673,6 @@
}
vmx_do_launch(v);
reset_stack_and_jump(vmx_asm_do_launch);
-}
-
-void arch_vmx_do_relaunch(struct vcpu *v)
-{
- u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
-
- load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
- vmx_do_resume(v);
- vmx_set_host_env(v);
- v->arch.schedule_tail = arch_vmx_do_resume;
-
- reset_stack_and_jump(vmx_asm_do_relaunch);
}
#endif /* CONFIG_VMX */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/x86_emulate.c Mon Jan 9 11:22:17 2006
@@ -371,6 +371,21 @@
(_type)_x; \
})
+/* Access/update address held in a register, based on addressing mode. */
+#define register_address(sel, reg) \
+ ((ad_bytes == sizeof(unsigned long)) ? (reg) : \
+ ((mode == X86EMUL_MODE_REAL) ? /* implies ad_bytes == 2 */ \
+ (((unsigned long)(sel) << 4) + ((reg) & 0xffff)) : \
+ ((reg) & ((1UL << (ad_bytes << 3)) - 1))))
+#define register_address_increment(reg, inc) \
+do { \
+ if ( ad_bytes == sizeof(unsigned long) ) \
+ (reg) += (inc); \
+ else \
+ (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) | \
+ (((reg) + (inc)) & ((1UL << (ad_bytes << 3)) - 1)); \
+} while (0)
+
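register_address() folds in the segment base only for real mode, and register_address_increment() confines the arithmetic to the low ad_bytes of the register. A standalone worked example of the 16-bit behaviour with made-up values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int ad_bytes = 2;                 /* real-mode addressing */
        unsigned long mask = (1UL << (ad_bytes << 3)) - 1;   /* 0xffff */
        uint16_t ss = 0x2000;
        unsigned long esp = 0x12340000UL;          /* upper half must survive */

        /* register_address(ss, esp) in real mode: */
        printf("linear = 0x%lx\n", ((unsigned long)ss << 4) + (esp & mask));

        /* register_address_increment(esp, -2): */
        esp = (esp & ~mask) | ((esp - 2) & mask);
        printf("esp = 0x%lx\n", esp);              /* 0x1234fffe: low word wraps */
        return 0;
    }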
void *
decode_register(
uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
@@ -420,32 +435,64 @@
{
uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
- unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
- unsigned int lock_prefix = 0, rep_prefix = 0, i;
+ uint16_t *seg = NULL; /* override segment */
+ unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
int rc = 0;
struct operand src, dst;
/* Shadow copy of register state. Committed on successful emulation. */
struct cpu_user_regs _regs = *regs;
+ switch ( mode )
+ {
+ case X86EMUL_MODE_REAL:
+ case X86EMUL_MODE_PROT16:
+ op_bytes = ad_bytes = 2;
+ break;
+ case X86EMUL_MODE_PROT32:
+ op_bytes = ad_bytes = 4;
+ break;
+#ifdef __x86_64__
+ case X86EMUL_MODE_PROT64:
+ op_bytes = 4;
+ ad_bytes = 8;
+ break;
+#endif
+ default:
+ return -1;
+ }
+
/* Legacy prefixes. */
for ( i = 0; i < 8; i++ )
{
switch ( b = insn_fetch(uint8_t, 1, _regs.eip) )
{
case 0x66: /* operand-size override */
- op_bytes ^= 6; /* switch between 2/4 bytes */
+ op_bytes ^= 6; /* switch between 2/4 bytes */
break;
case 0x67: /* address-size override */
- ad_bytes ^= (mode == 8) ? 12 : 6; /* switch between 2/4/8 bytes */
+ if ( mode == X86EMUL_MODE_PROT64 )
+ ad_bytes ^= 12; /* switch between 4/8 bytes */
+ else
+ ad_bytes ^= 6; /* switch between 2/4 bytes */
break;
case 0x2e: /* CS override */
+ seg = &_regs.cs;
+ break;
case 0x3e: /* DS override */
+ seg = &_regs.ds;
+ break;
case 0x26: /* ES override */
+ seg = &_regs.es;
+ break;
case 0x64: /* FS override */
+ seg = &_regs.fs;
+ break;
case 0x65: /* GS override */
+ seg = &_regs.gs;
+ break;
case 0x36: /* SS override */
- DPRINTF("Warning: ignoring a segment override.\n");
+ seg = &_regs.ss;
break;
case 0xf0: /* LOCK */
lock_prefix = 1;
@@ -461,8 +508,12 @@
}
done_prefixes:
+ /* Not quite the same as 80386 real mode, but hopefully good enough. */
+ if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) )
+ goto cannot_emulate;
+
/* REX prefix. */
- if ( (mode == 8) && ((b & 0xf0) == 0x40) )
+ if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
{
rex_prefix = b;
if ( b & 8 )
@@ -674,7 +725,7 @@
emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
break;
case 0x63: /* movsxd */
- if ( mode != 8 ) /* x86/64 long mode only */
+ if ( mode != X86EMUL_MODE_PROT64 )
goto cannot_emulate;
dst.val = (int32_t)src.val;
break;
@@ -721,12 +772,13 @@
dst.val = src.val;
break;
case 0x8f: /* pop (sole member of Grp1a) */
- /* 64-bit mode: POP defaults to 64-bit operands. */
- if ( (mode == 8) && (dst.bytes == 4) )
+ /* 64-bit mode: POP always pops a 64-bit operand. */
+ if ( mode == X86EMUL_MODE_PROT64 )
dst.bytes = 8;
- if ( (rc = ops->read_std(_regs.esp, &dst.val, dst.bytes)) != 0 )
+ if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
+ &dst.val, dst.bytes)) != 0 )
goto done;
- _regs.esp += dst.bytes;
+ register_address_increment(_regs.esp, dst.bytes);
break;
case 0xc0 ... 0xc1: grp2: /* Grp2 */
switch ( modrm_reg )
@@ -797,16 +849,17 @@
emulate_1op("dec", dst, _regs.eflags);
break;
case 6: /* push */
- /* 64-bit mode: PUSH defaults to 64-bit operands. */
- if ( (mode == 8) && (dst.bytes == 4) )
+ /* 64-bit mode: PUSH always pushes a 64-bit operand. */
+ if ( mode == X86EMUL_MODE_PROT64 )
{
dst.bytes = 8;
if ( (rc = ops->read_std((unsigned long)dst.ptr,
&dst.val, 8)) != 0 )
goto done;
}
- _regs.esp -= dst.bytes;
- if ( (rc = ops->write_std(_regs.esp, dst.val, dst.bytes)) != 0 )
+ register_address_increment(_regs.esp, -dst.bytes);
+ if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
+ dst.val, dst.bytes)) != 0 )
goto done;
dst.val = dst.orig_val; /* skanky: disable writeback */
break;
@@ -873,19 +926,22 @@
{
/* Write fault: destination is special memory. */
dst.ptr = (unsigned long *)cr2;
- if ( (rc = ops->read_std(_regs.esi - _regs.edi + cr2,
+ if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
+ _regs.esi),
&dst.val, dst.bytes)) != 0 )
goto done;
}
else
{
/* Read fault: source is special memory. */
- dst.ptr = (unsigned long *)(_regs.edi - _regs.esi + cr2);
+ dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
goto done;
}
- _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
- _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ register_address_increment(
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+ register_address_increment(
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
break;
case 0xa6 ... 0xa7: /* cmps */
DPRINTF("Urk! I don't handle CMPS.\n");
@@ -895,7 +951,8 @@
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.ptr = (unsigned long *)cr2;
dst.val = _regs.eax;
- _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ register_address_increment(
+ _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
break;
case 0xac ... 0xad: /* lods */
dst.type = OP_REG;
@@ -903,7 +960,8 @@
dst.ptr = (unsigned long *)&_regs.eax;
if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
goto done;
- _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ register_address_increment(
+ _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
break;
case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n");
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/bitmap.c
--- a/xen/common/bitmap.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/bitmap.c Mon Jan 9 11:22:17 2006
@@ -282,6 +282,111 @@
#endif
EXPORT_SYMBOL(__bitmap_weight);
+/*
+ * Bitmap printing & parsing functions: first version by Bill Irwin,
+ * second version by Paul Jackson, third by Joe Korty.
+ */
+
+#define CHUNKSZ 32
+#define nbits_to_hold_value(val) fls(val)
+#define roundup_power2(val,modulus) (((val) + (modulus) - 1) & ~((modulus) - 1))
+#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
+#define BASEDEC 10 /* fancier cpuset lists input in decimal */
+
+/**
+ * bitmap_scnprintf - convert bitmap to an ASCII hex string.
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Exactly @nmaskbits bits are displayed. Hex digits are grouped into
+ * comma-separated sets of eight digits per set.
+ */
+int bitmap_scnprintf(char *buf, unsigned int buflen,
+ const unsigned long *maskp, int nmaskbits)
+{
+ int i, word, bit, len = 0;
+ unsigned long val;
+ const char *sep = "";
+ int chunksz;
+ u32 chunkmask;
+
+ chunksz = nmaskbits & (CHUNKSZ - 1);
+ if (chunksz == 0)
+ chunksz = CHUNKSZ;
+
+ i = roundup_power2(nmaskbits, CHUNKSZ) - CHUNKSZ;
+ for (; i >= 0; i -= CHUNKSZ) {
+ chunkmask = ((1ULL << chunksz) - 1);
+ word = i / BITS_PER_LONG;
+ bit = i % BITS_PER_LONG;
+ val = (maskp[word] >> bit) & chunkmask;
+ len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep,
+ (chunksz+3)/4, val);
+ chunksz = CHUNKSZ;
+ sep = ",";
+ }
+ return len;
+}
+EXPORT_SYMBOL(bitmap_scnprintf);
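For example, a 40-bit map with bits 0 and 32 set prints as a two-digit chunk for the top eight bits, a comma, then a full eight-digit 32-bit chunk. A minimal fragment for use inside Xen (assumes the xen/bitmap.h declarations above):

    /* Prints "01,00000001" */
    unsigned long map[BITS_TO_LONGS(40)] = { 0 };
    char buf[32];

    set_bit(0, map);
    set_bit(32, map);
    bitmap_scnprintf(buf, sizeof(buf), map, 40);
    printk("%s\n", buf);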
+
+/*
+ * bscnl_emit(buf, buflen, rbot, rtop, bp)
+ *
+ * Helper routine for bitmap_scnlistprintf(). Write decimal number
+ * or range to buf, suppressing output past buf+buflen, with optional
+ * comma-prefix. Return len of what would be written to buf, if it
+ * all fit.
+ */
+static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len)
+{
+ if (len > 0)
+ len += scnprintf(buf + len, buflen - len, ",");
+ if (rbot == rtop)
+ len += scnprintf(buf + len, buflen - len, "%d", rbot);
+ else
+ len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop);
+ return len;
+}
+
+/**
+ * bitmap_scnlistprintf - convert bitmap to list format ASCII string
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Output format is a comma-separated list of decimal numbers and
+ * ranges. Consecutively set bits are shown as two hyphen-separated
+ * decimal numbers, the smallest and largest bit numbers set in
+ * the range. Output format is compatible with the format
+ * accepted as input by bitmap_parselist().
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing '\0', as
+ * per ISO C99.
+ */
+int bitmap_scnlistprintf(char *buf, unsigned int buflen,
+ const unsigned long *maskp, int nmaskbits)
+{
+ int len = 0;
+ /* current bit is 'cur', most recently seen range is [rbot, rtop] */
+ int cur, rbot, rtop;
+
+ rbot = cur = find_first_bit(maskp, nmaskbits);
+ while (cur < nmaskbits) {
+ rtop = cur;
+ cur = find_next_bit(maskp, nmaskbits, cur+1);
+ if (cur >= nmaskbits || cur > rtop + 1) {
+ len = bscnl_emit(buf, buflen, rbot, rtop, len);
+ rbot = cur;
+ }
+ }
+ return len;
+}
+EXPORT_SYMBOL(bitmap_scnlistprintf);
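The list form is the human-readable counterpart: runs of consecutive bits collapse into hyphenated ranges. With bits 0, 3, 4, 5 and 9 set in a 16-bit map the routine emits "0,3-5,9". A matching fragment (again assuming the declarations above):

    /* Prints "0,3-5,9" */
    unsigned long map[1] = { 0 };
    char buf[32];

    set_bit(0, map); set_bit(3, map); set_bit(4, map);
    set_bit(5, map); set_bit(9, map);
    bitmap_scnlistprintf(buf, sizeof(buf), map, 16);
    printk("%s\n", buf);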
+
/**
* bitmap_find_free_region - find a contiguous aligned mem region
* @bitmap: an array of unsigned longs corresponding to the bitmap
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/dom0_ops.c Mon Jan 9 11:22:17 2006
@@ -16,6 +16,7 @@
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
+#include <xen/iocap.h>
#include <asm/current.h>
#include <public/dom0_ops.h>
#include <public/sched_ctl.h>
@@ -109,13 +110,13 @@
switch ( op->cmd )
{
- case DOM0_SETDOMAININFO:
- {
- struct domain *d = find_domain_by_id(op->u.setdomaininfo.domain);
+ case DOM0_SETVCPUCONTEXT:
+ {
+ struct domain *d = find_domain_by_id(op->u.setvcpucontext.domain);
ret = -ESRCH;
if ( d != NULL )
{
- ret = set_info_guest(d, &op->u.setdomaininfo);
+ ret = set_info_guest(d, &op->u.setvcpucontext);
put_domain(d);
}
}
@@ -283,11 +284,12 @@
}
break;
- case DOM0_PINCPUDOMAIN:
- {
- domid_t dom = op->u.pincpudomain.domain;
+ case DOM0_SETVCPUAFFINITY:
+ {
+ domid_t dom = op->u.setvcpuaffinity.domain;
struct domain *d = find_domain_by_id(dom);
struct vcpu *v;
+ cpumask_t new_affinity;
if ( d == NULL )
{
@@ -295,15 +297,15 @@
break;
}
- if ( (op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) ||
- !d->vcpu[op->u.pincpudomain.vcpu] )
+ if ( (op->u.setvcpuaffinity.vcpu >= MAX_VIRT_CPUS) ||
+ !d->vcpu[op->u.setvcpuaffinity.vcpu] )
{
ret = -EINVAL;
put_domain(d);
break;
}
- v = d->vcpu[op->u.pincpudomain.vcpu];
+ v = d->vcpu[op->u.setvcpuaffinity.vcpu];
if ( v == NULL )
{
ret = -ESRCH;
@@ -318,22 +320,13 @@
break;
}
- v->cpumap = op->u.pincpudomain.cpumap;
-
- if ( v->cpumap == CPUMAP_RUNANYWHERE )
- {
- clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
- }
- else
- {
- /* pick a new cpu from the usable map */
- int new_cpu;
- new_cpu = (int)find_first_set_bit(v->cpumap) % num_online_cpus();
- vcpu_pause(v);
- vcpu_migrate_cpu(v, new_cpu);
- set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
- vcpu_unpause(v);
- }
+ new_affinity = v->cpu_affinity;
+ memcpy(cpus_addr(new_affinity),
+ &op->u.setvcpuaffinity.cpumap,
+ min((int)BITS_TO_LONGS(NR_CPUS),
+ (int)sizeof(op->u.setvcpuaffinity.cpumap)));
+
+ ret = vcpu_set_affinity(v, &new_affinity);
put_domain(d);
}
@@ -505,7 +498,11 @@
op->u.getvcpuinfo.running = test_bit(_VCPUF_running, &v->vcpu_flags);
op->u.getvcpuinfo.cpu_time = v->cpu_time;
op->u.getvcpuinfo.cpu = v->processor;
- op->u.getvcpuinfo.cpumap = v->cpumap;
+ op->u.getvcpuinfo.cpumap = 0;
+ memcpy(&op->u.getvcpuinfo.cpumap,
+ cpus_addr(v->cpu_affinity),
+ min((int)BITS_TO_LONGS(NR_CPUS),
+ (int)sizeof(op->u.getvcpuinfo.cpumap)));
ret = 0;
if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
@@ -582,6 +579,7 @@
}
}
break;
+
case DOM0_SETDEBUGGING:
{
struct domain *d;
@@ -596,6 +594,53 @@
put_domain(d);
ret = 0;
}
+ }
+ break;
+
+ case DOM0_IRQ_PERMISSION:
+ {
+ struct domain *d;
+ unsigned int pirq = op->u.irq_permission.pirq;
+
+ ret = -EINVAL;
+ if ( pirq >= NR_PIRQS )
+ break;
+
+ ret = -ESRCH;
+ d = find_domain_by_id(op->u.irq_permission.domain);
+ if ( d == NULL )
+ break;
+
+ if ( op->u.irq_permission.allow_access )
+ ret = irq_permit_access(d, pirq);
+ else
+ ret = irq_deny_access(d, pirq);
+
+ put_domain(d);
+ }
+ break;
+
+ case DOM0_IOMEM_PERMISSION:
+ {
+ struct domain *d;
+ unsigned long pfn = op->u.iomem_permission.first_pfn;
+ unsigned long nr_pfns = op->u.iomem_permission.nr_pfns;
+
+ ret = -EINVAL;
+ if ( (pfn + nr_pfns - 1) < pfn ) /* wrap? */
+ break;
+
+ ret = -ESRCH;
+ d = find_domain_by_id(op->u.iomem_permission.domain);
+ if ( d == NULL )
+ break;
+
+ if ( op->u.iomem_permission.allow_access )
+ ret = iomem_permit_access(d, pfn, pfn + nr_pfns - 1);
+ else
+ ret = iomem_deny_access(d, pfn, pfn + nr_pfns - 1);
+
+ put_domain(d);
}
break;
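The (pfn + nr_pfns - 1) < pfn test in DOM0_IOMEM_PERMISSION above is the standard unsigned-overflow idiom: if computing the range's last frame wraps past zero, the result compares below the base and the request is rejected before any access is granted. A standalone sketch:

#include <stdio.h>

static int range_wraps(unsigned long pfn, unsigned long nr_pfns)
{
    /* Did the last frame of [pfn, pfn + nr_pfns - 1] wrap around zero? */
    return (pfn + nr_pfns - 1) < pfn;
}

int main(void)
{
    printf("%d\n", range_wraps(0x1000UL, 0x10UL)); /* 0: last frame 0x100f */
    printf("%d\n", range_wraps(~0UL, 2UL));        /* 1: wraps through zero */
    return 0;
}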
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/domain.c
--- a/xen/common/domain.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/domain.c Mon Jan 9 11:22:17 2006
@@ -16,6 +16,7 @@
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
+#include <xen/rangeset.h>
#include <asm/debugger.h>
#include <public/dom0_ops.h>
#include <public/sched.h>
@@ -50,25 +51,24 @@
else
set_bit(_DOMF_ctrl_pause, &d->domain_flags);
- if ( !is_idle_task(d) &&
+ if ( !is_idle_domain(d) &&
((evtchn_init(d) != 0) || (grant_table_create(d) != 0)) )
- {
- evtchn_destroy(d);
- free_domain(d);
- return NULL;
- }
+ goto fail1;
if ( (v = alloc_vcpu(d, 0, cpu)) == NULL )
- {
- grant_table_destroy(d);
- evtchn_destroy(d);
- free_domain(d);
- return NULL;
- }
-
- arch_do_createdomain(v);
-
- if ( !is_idle_task(d) )
+ goto fail2;
+
+ rangeset_domain_initialise(d);
+
+ d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
+ d->irq_caps = rangeset_new(d, "Interrupts", 0);
+
+ if ( (d->iomem_caps == NULL) ||
+ (d->irq_caps == NULL) ||
+ (arch_do_createdomain(v) != 0) )
+ goto fail3;
+
+ if ( !is_idle_domain(d) )
{
write_lock(&domlist_lock);
pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
@@ -83,6 +83,15 @@
}
return d;
+
+ fail3:
+ rangeset_domain_destroy(d);
+ fail2:
+ grant_table_destroy(d);
+ fail1:
+ evtchn_destroy(d);
+ free_domain(d);
+ return NULL;
}
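The new fail1/fail2/fail3 labels replace the duplicated cleanup blocks with the usual goto-unwind idiom: each label undoes exactly one construction step and falls through to the labels below it, so teardown always runs in reverse order of setup. A minimal sketch of the pattern (all names illustrative):

#include <stdio.h>

struct thing { int a, b, c; };

static int setup_a(struct thing *t) { t->a = 1; return 0; }
static int setup_b(struct thing *t) { t->b = 1; return 0; }
static int setup_c(struct thing *t) { (void)t; return -1; /* say it fails */ }
static void undo_a(struct thing *t) { t->a = 0; }
static void undo_b(struct thing *t) { t->b = 0; }

static int create_thing(struct thing *t)
{
    if (setup_a(t) != 0)
        goto fail1;
    if (setup_b(t) != 0)
        goto fail2;
    if (setup_c(t) != 0)
        goto fail3;
    return 0;

 fail3:                  /* undo step b, then fall through */
    undo_b(t);
 fail2:                  /* undo step a, then fall through */
    undo_a(t);
 fail1:
    return -1;
}

int main(void)
{
    struct thing t = { 0, 0, 0 };

    printf("%d a=%d b=%d\n", create_thing(&t), t.a, t.b); /* -1 a=0 b=0 */
    return 0;
}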
@@ -164,20 +173,23 @@
BUG_ON(d == NULL);
BUG_ON(d == current->domain);
- BUG_ON(!test_bit(_DOMF_shuttingdown, &d->domain_flags));
- BUG_ON(test_bit(_DOMF_shutdown, &d->domain_flags));
+
+ LOCK_BIGLOCK(d);
/* Make sure that every vcpu is descheduled before we finalise. */
for_each_vcpu ( d, v )
vcpu_sleep_sync(v);
- BUG_ON(!cpus_empty(d->cpumask));
+ BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
sync_pagetable_state(d);
- set_bit(_DOMF_shutdown, &d->domain_flags);
- clear_bit(_DOMF_shuttingdown, &d->domain_flags);
-
- send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
+ /* Don't set DOMF_shutdown until execution contexts are sync'ed. */
+ if ( !test_and_set_bit(_DOMF_shutdown, &d->domain_flags) )
+ send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
+
+ UNLOCK_BIGLOCK(d);
+
+ put_domain(d);
}
static __init int domain_shutdown_finaliser_init(void)
@@ -213,16 +225,17 @@
/* Mark the domain as shutting down. */
d->shutdown_code = reason;
- if ( !test_and_set_bit(_DOMF_shuttingdown, &d->domain_flags) )
- {
- /* This vcpu won the race to finalise the shutdown. */
- domain_shuttingdown[smp_processor_id()] = d;
- raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
- }
/* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
for_each_vcpu ( d, v )
+ {
+ atomic_inc(&v->pausecnt);
vcpu_sleep_nosync(v);
+ }
+
+ get_knownalive_domain(d);
+ domain_shuttingdown[smp_processor_id()] = d;
+ raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
}
@@ -271,6 +284,8 @@
*pd = d->next_in_hashbucket;
write_unlock(&domlist_lock);
+ rangeset_domain_destroy(d);
+
evtchn_destroy(d);
grant_table_destroy(d);
@@ -346,11 +361,11 @@
* of domains other than domain 0. ie. the domains that are being built by
* the userspace dom0 domain builder.
*/
-int set_info_guest(struct domain *d, dom0_setdomaininfo_t *setdomaininfo)
+int set_info_guest(struct domain *d, dom0_setvcpucontext_t *setvcpucontext)
{
int rc = 0;
struct vcpu_guest_context *c = NULL;
- unsigned long vcpu = setdomaininfo->vcpu;
+ unsigned long vcpu = setvcpucontext->vcpu;
struct vcpu *v;
if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) )
@@ -363,7 +378,7 @@
return -ENOMEM;
rc = -EFAULT;
- if ( copy_from_user(c, setdomaininfo->ctxt, sizeof(*c)) == 0 )
+ if ( copy_from_user(c, setvcpucontext->ctxt, sizeof(*c)) == 0 )
rc = arch_set_info_guest(v, c);
xfree(c);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/event_channel.c
--- a/xen/common/event_channel.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/event_channel.c Mon Jan 9 11:22:17 2006
@@ -22,6 +22,7 @@
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/irq.h>
+#include <xen/iocap.h>
#include <asm/current.h>
#include <public/xen.h>
@@ -241,6 +242,9 @@
if ( pirq >= ARRAY_SIZE(d->pirq_to_evtchn) )
return -EINVAL;
+
+ if ( !irq_access_permitted(d, pirq) )
+ return -EPERM;
spin_lock(&d->evtchn_lock);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/grant_table.c
--- a/xen/common/grant_table.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/grant_table.c Mon Jan 9 11:22:17 2006
@@ -469,7 +469,7 @@
for ( i = 0; i < count; i++ )
(void)__gnttab_unmap_grant_ref(&uop[i]);
- flush_tlb_mask(current->domain->cpumask);
+ flush_tlb_mask(current->domain->domain_dirty_cpumask);
return 0;
}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/keyhandler.c Mon Jan 9 11:22:17 2006
@@ -11,6 +11,7 @@
#include <xen/sched.h>
#include <xen/softirq.h>
#include <xen/domain.h>
+#include <xen/rangeset.h>
#include <asm/debugger.h>
#define KEY_MAX 256
@@ -96,44 +97,60 @@
machine_restart(NULL);
}
-static void do_task_queues(unsigned char key)
+static void cpuset_print(char *set, int size, cpumask_t mask)
+{
+ *set++ = '{';
+ set += cpulist_scnprintf(set, size-2, mask);
+ *set++ = '}';
+ *set++ = '\0';
+}
+
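cpuset_print() just brackets cpulist_scnprintf()'s list-format output in braces. A standalone sketch with the mask formatter stubbed out by a fixed string (illustration only):

#include <stdio.h>

static int stub_cpulist_scnprintf(char *buf, int size, unsigned long mask)
{
    (void)mask;                       /* pretend CPUs 0-3 and 7 are set */
    return snprintf(buf, size, "0-3,7");
}

static void cpuset_print_demo(char *set, int size, unsigned long mask)
{
    *set++ = '{';
    set += stub_cpulist_scnprintf(set, size - 2, mask);
    *set++ = '}';
    *set++ = '\0';
}

int main(void)
{
    char buf[100];

    cpuset_print_demo(buf, sizeof(buf), 0x8fUL);
    puts(buf);                        /* prints "{0-3,7}" */
    return 0;
}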
+static void dump_domains(unsigned char key)
{
struct domain *d;
struct vcpu *v;
s_time_t now = NOW();
-
- printk("'%c' pressed -> dumping task queues (now=0x%X:%08X)\n", key,
+ char cpuset[100];
+
+ printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
(u32)(now>>32), (u32)now);
read_lock(&domlist_lock);
for_each_domain ( d )
{
- printk("Xen: DOM %u, flags=%lx refcnt=%d nr_pages=%d "
- "xenheap_pages=%d\n", d->domain_id, d->domain_flags,
- atomic_read(&d->refcnt), d->tot_pages, d->xenheap_pages);
- /* The handle is printed according to the OSF DCE UUID spec., even
- though it is not necessarily such a thing, for ease of use when it
- _is_ one of those. */
- printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
+ printk("General information for domain %u:\n", d->domain_id);
+ cpuset_print(cpuset, sizeof(cpuset), d->domain_dirty_cpumask);
+ printk(" flags=%lx refcnt=%d nr_pages=%d xenheap_pages=%d "
+ "dirty_cpus=%s\n",
+ d->domain_flags, atomic_read(&d->refcnt),
+ d->tot_pages, d->xenheap_pages, cpuset);
+ printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x-%02x%02x%02x%02x%02x%02x\n",
d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
+ rangeset_domain_printk(d);
+
dump_pageframe_info(d);
+ printk("VCPU information and callbacks for domain %u:\n",
+ d->domain_id);
for_each_vcpu ( d, v ) {
- printk("Guest: %p CPU %d [has=%c] flags=%lx "
- "upcall_pend = %02x, upcall_mask = %02x\n", v,
- v->processor,
+ printk(" VCPU%d: CPU%d [has=%c] flags=%lx "
+ "upcall_pend = %02x, upcall_mask = %02x ",
+ v->vcpu_id, v->processor,
test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F',
v->vcpu_flags,
v->vcpu_info->evtchn_upcall_pending,
v->vcpu_info->evtchn_upcall_mask);
- printk("Notifying guest... %d/%d\n", d->domain_id, v->vcpu_id);
- printk("port %d/%d stat %d %d %d\n",
+ cpuset_print(cpuset, sizeof(cpuset), v->vcpu_dirty_cpumask);
+ printk("dirty_cpus=%s ", cpuset);
+ cpuset_print(cpuset, sizeof(cpuset), v->cpu_affinity);
+ printk("cpu_affinity=%s\n", cpuset);
+ printk(" Notifying guest (virq %d, port %d, stat %d/%d/%d)\n",
VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
&d->shared_info->evtchn_pending[0]),
@@ -191,7 +208,7 @@
register_keyhandler(
'L', reset_sched_histo, "reset sched latency histogram");
register_keyhandler(
- 'q', do_task_queues, "dump task queues + guest state");
+ 'q', dump_domains, "dump domain (and guest debug) info");
register_keyhandler(
'r', dump_runq, "dump run queues");
register_irq_keyhandler(
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/memory.c
--- a/xen/common/memory.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/memory.c Mon Jan 9 11:22:17 2006
@@ -15,6 +15,7 @@
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/shadow.h>
+#include <xen/iocap.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>
@@ -35,7 +36,8 @@
!array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
return 0;
- if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) )
+ if ( (extent_order != 0) &&
+ !multipage_allocation_permitted(current->domain) )
{
DPRINTK("Only I/O-capable domains may allocate multi-page extents.\n");
return 0;
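An extent of order k covers 2^k contiguous machine frames, so the rewritten check keeps multi-frame allocations confined to domains that may drive real I/O hardware; only the predicate spelling changes. For scale:

#include <stdio.h>

int main(void)
{
    unsigned int order;

    for (order = 0; order <= 4; order++)
        printf("extent_order %u -> %lu contiguous frame(s)\n",
               order, 1UL << order);
    return 0;
}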
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/page_alloc.c Mon Jan 9 11:22:17 2006
@@ -615,7 +615,7 @@
shadow_drop_references(d, &pg[i]);
ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
pg[i].tlbflush_timestamp = tlbflush_current_time();
- pg[i].u.free.cpumask = d->cpumask;
+ pg[i].u.free.cpumask = d->domain_dirty_cpumask;
list_del(&pg[i].list);
}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/sched_bvt.c
--- a/xen/common/sched_bvt.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/sched_bvt.c Mon Jan 9 11:22:17 2006
@@ -31,7 +31,8 @@
struct list_head run_list; /* runqueue list pointers */
u32 avt; /* actual virtual time */
u32 evt; /* effective virtual time */
- struct vcpu *vcpu;
+ int migrated; /* migrated to a new CPU */
+ struct vcpu *vcpu;
struct bvt_dom_info *inf;
};
@@ -219,7 +220,7 @@
einf->vcpu = v;
- if ( is_idle_task(v->domain) )
+ if ( is_idle_domain(v->domain) )
{
einf->avt = einf->evt = ~0U;
BUG_ON(__task_on_runqueue(v));
@@ -250,9 +251,11 @@
/* Set the BVT parameters. AVT should always be updated
if CPU migration occurred.*/
- if ( einf->avt < CPU_SVT(cpu) ||
- unlikely(test_bit(_VCPUF_cpu_migrated, &v->vcpu_flags)) )
+ if ( (einf->avt < CPU_SVT(cpu)) || einf->migrated )
+ {
einf->avt = CPU_SVT(cpu);
+ einf->migrated = 0;
+ }
/* Deal with warping here. */
einf->evt = calc_evt(v, einf->avt);
@@ -265,7 +268,7 @@
((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
ctx_allow;
- if ( is_idle_task(curr->domain) || (einf->evt <= curr_evt) )
+ if ( is_idle_domain(curr->domain) || (einf->evt <= curr_evt) )
cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
else if ( schedule_data[cpu].s_timer.expires > r_time )
set_ac_timer(&schedule_data[cpu].s_timer, r_time);
@@ -274,11 +277,27 @@
static void bvt_sleep(struct vcpu *v)
{
- if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
+ if ( schedule_data[v->processor].curr == v )
cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
else if ( __task_on_runqueue(v) )
__del_from_runqueue(v);
}
+
+
+static int bvt_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+ if ( v == current )
+ return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
+
+ vcpu_pause(v);
+ v->cpu_affinity = *affinity;
+ v->processor = first_cpu(v->cpu_affinity);
+ EBVT_INFO(v)->migrated = 1;
+ vcpu_unpause(v);
+
+ return 0;
+}
+
/**
* bvt_free_task - free BVT private structures for a task
@@ -380,7 +399,7 @@
ASSERT(prev_einf != NULL);
ASSERT(__task_on_runqueue(prev));
- if ( likely(!is_idle_task(prev->domain)) )
+ if ( likely(!is_idle_domain(prev->domain)) )
{
prev_einf->avt = calc_avt(prev, now);
prev_einf->evt = calc_evt(prev, prev_einf->avt);
@@ -390,7 +409,7 @@
__del_from_runqueue(prev);
- if ( domain_runnable(prev) )
+ if ( vcpu_runnable(prev) )
__add_to_runqueue_tail(prev);
}
@@ -471,13 +490,13 @@
}
/* work out time for next run through scheduler */
- if ( is_idle_task(next->domain) )
+ if ( is_idle_domain(next->domain) )
{
r_time = ctx_allow;
goto sched_done;
}
- if ( (next_prime == NULL) || is_idle_task(next_prime->domain) )
+ if ( (next_prime == NULL) || is_idle_domain(next_prime->domain) )
{
/* We have only one runnable task besides the idle task. */
r_time = 10 * ctx_allow; /* RN: random constant */
@@ -557,6 +576,7 @@
.dump_cpu_state = bvt_dump_cpu_state,
.sleep = bvt_sleep,
.wake = bvt_wake,
+ .set_affinity = bvt_set_affinity
};
/*
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/sched_sedf.c Mon Jan 9 11:22:17 2006
@@ -325,21 +325,29 @@
list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
}
+
/* Allocates memory for per domain private scheduling data*/
-static int sedf_alloc_task(struct vcpu *d) {
- PRINT(2,"sedf_alloc_task was called, domain-id
%i.%i\n",d->domain->domain_id,
- d->vcpu_id);
- if (d->domain->sched_priv == NULL) {
- if ((d->domain->sched_priv =
- xmalloc(struct sedf_dom_info)) == NULL )
+static int sedf_alloc_task(struct vcpu *d)
+{
+ PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
+ d->domain->domain_id, d->vcpu_id);
+
+ if ( d->domain->sched_priv == NULL )
+ {
+ d->domain->sched_priv = xmalloc(struct sedf_dom_info);
+ if ( d->domain->sched_priv == NULL )
return -1;
memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
}
- if ((d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
+
+ if ( (d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
return -1;
+
memset(d->sched_priv, 0, sizeof(struct sedf_vcpu_info));
+
return 0;
}
+
/* Setup the sedf_dom_info */
static void sedf_add_task(struct vcpu *d)
@@ -363,14 +371,17 @@
INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
}
- if (d->domain->domain_id==0) {
+ if ( d->domain->domain_id == 0 )
+ {
/*set dom0 to something useful to boot the machine*/
inf->period = MILLISECS(20);
inf->slice = MILLISECS(15);
inf->latency = 0;
inf->deadl_abs = 0;
inf->status = EXTRA_AWARE | SEDF_ASLEEP;
- } else {
+ }
+ else
+ {
/*other domains run in best effort mode*/
inf->period = WEIGHT_PERIOD;
inf->slice = 0;
@@ -379,14 +390,18 @@
inf->status = EXTRA_AWARE | SEDF_ASLEEP;
inf->extraweight = 1;
}
+
inf->period_orig = inf->period; inf->slice_orig = inf->slice;
INIT_LIST_HEAD(&(inf->list));
INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
- if (!is_idle_task(d->domain)) {
+ if ( !is_idle_domain(d->domain) )
+ {
extraq_check(d);
- } else {
+ }
+ else
+ {
EDOM_INFO(d)->deadl_abs = 0;
EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
}
@@ -396,19 +411,28 @@
static void sedf_free_task(struct domain *d)
{
int i;
+
PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
+
ASSERT(d->sched_priv != NULL);
xfree(d->sched_priv);
- for (i = 0; i < MAX_VIRT_CPUS; i++)
- if ( d->vcpu[i] ) {
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ {
+ if ( d->vcpu[i] )
+ {
ASSERT(d->vcpu[i]->sched_priv != NULL);
xfree(d->vcpu[i]->sched_priv);
}
-}
-
-/* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
-static inline void desched_edf_dom (s_time_t now, struct vcpu* d) {
+ }
+}
+
+/*
+ * Handles the rescheduling & bookkeeping of domains running in their
+ * guaranteed timeslice.
+ */
+static void desched_edf_dom(s_time_t now, struct vcpu* d)
+{
struct sedf_vcpu_info* inf = EDOM_INFO(d);
/*current domain is running in real time mode*/
@@ -418,27 +442,30 @@
/*scheduling decisions, which don't remove the running domain
from the runq*/
- if ((inf->cputime < inf->slice) && sedf_runnable(d))
+ if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
return;
__del_from_queue(d);
/*manage bookkeeping (i.e. calculate next deadline,
memorize overrun-time of slice) of finished domains*/
- if (inf->cputime >= inf->slice) {
+ if ( inf->cputime >= inf->slice )
+ {
inf->cputime -= inf->slice;
- if (inf->period < inf->period_orig) {
+ if ( inf->period < inf->period_orig )
+ {
/*this domain runs in latency scaling or burst mode*/
#if (UNBLOCK == UNBLOCK_BURST)
/*if we are running in burst scaling wait for two periods
before scaling periods up again*/
- if (now - inf->unblock_abs >= 2 * inf->period)
+ if ( (now - inf->unblock_abs) >= (2 * inf->period) )
#endif
{
inf->period *= 2; inf->slice *= 2;
- if ((inf->period > inf->period_orig) ||
- (inf->slice > inf->slice_orig)) {
+ if ( (inf->period > inf->period_orig) ||
+ (inf->slice > inf->slice_orig) )
+ {
/*reset slice & period*/
inf->period = inf->period_orig;
inf->slice = inf->slice_orig;
@@ -450,36 +477,46 @@
}
/*add a runnable domain to the waitqueue*/
- if (sedf_runnable(d))
+ if ( sedf_runnable(d) )
+ {
__add_to_waitqueue_sort(d);
- else {
+ }
+ else
+ {
/*we have a blocked realtime task -> remove it from exqs too*/
#if (EXTRA > EXTRA_OFF)
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
-#endif
- if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
-#endif
- }
+ if ( extraq_on(d, EXTRA_PEN_Q) )
+ extraq_del(d, EXTRA_PEN_Q);
+#endif
+ if ( extraq_on(d, EXTRA_UTIL_Q) )
+ extraq_del(d, EXTRA_UTIL_Q);
+#endif
+ }
+
ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
sedf_runnable(d)));
}
+
/* Update all elements on the queues */
-static inline void update_queues(s_time_t now, struct list_head* runq,
- struct list_head* waitq) {
- struct list_head *cur,*tmp;
+static void update_queues(
+ s_time_t now, struct list_head *runq, struct list_head *waitq)
+{
+ struct list_head *cur, *tmp;
struct sedf_vcpu_info *curinf;
PRINT(3,"Updating waitq..\n");
+
/*check for the first elements of the waitqueue, whether their
next period has already started*/
list_for_each_safe(cur, tmp, waitq) {
curinf = list_entry(cur, struct sedf_vcpu_info, list);
PRINT(4,"\tLooking @ dom %i.%i\n",
curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
- if (PERIOD_BEGIN(curinf) <= now) {
+ if ( PERIOD_BEGIN(curinf) <= now )
+ {
__del_from_queue(curinf->vcpu);
__add_to_runqueue_sort(curinf->vcpu);
}
@@ -488,13 +525,16 @@
}
PRINT(3,"Updating runq..\n");
+
/*process the runq, find domains that are on
the runqueue which shouldn't be there*/
list_for_each_safe(cur, tmp, runq) {
curinf = list_entry(cur,struct sedf_vcpu_info,list);
PRINT(4,"\tLooking @ dom %i.%i\n",
curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
- if (unlikely(curinf->slice == 0)) {
+
+ if ( unlikely(curinf->slice == 0) )
+ {
/*ignore domains with empty slice*/
PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
curinf->vcpu->domain->domain_id,
@@ -504,7 +544,8 @@
/*move them to their next period*/
curinf->deadl_abs += curinf->period;
/*ensure that the start of the next period is in the future*/
- if (unlikely(PERIOD_BEGIN(curinf) < now)) {
+ if ( unlikely(PERIOD_BEGIN(curinf) < now) )
+ {
curinf->deadl_abs +=
(DIV_UP(now - PERIOD_BEGIN(curinf),
curinf->period)) * curinf->period;
@@ -513,8 +554,10 @@
__add_to_waitqueue_sort(curinf->vcpu);
continue;
}
- if (unlikely((curinf->deadl_abs < now) ||
- (curinf->cputime > curinf->slice))) {
+
+ if ( unlikely((curinf->deadl_abs < now) ||
+ (curinf->cputime > curinf->slice)) )
+ {
/*we missed the deadline or the slice was
already finished... might happen because
of dom_adj.*/
@@ -550,6 +593,7 @@
PRINT(3,"done updating the queues\n");
}
+
#if (EXTRA > EXTRA_OFF)
/* removes a domain from the head of the according extraQ and
requeues it at a specified position:
@@ -557,9 +601,10 @@
weighted ext.: insert in sorted list by score
if the domain is blocked / has regained its short-block-loss
time it is not put on any queue */
-static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
+static void desched_extra_dom(s_time_t now, struct vcpu* d)
+{
struct sedf_vcpu_info *inf = EDOM_INFO(d);
- int i = extra_get_cur_q(inf);
+ int i = extra_get_cur_q(inf);
#if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
unsigned long oldscore;
@@ -575,14 +620,15 @@
extraq_del(d, i);
#if (EXTRA == EXTRA_ROUNDR)
- if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
+ if ( sedf_runnable(d) && (inf->status & EXTRA_AWARE) )
/*add to the tail if it is runnable => round-robin*/
extraq_add_tail(d, EXTRA_UTIL_Q);
#elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
/*update the score*/
- oldscore = inf->score[i];
+ oldscore = inf->score[i];
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (i == EXTRA_PEN_Q) {
+ if ( i == EXTRA_PEN_Q )
+ {
/*domain was running in L0 extraq*/
/*reduce block lost, probably more sophistication here!*/
/*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
@@ -605,12 +651,13 @@
inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
inf->short_block_lost_tot;
oldscore = 0;
- } else
+ }
+ else
#endif
{
/*domain was running in L1 extraq => score is inverse of
utilization and is used somewhat incremental!*/
- if (!inf->extraweight)
+ if ( !inf->extraweight )
/*NB: use fixed point arithmetic with 10 bits*/
inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
inf->slice;
@@ -619,24 +666,32 @@
full (ie 100%) utilization is equivalent to 128 extraweight*/
inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
}
+
check_extra_queues:
/* Adding a runnable domain to the right queue and removing blocked ones*/
- if (sedf_runnable(d)) {
+ if ( sedf_runnable(d) )
+ {
/*add according to score: weighted round robin*/
if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
extraq_add_sort_update(d, i, oldscore);
}
- else {
+ else
+ {
/*remove this blocked domain from the waitq!*/
__del_from_queue(d);
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
/*make sure that we remove a blocked domain from the other
extraq too*/
- if (i == EXTRA_PEN_Q) {
- if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
- } else {
- if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
+ if ( i == EXTRA_PEN_Q )
+ {
+ if ( extraq_on(d, EXTRA_UTIL_Q) )
+ extraq_del(d, EXTRA_UTIL_Q);
+ }
+ else
+ {
+ if ( extraq_on(d, EXTRA_PEN_Q) )
+ extraq_del(d, EXTRA_PEN_Q);
}
#endif
}
@@ -647,16 +702,21 @@
}
#endif
-static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
- s_time_t end_xt, struct list_head *extraq[], int cpu) {
+
+static struct task_slice sedf_do_extra_schedule(
+ s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
+{
struct task_slice ret;
struct sedf_vcpu_info *runinf;
ASSERT(end_xt > now);
+
/* Enough time left to use for extratime? */
- if (end_xt - now < EXTRA_QUANTUM)
+ if ( end_xt - now < EXTRA_QUANTUM )
goto return_idle;
+
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (!list_empty(extraq[EXTRA_PEN_Q])) {
+ if ( !list_empty(extraq[EXTRA_PEN_Q]) )
+ {
/*we still have elements on the level 0 extraq
=> let those run first!*/
runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
@@ -667,9 +727,12 @@
#ifdef SEDF_STATS
runinf->pen_extra_slices++;
#endif
- } else
-#endif
- if (!list_empty(extraq[EXTRA_UTIL_Q])) {
+ }
+ else
+#endif
+ {
+ if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
+ {
/*use elements from the normal extraqueue*/
runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
struct sedf_vcpu_info,
@@ -680,6 +743,7 @@
}
else
goto return_idle;
+ }
ASSERT(ret.time > 0);
ASSERT(sedf_runnable(ret.task));
@@ -692,6 +756,8 @@
ASSERT(sedf_runnable(ret.task));
return ret;
}
+
+
/* Main scheduling function
Reasons for calling this function are:
-timeslice for the current period used up
@@ -699,7 +765,7 @@
-and various others ;) in general: determine which domain to run next*/
static struct task_slice sedf_do_schedule(s_time_t now)
{
- int cpu = current->processor;
+ int cpu = smp_processor_id();
struct list_head *runq = RUNQ(cpu);
struct list_head *waitq = WAITQ(cpu);
#if (EXTRA > EXTRA_OFF)
@@ -711,20 +777,21 @@
struct task_slice ret;
/*idle tasks don't need any of the following stuff*/
- if (is_idle_task(current->domain))
+ if (is_idle_domain(current->domain))
goto check_waitq;
/* create local state of the status of the domain, in order to avoid
inconsistent state during scheduling decisions, because data for
- domain_runnable is not protected by the scheduling lock!*/
- if(!domain_runnable(current))
+ vcpu_runnable is not protected by the scheduling lock!*/
+ if ( !vcpu_runnable(current) )
inf->status |= SEDF_ASLEEP;
- if (inf->status & SEDF_ASLEEP)
+ if ( inf->status & SEDF_ASLEEP )
inf->block_abs = now;
#if (EXTRA > EXTRA_OFF)
- if (unlikely(extra_runs(inf))) {
+ if ( unlikely(extra_runs(inf)) )
+ {
/*special treatment of domains running in extra time*/
desched_extra_dom(now, current);
}
@@ -739,10 +806,12 @@
/*now simply pick the first domain from the runqueue, which has the
earliest deadline, because the list is sorted*/
- if (!list_empty(runq)) {
+ if ( !list_empty(runq) )
+ {
runinf = list_entry(runq->next,struct sedf_vcpu_info,list);
ret.task = runinf->vcpu;
- if (!list_empty(waitq)) {
+ if ( !list_empty(waitq) )
+ {
waitinf = list_entry(waitq->next,
struct sedf_vcpu_info,list);
/*rerun scheduler, when scheduled domain reaches it's
@@ -751,14 +820,16 @@
ret.time = MIN(now + runinf->slice - runinf->cputime,
PERIOD_BEGIN(waitinf)) - now;
}
- else {
+ else
+ {
ret.time = runinf->slice - runinf->cputime;
}
CHECK(ret.time > 0);
goto sched_done;
}
- if (!list_empty(waitq)) {
+ if ( !list_empty(waitq) )
+ {
waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list);
/*we could not find any suitable domain
=> look for domains that are aware of extratime*/
@@ -771,7 +842,8 @@
#endif
CHECK(ret.time > 0);
}
- else {
+ else
+ {
/*this could probably never happen, but one never knows...*/
/*it can... imagine a second CPU, which is pure scifi ATM,
but one never knows ;)*/
@@ -782,11 +854,13 @@
sched_done:
/*TODO: Do something USEFUL when this happens and find out, why it
still can happen!!!*/
- if (ret.time<0) {
+ if ( ret.time < 0 )
+ {
printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
ret.time);
ret.time = EXTRA_QUANTUM;
}
+
EDOM_INFO(ret.task)->sched_start_abs = now;
CHECK(ret.time > 0);
ASSERT(sedf_runnable(ret.task));
@@ -794,30 +868,36 @@
return ret;
}
-static void sedf_sleep(struct vcpu *d) {
- PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id,
d->vcpu_id);
-
- if (is_idle_task(d->domain))
+
+static void sedf_sleep(struct vcpu *d)
+{
+ PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
+ d->domain->domain_id, d->vcpu_id);
+
+ if ( is_idle_domain(d->domain) )
return;
EDOM_INFO(d)->status |= SEDF_ASLEEP;
- if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
+ if ( schedule_data[d->processor].curr == d )
+ {
cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
- else {
+ else
+ {
if ( __task_on_queue(d) )
__del_from_queue(d);
#if (EXTRA > EXTRA_OFF)
- if (extraq_on(d, EXTRA_UTIL_Q))
+ if ( extraq_on(d, EXTRA_UTIL_Q) )
extraq_del(d, EXTRA_UTIL_Q);
#endif
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (extraq_on(d, EXTRA_PEN_Q))
+ if ( extraq_on(d, EXTRA_PEN_Q) )
extraq_del(d, EXTRA_PEN_Q);
#endif
}
}
+
/* This function wakes up a domain, i.e. moves it into the waitqueue
* things to mention are: admission control is taking place nowhere at
@@ -890,17 +970,21 @@
* -either behaviour can lead to missed deadlines in other domains as
* opposed to approaches 1,2a,2b
*/
-static inline void unblock_short_vcons
-(struct sedf_vcpu_info* inf, s_time_t now) {
+#if (UNBLOCK <= UNBLOCK_SHORT_RESUME)
+static void unblock_short_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+{
inf->deadl_abs += inf->period;
inf->cputime = 0;
}
-
-static inline void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
+#endif
+
+#if (UNBLOCK == UNBLOCK_SHORT_RESUME)
+static void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
{
/*treat blocked time as consumed by the domain*/
inf->cputime += now - inf->block_abs;
- if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+ if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
+ {
/*we don't have a reasonable amount of time in
our slice left :( => start in next period!*/
unblock_short_vcons(inf, now);
@@ -910,8 +994,11 @@
inf->short_cont++;
#endif
}
-static inline void unblock_short_extra_support (struct sedf_vcpu_info* inf,
- s_time_t now) {
+#endif
+
+static void unblock_short_extra_support(
+ struct sedf_vcpu_info* inf, s_time_t now)
+{
/*this unblocking scheme tries to support the domain, by assigning it
a priority in extratime distribution according to the loss of time
in this slice due to blocking*/
@@ -919,26 +1006,29 @@
/*no more realtime execution in this period!*/
inf->deadl_abs += inf->period;
- if (likely(inf->block_abs)) {
+ if ( likely(inf->block_abs) )
+ {
//treat blocked time as consumed by the domain*/
/*inf->cputime += now - inf->block_abs;*/
/*penalty is time the domain would have
had if it continued to run */
pen = (inf->slice - inf->cputime);
- if (pen < 0) pen = 0;
+ if ( pen < 0 )
+ pen = 0;
/*accumulate all penalties over the periods*/
/*inf->short_block_lost_tot += pen;*/
/*set penalty to the current value*/
inf->short_block_lost_tot = pen;
/*not sure which one is better.. but seems to work well...*/
- if (inf->short_block_lost_tot) {
+ if ( inf->short_block_lost_tot )
+ {
inf->score[0] = (inf->period << 10) /
inf->short_block_lost_tot;
#ifdef SEDF_STATS
inf->pen_extra_blocks++;
#endif
- if (extraq_on(inf->vcpu, EXTRA_PEN_Q))
+ if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
/*remove domain for possible resorting!*/
extraq_del(inf->vcpu, EXTRA_PEN_Q);
else
@@ -951,36 +1041,53 @@
extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
}
}
+
/*give it a fresh slice in the next period!*/
inf->cputime = 0;
}
-static inline void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+
+
+#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
+static void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
{
/* align to next future period */
inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
* inf->period;
inf->cputime = 0;
}
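A worked example of the realignment arithmetic above, assuming DIV_UP(x,y) = (x + y - 1) / y as elsewhere in this scheduler:

#include <stdio.h>

#define DIV_UP(x, y) (((x) + (y) - 1) / (y))

int main(void)
{
    long long deadl_abs = 3, now = 27, period = 10;  /* illustrative units */

    /* Jump to the first deadline whose period begins after now. */
    deadl_abs += (DIV_UP(now - deadl_abs, period) + 1) * period;

    printf("deadline=%lld period_begin=%lld\n",
           deadl_abs, deadl_abs - period); /* 43 and 33, both past now=27 */
    return 0;
}

Without the extra +1 the deadline would land at 33 with its period starting at 23, still in the past.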
-
-static inline void unblock_long_cons_a (struct sedf_vcpu_info* inf,
- s_time_t now) {
+#endif
+
+
+#if 0
+static void unblock_long_cons_a (struct sedf_vcpu_info* inf, s_time_t now)
+{
/*treat the time the domain was blocked in the
- CURRENT period as consumed by the domain*/
+ CURRENT period as consumed by the domain*/
inf->cputime = (now - inf->deadl_abs) % inf->period;
- if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+ if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
+ {
/*we don't have a reasonable amount of time in our slice
left :( => start in next period!*/
unblock_long_vcons(inf, now);
}
}
-static inline void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now) {
+#endif
+
+
+static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
+{
/*Conservative 2b*/
/*Treat the unblocking time as a start of a new period */
inf->deadl_abs = now + inf->period;
inf->cputime = 0;
}
-static inline void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now) {
- if (likely(inf->latency)) {
+
+
+#if (UNBLOCK == UNBLOCK_ATROPOS)
+static void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now)
+{
+ if ( likely(inf->latency) )
+ {
/*scale the slice and period accordingly to the latency hint*/
/*reduce period temporarily to the latency hint*/
inf->period = inf->latency;
@@ -993,18 +1100,24 @@
inf->deadl_abs = now + inf->period;
inf->cputime = 0;
}
- else {
+ else
+ {
/*we don't have a latency hint.. use some other technique*/
unblock_long_cons_b(inf, now);
}
}
+#endif
+
+
+#if (UNBLOCK == UNBLOCK_BURST)
/*a new idea of dealing with short blocks: burst period scaling*/
-static inline void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
+static void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
{
/*treat blocked time as consumed by the domain*/
inf->cputime += now - inf->block_abs;
- if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
+ if ( (inf->cputime + EXTRA_QUANTUM) <= inf->slice )
+ {
/*if we can still use some time in the current slice
then use it!*/
#ifdef SEDF_STATS
@@ -1012,10 +1125,12 @@
inf->short_cont++;
#endif
}
- else {
+ else
+ {
/*we don't have a reasonable amount of time in
our slice left => switch to burst mode*/
- if (likely(inf->unblock_abs)) {
+ if ( likely(inf->unblock_abs) )
+ {
/*set the period-length to the current blocking
interval, possible enhancements: average over last
blocking intervals, user-specified minimum,...*/
@@ -1030,17 +1145,23 @@
/*set new (shorter) deadline*/
inf->deadl_abs += inf->period;
}
- else {
+ else
+ {
/*in case we haven't unblocked before
start in next period!*/
inf->cputime=0;
inf->deadl_abs += inf->period;
}
}
+
inf->unblock_abs = now;
}
-static inline void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now) {
- if (unlikely(inf->latency && (inf->period > inf->latency))) {
+
+
+static void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now)
+{
+ if ( unlikely(inf->latency && (inf->period > inf->latency)) )
+ {
/*scale the slice and period accordingly to the latency hint*/
inf->period = inf->latency;
/*check for overflows on multiplication*/
@@ -1052,23 +1173,28 @@
inf->deadl_abs = now + inf->period;
inf->cputime = 0;
}
- else {
+ else
+ {
/*we don't have a latency hint.. or we are currently in
"burst mode": use some other technique
NB: this should be in fact the normal way of operation,
when we are in sync with the device!*/
unblock_long_cons_b(inf, now);
}
+
inf->unblock_abs = now;
}
+#endif /* UNBLOCK == UNBLOCK_BURST */
+
#define DOMAIN_EDF 1
#define DOMAIN_EXTRA_PEN 2
#define DOMAIN_EXTRA_UTIL 3
#define DOMAIN_IDLE 4
-static inline int get_run_type(struct vcpu* d) {
+static inline int get_run_type(struct vcpu* d)
+{
struct sedf_vcpu_info* inf = EDOM_INFO(d);
- if (is_idle_task(d->domain))
+ if (is_idle_domain(d->domain))
return DOMAIN_IDLE;
if (inf->status & EXTRA_RUN_PEN)
return DOMAIN_EXTRA_PEN;
@@ -1076,6 +1202,8 @@
return DOMAIN_EXTRA_UTIL;
return DOMAIN_EDF;
}
+
+
/*Compares two domains with respect to whether one is allowed to
interrupt the other's execution.
It returns true (!=0) if a switch to the other domain is good.
@@ -1085,8 +1213,10 @@
In the same class priorities are assigned as following:
EDF: early deadline > late deadline
L0 extra-time: lower score > higher score*/
-static inline int should_switch(struct vcpu* cur,
- struct vcpu* other, s_time_t now) {
+static inline int should_switch(struct vcpu *cur,
+ struct vcpu *other,
+ s_time_t now)
+{
struct sedf_vcpu_info *cur_inf, *other_inf;
cur_inf = EDOM_INFO(cur);
other_inf = EDOM_INFO(other);
@@ -1119,41 +1249,51 @@
}
return 1;
}
-void sedf_wake(struct vcpu *d) {
+
+void sedf_wake(struct vcpu *d)
+{
s_time_t now = NOW();
struct sedf_vcpu_info* inf = EDOM_INFO(d);
PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
d->vcpu_id);
- if (unlikely(is_idle_task(d->domain)))
+ if ( unlikely(is_idle_domain(d->domain)) )
return;
- if ( unlikely(__task_on_queue(d)) ) {
+ if ( unlikely(__task_on_queue(d)) )
+ {
PRINT(3,"\tdomain %i.%i is already in some queue\n",
d->domain->domain_id, d->vcpu_id);
return;
}
+
ASSERT(!sedf_runnable(d));
inf->status &= ~SEDF_ASLEEP;
ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
ASSERT(!extraq_on(d, EXTRA_PEN_Q));
- if (unlikely(inf->deadl_abs == 0))
+ if ( unlikely(inf->deadl_abs == 0) )
+ {
/*initial setup of the deadline*/
inf->deadl_abs = now + inf->slice;
+ }
- PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
- "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id,
inf->deadl_abs,
- inf->period, now);
+ PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
+ "now= %"PRIu64")\n",
+ d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);
+
#ifdef SEDF_STATS
inf->block_tot++;
#endif
- if (unlikely(now < PERIOD_BEGIN(inf))) {
+
+ if ( unlikely(now < PERIOD_BEGIN(inf)) )
+ {
PRINT(4,"extratime unblock\n");
/* unblocking in extra-time! */
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (inf->status & EXTRA_WANT_PEN_Q) {
+ if ( inf->status & EXTRA_WANT_PEN_Q )
+ {
/*we have a domain that wants compensation
for block penalty and did just block in
its compensation time. Give it another
@@ -1163,8 +1303,10 @@
#endif
extraq_check_add_unblocked(d, 0);
}
- else {
- if (now < inf->deadl_abs) {
+ else
+ {
+ if ( now < inf->deadl_abs )
+ {
PRINT(4,"short unblocking\n");
/*short blocking*/
#ifdef SEDF_STATS
@@ -1182,7 +1324,8 @@
extraq_check_add_unblocked(d, 1);
}
- else {
+ else
+ {
PRINT(4,"long unblocking\n");
/*long unblocking*/
#ifdef SEDF_STATS
@@ -1197,7 +1340,6 @@
unblock_long_cons_c(inf, now);
#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
unblock_long_cons_b(inf, now);
- /*unblock_short_cons_c(inf, now);*/
#elif (UNBLOCK == UNBLOCK_BURST)
unblock_long_burst(inf, now);
#endif
@@ -1205,26 +1347,33 @@
extraq_check_add_unblocked(d, 1);
}
}
- PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
- "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id,
inf->deadl_abs,
+
+ PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
+ "now= %"PRIu64")\n",
+ d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
inf->period, now);
- if (PERIOD_BEGIN(inf) > now) {
+
+ if ( PERIOD_BEGIN(inf) > now )
+ {
__add_to_waitqueue_sort(d);
PRINT(3,"added to waitq\n");
}
- else {
+ else
+ {
__add_to_runqueue_sort(d);
PRINT(3,"added to runq\n");
}
#ifdef SEDF_STATS
/*do some statistics here...*/
- if (inf->block_abs != 0) {
+ if ( inf->block_abs != 0 )
+ {
inf->block_time_tot += now - inf->block_abs;
inf->penalty_time_tot +=
PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
}
#endif
+
/*sanity check: make sure each extra-aware domain IS on the util-q!*/
ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
ASSERT(__task_on_queue(d));
@@ -1234,27 +1383,48 @@
ASSERT(d->processor >= 0);
ASSERT(d->processor < NR_CPUS);
ASSERT(schedule_data[d->processor].curr);
- if (should_switch(schedule_data[d->processor].curr, d, now))
+
+ if ( should_switch(schedule_data[d->processor].curr, d, now) )
cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
-/*Print a lot of use-{full, less} information about a domains in the system*/
-static void sedf_dump_domain(struct vcpu *d) {
+
+static int sedf_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+ if ( v == current )
+ return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
+
+ vcpu_pause(v);
+ v->cpu_affinity = *affinity;
+ v->processor = first_cpu(v->cpu_affinity);
+ vcpu_unpause(v);
+
+ return 0;
+}
+
+
+/* Print a lot of useful information about a domain in the system */
+static void sedf_dump_domain(struct vcpu *d)
+{
printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
- printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i
xtr(%s)=%"PRIu64" ew=%hu",
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
+ " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
- EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
+ EDOM_INFO(d)->weight, d->cpu_time,
+ EDOM_INFO(d)->score[EXTRA_UTIL_Q],
(EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
- if (d->cpu_time !=0)
+
+ if ( d->cpu_time != 0 )
printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
/ d->cpu_time);
+
#ifdef SEDF_STATS
- if (EDOM_INFO(d)->block_time_tot!=0)
+ if ( EDOM_INFO(d)->block_time_tot != 0 )
printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
EDOM_INFO(d)->block_time_tot);
- if (EDOM_INFO(d)->block_tot!=0)
+ if ( EDOM_INFO(d)->block_tot != 0 )
printf("\n blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
"shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
@@ -1271,7 +1441,8 @@
printf("\n");
}
-/*dumps all domains on hte specified cpu*/
+
+/* dumps all domains on the specified cpu */
static void sedf_dump_cpu_state(int i)
{
struct list_head *list, *queue, *tmp;
@@ -1284,7 +1455,8 @@
queue = RUNQ(i);
printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
(unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
+ list_for_each_safe ( list, tmp, queue )
+ {
printk("%3d: ",loop++);
d_inf = list_entry(list, struct sedf_vcpu_info, list);
sedf_dump_domain(d_inf->vcpu);
@@ -1293,7 +1465,8 @@
queue = WAITQ(i); loop = 0;
printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
(unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
+ list_for_each_safe ( list, tmp, queue )
+ {
printk("%3d: ",loop++);
d_inf = list_entry(list, struct sedf_vcpu_info, list);
sedf_dump_domain(d_inf->vcpu);
@@ -1303,7 +1476,8 @@
printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
(unsigned long)queue, (unsigned long) queue->next,
(unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
+ list_for_each_safe ( list, tmp, queue )
+ {
d_inf = list_entry(list, struct sedf_vcpu_info,
extralist[EXTRA_PEN_Q]);
printk("%3d: ",loop++);
@@ -1314,7 +1488,8 @@
printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
(unsigned long)queue, (unsigned long) queue->next,
(unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
+ list_for_each_safe ( list, tmp, queue )
+ {
d_inf = list_entry(list, struct sedf_vcpu_info,
extralist[EXTRA_UTIL_Q]);
printk("%3d: ",loop++);
@@ -1323,69 +1498,93 @@
loop = 0;
printk("\nnot on Q\n");
- for_each_domain(d)
+
+ for_each_domain ( d )
+ {
for_each_vcpu(d, ed)
- {
- if (!__task_on_queue(ed) && (ed->processor == i)) {
- printk("%3d: ",loop++);
- sedf_dump_domain(ed);
- }
- }
-}
-/*Adjusts periods and slices of the domains accordingly to their weights*/
-static inline int sedf_adjust_weights(struct sched_adjdom_cmd *cmd) {
+ {
+ if ( !__task_on_queue(ed) && (ed->processor == i) )
+ {
+ printk("%3d: ",loop++);
+ sedf_dump_domain(ed);
+ }
+ }
+ }
+}
+
+
+/* Adjusts periods and slices of the domains according to their weights. */
+static int sedf_adjust_weights(struct sched_adjdom_cmd *cmd)
+{
struct vcpu *p;
struct domain *d;
int sumw[NR_CPUS];
s_time_t sumt[NR_CPUS];
int cpu;
- for (cpu=0; cpu < NR_CPUS; cpu++) {
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+ {
sumw[cpu] = 0;
sumt[cpu] = 0;
}
- /*sum up all weights*/
- for_each_domain(d)
- for_each_vcpu(d, p) {
- if (EDOM_INFO(p)->weight)
- sumw[p->processor] += EDOM_INFO(p)->weight;
- else {
- /*don't modify domains who don't have a weight, but sum
- up the time they need, projected to a WEIGHT_PERIOD,
- so that this time is not given to the weight-driven
- domains*/
- /*check for overflows*/
- ASSERT((WEIGHT_PERIOD < ULONG_MAX)
- && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
- sumt[p->processor] +=
- (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
- EDOM_INFO(p)->period_orig;
- }
- }
- /*adjust all slices (and periods) to the new weight*/
- for_each_domain(d)
- for_each_vcpu(d, p) {
- if (EDOM_INFO(p)->weight) {
- EDOM_INFO(p)->period_orig =
- EDOM_INFO(p)->period = WEIGHT_PERIOD;
- EDOM_INFO(p)->slice_orig =
- EDOM_INFO(p)->slice =
- (EDOM_INFO(p)->weight *
- (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
- sumw[p->processor];
- }
- }
+
+ /* sum up all weights */
+ for_each_domain( d )
+ {
+ for_each_vcpu( d, p )
+ {
+ if ( EDOM_INFO(p)->weight )
+ {
+ sumw[p->processor] += EDOM_INFO(p)->weight;
+ }
+ else
+ {
/*don't modify domains that don't have a weight, but sum
+ up the time they need, projected to a WEIGHT_PERIOD,
+ so that this time is not given to the weight-driven
+ domains*/
+ /*check for overflows*/
+ ASSERT((WEIGHT_PERIOD < ULONG_MAX)
+ && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
+ sumt[p->processor] +=
+ (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
+ EDOM_INFO(p)->period_orig;
+ }
+ }
+ }
+
+ /* adjust all slices (and periods) to the new weight */
+ for_each_domain( d )
+ {
+ for_each_vcpu ( d, p )
+ {
+ if ( EDOM_INFO(p)->weight )
+ {
+ EDOM_INFO(p)->period_orig =
+ EDOM_INFO(p)->period = WEIGHT_PERIOD;
+ EDOM_INFO(p)->slice_orig =
+ EDOM_INFO(p)->slice =
+ (EDOM_INFO(p)->weight *
+ (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
+ sumw[p->processor];
+ }
+ }
+ }
+
return 0;
}
+
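A worked example of the weight arithmetic, using made-up numbers in place of the WEIGHT_PERIOD and WEIGHT_SAFETY constants defined in this file:

#include <stdio.h>

int main(void)
{
    long long period = 100, safety = 5;  /* stand-ins, illustrative units */
    long long sumt = 15;                 /* reserved by unweighted vcpus */
    int weights[2] = { 1, 3 };           /* two weighted vcpus on one CPU */
    int sumw = weights[0] + weights[1];
    int i;

    for (i = 0; i < 2; i++)
        printf("vcpu%d: slice %lld of period %lld\n", i,
               weights[i] * (period - safety - sumt) / sumw, period);
    /* prints slices 20 and 60: the 80 units left after the safety margin
     * and the unweighted reservations are split 1:3 */
    return 0;
}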
/* set or fetch domain scheduling parameters */
-static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd) {
+static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
+{
struct vcpu *v;
PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
"new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
+
if ( cmd->direction == SCHED_INFO_PUT )
{
/*check for sane parameters*/
@@ -1458,6 +1657,7 @@
.sleep = sedf_sleep,
.wake = sedf_wake,
.adjdom = sedf_adjdom,
+ .set_affinity = sedf_set_affinity
};
/*
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/schedule.c
--- a/xen/common/schedule.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/schedule.c Mon Jan 9 11:22:17 2006
@@ -100,7 +100,9 @@
v->vcpu_id = vcpu_id;
v->processor = cpu_id;
atomic_set(&v->pausecnt, 0);
- v->cpumap = CPUMAP_RUNANYWHERE;
+
+ v->cpu_affinity = is_idle_domain(d) ?
+ cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
d->vcpu[vcpu_id] = v;
@@ -143,7 +145,7 @@
/* Initialise the per-domain timer. */
init_ac_timer(&v->timer, dom_timer_fn, v, v->processor);
- if ( is_idle_task(d) )
+ if ( is_idle_domain(d) )
{
schedule_data[v->processor].curr = v;
schedule_data[v->processor].idle = v;
@@ -166,7 +168,7 @@
unsigned long flags;
spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
- if ( likely(!domain_runnable(v)) )
+ if ( likely(!vcpu_runnable(v)) )
SCHED_OP(sleep, v);
spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
@@ -182,7 +184,7 @@
* flag is cleared and the scheduler lock is released. We also check that
* the domain continues to be unrunnable, in case someone else wakes it.
*/
- while ( !domain_runnable(v) &&
+ while ( !vcpu_runnable(v) &&
(test_bit(_VCPUF_running, &v->vcpu_flags) ||
spin_is_locked(&schedule_data[v->processor].schedule_lock)) )
cpu_relax();
@@ -195,15 +197,22 @@
unsigned long flags;
spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
- if ( likely(domain_runnable(v)) )
+ if ( likely(vcpu_runnable(v)) )
{
SCHED_OP(wake, v);
v->wokenup = NOW();
}
- clear_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
+}
+
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+ if ( cpus_empty(*affinity) )
+ return -EINVAL;
+
+ return SCHED_OP(set_affinity, v, affinity);
}
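vcpu_set_affinity() itself only rejects an empty mask; the per-scheduler set_affinity hooks added above (bvt_set_affinity, sedf_set_affinity) enforce the rest, notably that the currently running vcpu may not be migrated off its CPU. A minimal sketch of that flow, with cpumask_t reduced to an unsigned long and GCC's __builtin_ctzl standing in for first_cpu() (names illustrative):

#include <stdio.h>

struct demo_vcpu { int processor; unsigned long cpu_affinity; int running; };

static int demo_set_affinity_hook(struct demo_vcpu *v, unsigned long affinity)
{
    /* Mirrors the hooks above: a running vcpu may keep executing only
     * if its current CPU stays in the mask; otherwise refuse. */
    if (v->running)
        return ((affinity >> v->processor) & 1) ? 0 : -16 /* -EBUSY */;

    v->cpu_affinity = affinity;
    v->processor = __builtin_ctzl(affinity);   /* first_cpu() */
    return 0;
}

static int demo_vcpu_set_affinity(struct demo_vcpu *v, unsigned long affinity)
{
    if (affinity == 0)                 /* the cpus_empty() check above */
        return -22 /* -EINVAL */;
    return demo_set_affinity_hook(v, affinity);
}

int main(void)
{
    struct demo_vcpu v = { 2, ~0UL, 0 };

    printf("%d\n", demo_vcpu_set_affinity(&v, 0));        /* -22: empty */
    printf("%d\n", demo_vcpu_set_affinity(&v, 1UL << 5)); /* 0 */
    printf("%d\n", v.processor);                          /* 5 */
    return 0;
}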
/* Block the currently-executing domain until a pertinent event occurs. */
@@ -330,18 +339,23 @@
do {
succ = 0;
__clear_cpu_bits(have_lock);
- for_each_vcpu(d, v) {
+ for_each_vcpu ( d, v )
+ {
cpu = v->processor;
- if (!__get_cpu_bit(cpu, have_lock)) {
+ if ( !__get_cpu_bit(cpu, have_lock) )
+ {
/* if we don't have a lock on this CPU: acquire it*/
- if (spin_trylock(&schedule_data[cpu].schedule_lock)) {
+ if ( spin_trylock(&schedule_data[cpu].schedule_lock) )
+ {
/*we have this lock!*/
__set_cpu_bit(cpu, have_lock);
succ = 1;
- } else {
+ }
+ else
+ {
/*we didn't get this lock -> free all other locks too!*/
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (__get_cpu_bit(cpu, have_lock))
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+ if ( __get_cpu_bit(cpu, have_lock) )
spin_unlock(&schedule_data[cpu].schedule_lock);
/* and start from the beginning! */
succ = 0;
@@ -354,8 +368,8 @@
SCHED_OP(adjdom, d, cmd);
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (__get_cpu_bit(cpu, have_lock))
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ )
+ if ( __get_cpu_bit(cpu, have_lock) )
spin_unlock(&schedule_data[cpu].schedule_lock);
__clear_cpu_bits(have_lock);
@@ -371,22 +385,20 @@
*/
static void __enter_scheduler(void)
{
- struct vcpu *prev = current, *next = NULL;
- int cpu = prev->processor;
- s_time_t now;
+ struct vcpu *prev = current, *next = NULL;
+ int cpu = smp_processor_id();
+ s_time_t now = NOW();
struct task_slice next_slice;
s32 r_time; /* time for new dom to run */
+ ASSERT(!in_irq());
+
perfc_incrc(sched_run);
-
+
spin_lock_irq(&schedule_data[cpu].schedule_lock);
-
- now = NOW();
rem_ac_timer(&schedule_data[cpu].s_timer);
- ASSERT(!in_irq());
-
prev->cpu_time += now - prev->lastschd;
/* get policy-specific decision on scheduling... */
@@ -394,7 +406,7 @@
r_time = next_slice.time;
next = next_slice.task;
-
+
schedule_data[cpu].curr = next;
next->lastschd = now;
@@ -411,11 +423,6 @@
prev->domain->domain_id, now - prev->lastschd);
TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
next->domain->domain_id, now - next->wokenup, r_time);
-
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
- set_bit(_VCPUF_running, &next->vcpu_flags);
-
- perfc_incrc(sched_ctx);
/*
* Logic of wokenup field in domain struct:
@@ -425,10 +432,10 @@
* also set here then a preempted runnable domain will get a screwed up
* "waiting time" value next time it is scheduled.
*/
- prev->wokenup = NOW();
+ prev->wokenup = now;
#if defined(WAKE_HISTO)
- if ( !is_idle_task(next->domain) && next->wokenup )
+ if ( !is_idle_domain(next->domain) && next->wokenup )
{
ulong diff = (ulong)(now - next->wokenup);
diff /= (ulong)MILLISECS(1);
@@ -438,7 +445,7 @@
next->wokenup = (s_time_t)0;
#elif defined(BLOCKTIME_HISTO)
prev->lastdeschd = now;
- if ( !is_idle_task(next->domain) )
+ if ( !is_idle_domain(next->domain) )
{
ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
if (diff <= BUCKETS-2) schedule_data[cpu].hist[diff]++;
@@ -446,10 +453,16 @@
}
#endif
+ set_bit(_VCPUF_running, &next->vcpu_flags);
+
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+ perfc_incrc(sched_ctx);
+
prev->sleep_tick = schedule_data[cpu].tick;
/* Ensure that the domain has an up-to-date time base. */
- if ( !is_idle_task(next->domain) )
+ if ( !is_idle_domain(next->domain) )
{
update_dom_time(next);
if ( next->sleep_tick != schedule_data[cpu].tick )
@@ -461,17 +474,6 @@
next->domain->domain_id, next->vcpu_id);
context_switch(prev, next);
-
- spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
- context_switch_finalise(next);
-}
-
-/* No locking needed -- pointer comparison is safe :-) */
-int idle_cpu(int cpu)
-{
- struct vcpu *p = schedule_data[cpu].curr;
- return p == idle_task[cpu];
}
@@ -493,11 +495,11 @@
static void t_timer_fn(void *unused)
{
struct vcpu *v = current;
- unsigned int cpu = v->processor;
+ unsigned int cpu = smp_processor_id();
schedule_data[cpu].tick++;
- if ( !is_idle_task(v->domain) )
+ if ( !is_idle_domain(v->domain) )
{
update_dom_time(v);
send_guest_virq(v, VIRQ_TIMER);
@@ -531,8 +533,8 @@
init_ac_timer(&t_timer[i], t_timer_fn, NULL, i);
}
- schedule_data[0].curr = idle_task[0];
- schedule_data[0].idle = idle_task[0];
+ schedule_data[0].curr = idle_domain[0];
+ schedule_data[0].idle = idle_domain[0];
for ( i = 0; schedulers[i] != NULL; i++ )
{
@@ -546,10 +548,10 @@
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- rc = SCHED_OP(alloc_task, idle_task[0]);
+ rc = SCHED_OP(alloc_task, idle_domain[0]);
BUG_ON(rc < 0);
- sched_add_domain(idle_task[0]);
+ sched_add_domain(idle_domain[0]);
}
/*
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/vsprintf.c
--- a/xen/common/vsprintf.c Mon Jan 9 11:19:55 2006
+++ b/xen/common/vsprintf.c Mon Jan 9 11:22:17 2006
@@ -12,11 +12,15 @@
/*
* Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@xxxxxxxxxxxxxx>
* - changed to provide snprintf and vsnprintf functions
+ * So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@xxxxxxx>
+ * - scnprintf and vscnprintf
*/
#include <stdarg.h>
#include <xen/ctype.h>
#include <xen/lib.h>
+#include <asm/div64.h>
+#include <asm/page.h>
/**
* simple_strtoul - convert a string to an unsigned long
@@ -33,11 +37,14 @@
if (*cp == '0') {
base = 8;
cp++;
- if ((*cp == 'x') && isxdigit(cp[1])) {
+ if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
cp++;
base = 16;
}
}
+ } else if (base == 16) {
+ if (cp[0] == '0' && toupper(cp[1]) == 'X')
+ cp += 2;
}
while (isxdigit(*cp) &&
(value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
@@ -49,6 +56,8 @@
return result;
}
+EXPORT_SYMBOL(simple_strtoul);
+
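This hunk (and the matching one in simple_strtoull below) makes two behavioural changes: base auto-detection now also accepts an uppercase 'X', and an explicit "0x"/"0X" prefix is skipped when the caller already passed base 16 instead of terminating the parse at the 'x'. That matches hosted strtoul:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    printf("%lu\n", strtoul("0x1A", NULL, 16)); /* 26: prefix skipped */
    printf("%lu\n", strtoul("0X1a", NULL, 0));  /* 26: auto-detected hex */
    return 0;
}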
/**
* simple_strtol - convert a string to a signed long
* @cp: The start of the string
@@ -61,6 +70,8 @@
return -simple_strtoul(cp+1,endp,base);
return simple_strtoul(cp,endp,base);
}
+
+EXPORT_SYMBOL(simple_strtol);
/**
* simple_strtoull - convert a string to an unsigned long long
@@ -77,11 +88,14 @@
if (*cp == '0') {
base = 8;
cp++;
- if ((*cp == 'x') && isxdigit(cp[1])) {
+ if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
cp++;
base = 16;
}
}
+ } else if (base == 16) {
+ if (cp[0] == '0' && toupper(cp[1]) == 'X')
+ cp += 2;
}
while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp) ? toupper(*cp) : *cp)-'A'+10) < base) {
@@ -92,6 +106,8 @@
*endp = (char *)cp;
return result;
}
+
+EXPORT_SYMBOL(simple_strtoull);
/**
* simple_strtoll - convert a string to a signed long long
@@ -123,25 +139,25 @@
#define SPECIAL 32 /* 0x */
#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
-static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
+static char * number(char * buf, char * end, unsigned long long num, int base, int size, int precision, int type)
{
char c,sign,tmp[66];
const char *digits;
- const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
- const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ static const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ static const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
int i;
digits = (type & LARGE) ? large_digits : small_digits;
if (type & LEFT)
type &= ~ZEROPAD;
if (base < 2 || base > 36)
- return buf;
+ return NULL;
c = (type & ZEROPAD) ? '0' : ' ';
sign = 0;
if (type & SIGN) {
- if (num < 0) {
+ if ((signed long long) num < 0) {
sign = '-';
- num = -num;
+ num = - (signed long long) num;
size--;
} else if (type & PLUS) {
sign = '+';
@@ -160,6 +176,9 @@
i = 0;
if (num == 0)
tmp[i++]='0';
+ else while (num != 0)
+ tmp[i++] = digits[do_div(num,base)];
+#if 0
else
{
/* XXX KAF: force unsigned mod and div. */
@@ -167,6 +186,7 @@
unsigned int base2=(unsigned int)base;
while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; }
}
+#endif
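For illustration (not part of this changeset): the rewritten digit loop leans on do_div() from the newly included <asm/div64.h>, which divides its 64-bit lvalue argument in place and evaluates to the remainder, so no 64-bit modulo is needed on 32-bit builds. A hedged sketch of the same idiom, for bases up to 16:

    /* Emit v's digits, least significant first, as number() does above. */
    static int emit_digits(unsigned long long v, unsigned int base, char *out)
    {
        static const char digits[] = "0123456789abcdef";
        int n = 0;
        if (v == 0)
            out[n++] = '0';
        while (v != 0)
            out[n++] = digits[do_div(v, base)]; /* v /= base; yields v % base */
        return n;
    }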
if (i > precision)
precision = i;
size -= precision;
@@ -222,14 +242,22 @@
}
/**
-* vsnprintf - Format a string and place it in a buffer
-* @buf: The buffer to place the result into
-* @size: The size of the buffer, including the trailing null space
-* @fmt: The format string to use
-* @args: Arguments for the format string
-*
-* Call this function if you are already dealing with a va_list.
-* You probably want snprintf instead.
+ * vsnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf. If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want snprintf instead.
*/
int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
{
@@ -247,6 +275,9 @@
int qualifier; /* 'h', 'l', or 'L' for integer fields */
/* 'z' support added 23/7/1999 S.H. */
/* 'z' changed to 'Z' --davidm 1/25/99 */
+
+ /* Reject out-of-range values early */
+ BUG_ON((int)size < 0);
str = buf;
end = buf + size - 1;
@@ -307,17 +338,14 @@
/* get the conversion qualifier */
qualifier = -1;
- if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+ *fmt =='Z' || *fmt == 'z') {
qualifier = *fmt;
++fmt;
if (qualifier == 'l' && *fmt == 'l') {
qualifier = 'L';
++fmt;
}
- }
- if (*fmt == 'q') {
- qualifier = 'L';
- ++fmt;
}
/* default base */
@@ -345,7 +373,7 @@
case 's':
s = va_arg(args, char *);
- if (!s)
+ if ((unsigned long)s < PAGE_SIZE)
s = "<NULL>";
len = strnlen(s, precision);
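For illustration (not part of this changeset): the new guard treats any %s argument below PAGE_SIZE, not just literal NULL, as invalid, on the grounds that such pointers are almost always NULL plus a small offset:

    printk("%s\n", (char *)NULL); /* prints "<NULL>" instead of faulting */
    printk("%s\n", (char *)0x10); /* also "<NULL>": below PAGE_SIZE */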
@@ -386,7 +414,7 @@
if (qualifier == 'l') {
long * ip = va_arg(args, long *);
*ip = (str - buf);
- } else if (qualifier == 'Z') {
+ } else if (qualifier == 'Z' || qualifier == 'z') {
size_t * ip = va_arg(args, size_t *);
*ip = (str - buf);
} else {
@@ -437,7 +465,7 @@
num = va_arg(args, unsigned long);
if (flags & SIGN)
num = (signed long) num;
- } else if (qualifier == 'Z') {
+ } else if (qualifier == 'Z' || qualifier == 'z') {
num = va_arg(args, size_t);
} else if (qualifier == 'h') {
num = (unsigned short) va_arg(args, int);
@@ -463,12 +491,43 @@
return str-buf;
}
+EXPORT_SYMBOL(vsnprintf);
+
+/**
+ * vscnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which have been written into
+ * the @buf not including the trailing '\0'. If @size is <= 0 the function
+ * returns 0.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want scnprintf instead.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int i;
+
+ i=vsnprintf(buf,size,fmt,args);
+ return (i >= size) ? (size - 1) : i;
+}
+
+EXPORT_SYMBOL(vscnprintf);
+
/**
* snprintf - Format a string and place it in a buffer
* @buf: The buffer to place the result into
* @size: The size of the buffer, including the trailing null space
* @fmt: The format string to use
* @...: Arguments for the format string
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing null,
+ * as per ISO C99. If the return is greater than or equal to
+ * @size, the resulting string is truncated.
*/
int snprintf(char * buf, size_t size, const char *fmt, ...)
{
@@ -481,26 +540,61 @@
return i;
}
+EXPORT_SYMBOL(snprintf);
+
+/**
+ * scnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The return value is the number of characters written into @buf not including
+ * the trailing '\0'. If @size is <= 0 the function returns 0. If the return is
+ * greater than or equal to @size, the resulting string is truncated.
+ */
+
+int scnprintf(char * buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+ return (i >= size) ? (size - 1) : i;
+}
+EXPORT_SYMBOL(scnprintf);
+
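+
For illustration (not part of this changeset): the two return conventions documented above differ only on truncation. A sketch with illustrative values:

    char buf[8];
    int would = snprintf(buf, sizeof(buf), "%s", "0123456789");
    /* would == 10: length a big-enough buffer would need (ISO C99). */
    int wrote = scnprintf(buf, sizeof(buf), "%s", "0123456789");
    /* wrote == 7: characters actually stored, excluding the '\0'. */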
/**
* vsprintf - Format a string and place it in a buffer
* @buf: The buffer to place the result into
* @fmt: The format string to use
* @args: Arguments for the format string
*
+ * The function returns the number of characters written
+ * into @buf. Use vsnprintf or vscnprintf in order to avoid
+ * buffer overflows.
+ *
* Call this function if you are already dealing with a va_list.
* You probably want sprintf instead.
*/
int vsprintf(char *buf, const char *fmt, va_list args)
{
- return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args);
-}
-
+ return vsnprintf(buf, INT_MAX, fmt, args);
+}
+
+EXPORT_SYMBOL(vsprintf);
/**
* sprintf - Format a string and place it in a buffer
* @buf: The buffer to place the result into
* @fmt: The format string to use
* @...: Arguments for the format string
+ *
+ * The function returns the number of characters written
+ * into @buf. Use snprintf or scnprintf in order to avoid
+ * buffer overflows.
*/
int sprintf(char * buf, const char *fmt, ...)
{
@@ -508,11 +602,12 @@
int i;
va_start(args, fmt);
- i=vsprintf(buf,fmt,args);
+ i=vsnprintf(buf, INT_MAX, fmt, args);
va_end(args);
return i;
}
+EXPORT_SYMBOL(sprintf);
/*
* Local variables:
diff -r 25e3c8668f1f -r 8af1199488d3 xen/drivers/char/ns16550.c
--- a/xen/drivers/char/ns16550.c Mon Jan 9 11:19:55 2006
+++ b/xen/drivers/char/ns16550.c Mon Jan 9 11:22:17 2006
@@ -13,6 +13,7 @@
#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/serial.h>
+#include <xen/iocap.h>
#include <asm/io.h>
/*
@@ -233,11 +234,11 @@
}
#ifdef CONFIG_X86
-#include <asm/physdev.h>
static void ns16550_endboot(struct serial_port *port)
{
struct ns16550 *uart = port->uart;
- physdev_modify_ioport_access_range(dom0, 0, uart->io_base, 8);
+ if ( ioports_deny_access(dom0, uart->io_base, uart->io_base + 7) != 0 )
+ BUG();
}
#else
#define ns16550_endboot NULL
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-ia64/domain.h Mon Jan 9 11:22:17 2006
@@ -10,7 +10,7 @@
#include <asm/vmx_platform.h>
#include <xen/list.h>
-extern void arch_do_createdomain(struct vcpu *);
+extern int arch_do_createdomain(struct vcpu *);
extern void domain_relinquish_resources(struct domain *);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/linux-xen/asm/pal.h
--- a/xen/include/asm-ia64/linux-xen/asm/pal.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-ia64/linux-xen/asm/pal.h Mon Jan 9 11:22:17 2006
@@ -925,7 +925,11 @@
ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector)
{
struct ia64_pal_retval iprv;
+#ifdef XEN /* fix a bug in Linux... PAL has changed */
+ PAL_CALL(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+#else
PAL_CALL_IC_OFF(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+#endif
if (vector)
*vector = iprv.v0;
*progress = iprv.v1;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-ia64/vmx.h Mon Jan 9 11:22:17 2006
@@ -23,7 +23,7 @@
#define _ASM_IA64_VT_H
#define RR7_SWITCH_SHIFT 12 /* 4k enough */
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
extern void identify_vmx_feature(void);
extern unsigned int vmx_enabled;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/config.h Mon Jan 9 11:22:17 2006
@@ -248,12 +248,10 @@
#ifdef CONFIG_X86_PAE
/* Hypervisor owns top 168MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START 0xF5800000
-# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#define HYPERVISOR_VIRT_START mk_unsigned_long(0xF5800000)
#else
/* Hypervisor owns top 64MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START 0xFC000000
-# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#define HYPERVISOR_VIRT_START mk_unsigned_long(0xFC000000)
#endif
#define L2_PAGETABLE_FIRST_XEN_SLOT \
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/current.h
--- a/xen/include/asm-x86/current.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/current.h Mon Jan 9 11:22:17 2006
@@ -49,7 +49,7 @@
#define reset_stack_and_jump(__fn) \
__asm__ __volatile__ ( \
"mov %0,%%"__OP"sp; jmp "STR(__fn) \
- : : "r" (guest_cpu_user_regs()) )
+ : : "r" (guest_cpu_user_regs()) : "memory" )
#define schedule_tail(_ed) (((_ed)->arch.schedule_tail)(_ed))
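For illustration (not part of this changeset): the added "memory" clobber tells gcc the asm may read or write arbitrary memory, so pending stores must complete before the stack switch and no memory value may stay cached in a register across it. A generic toy sketch of the same idiom (x86-32 syntax, hypothetical helper):

    static inline void stack_switch_demo(void *new_sp, void (*fn)(void))
    {
        /* Without "memory", gcc could sink a store past the stack switch. */
        __asm__ __volatile__ ( "mov %0,%%esp; jmp *%1"
                               : : "r" (new_sp), "r" (fn) : "memory" );
    }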
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/domain.h Mon Jan 9 11:22:17 2006
@@ -24,8 +24,8 @@
/* Writable pagetables. */
struct ptwr_info ptwr[2];
- /* I/O-port access bitmap mask. */
- u8 *iobmp_mask; /* Address of IO bitmap mask, or NULL. */
+ /* I/O-port admin-specified access capabilities. */
+ struct rangeset *ioport_caps;
/* Shadow mode status and controls. */
struct shadow_ops *ops;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/mm.h Mon Jan 9 11:22:17 2006
@@ -336,11 +336,13 @@
int revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
void cleanup_writable_pagetable(struct domain *d);
-#define sync_pagetable_state(d) \
- do { \
- LOCK_BIGLOCK(d); \
- cleanup_writable_pagetable(d); \
- UNLOCK_BIGLOCK(d); \
+#define sync_pagetable_state(d) \
+ do { \
+ LOCK_BIGLOCK(d); \
+ /* Avoid racing with ptwr_destroy(). */ \
+ if ( !test_bit(_DOMF_dying, &(d)->domain_flags) ) \
+ cleanup_writable_pagetable(d); \
+ UNLOCK_BIGLOCK(d); \
} while ( 0 )
int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/msr.h
--- a/xen/include/asm-x86/msr.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/msr.h Mon Jan 9 11:22:17 2006
@@ -12,7 +12,7 @@
__asm__ __volatile__("rdmsr" \
: "=a" (a__), "=d" (b__) \
: "c" (msr)); \
- val = a__ | (b__<<32); \
+ val = a__ | ((u64)b__<<32); \
} while(0);
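For illustration (not part of this changeset): the cast added above fixes a classic width bug, since shifting a 32-bit operand left by 32 is undefined and in practice discards the MSR's high half. Standalone sketch:

    #include <stdint.h>

    uint64_t combine(uint32_t lo, uint32_t hi)
    {
        /* BUG: (hi << 32) shifts a 32-bit operand by its full width. */
        /* FIX: widen first, as the patched rdmsrl() now does:        */
        return lo | ((uint64_t)hi << 32);
    }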
#define wrmsr(msr,val1,val2) \
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/processor.h Mon Jan 9 11:22:17 2006
@@ -190,7 +190,7 @@
#ifdef CONFIG_X86_HT
extern void detect_ht(struct cpuinfo_x86 *c);
#else
-static inline void detect_ht(struct cpuinfo_x86 *c) {}
+static always_inline void detect_ht(struct cpuinfo_x86 *c) {}
#endif
/*
@@ -209,7 +209,7 @@
/*
* CPUID functions returning a single datum
*/
-static inline unsigned int cpuid_eax(unsigned int op)
+static always_inline unsigned int cpuid_eax(unsigned int op)
{
unsigned int eax;
@@ -219,7 +219,7 @@
: "bx", "cx", "dx");
return eax;
}
-static inline unsigned int cpuid_ebx(unsigned int op)
+static always_inline unsigned int cpuid_ebx(unsigned int op)
{
unsigned int eax, ebx;
@@ -229,7 +229,7 @@
: "cx", "dx" );
return ebx;
}
-static inline unsigned int cpuid_ecx(unsigned int op)
+static always_inline unsigned int cpuid_ecx(unsigned int op)
{
unsigned int eax, ecx;
@@ -239,7 +239,7 @@
: "bx", "dx" );
return ecx;
}
-static inline unsigned int cpuid_edx(unsigned int op)
+static always_inline unsigned int cpuid_edx(unsigned int op)
{
unsigned int eax, edx;
@@ -281,7 +281,7 @@
*/
extern unsigned long mmu_cr4_features;
-static inline void set_in_cr4 (unsigned long mask)
+static always_inline void set_in_cr4 (unsigned long mask)
{
unsigned long dummy;
mmu_cr4_features |= mask;
@@ -292,7 +292,7 @@
: "=&r" (dummy) : "irg" (mask) );
}
-static inline void clear_in_cr4 (unsigned long mask)
+static always_inline void clear_in_cr4 (unsigned long mask)
{
unsigned long dummy;
mmu_cr4_features &= ~mask;
@@ -334,7 +334,7 @@
outb((data), 0x23); \
} while (0)
-static inline void __monitor(const void *eax, unsigned long ecx,
+static always_inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
{
/* "monitor %eax,%ecx,%edx;" */
@@ -343,7 +343,7 @@
: :"a" (eax), "c" (ecx), "d"(edx));
}
-static inline void __mwait(unsigned long eax, unsigned long ecx)
+static always_inline void __mwait(unsigned long eax, unsigned long ecx)
{
/* "mwait %eax,%ecx;" */
asm volatile(
@@ -460,7 +460,7 @@
};
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
+static always_inline void rep_nop(void)
{
__asm__ __volatile__ ( "rep;nop" : : : "memory" );
}
@@ -471,7 +471,7 @@
#ifdef CONFIG_MPENTIUMIII
#define ARCH_HAS_PREFETCH
-extern inline void prefetch(const void *x)
+extern always_inline void prefetch(const void *x)
{
__asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
}
@@ -482,12 +482,12 @@
#define ARCH_HAS_PREFETCHW
#define ARCH_HAS_SPINLOCK_PREFETCH
-extern inline void prefetch(const void *x)
+extern always_inline void prefetch(const void *x)
{
__asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
}
-extern inline void prefetchw(const void *x)
+extern always_inline void prefetchw(const void *x)
{
__asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/shadow.h Mon Jan 9 11:22:17 2006
@@ -341,10 +341,10 @@
#if SHADOW_VERBOSE_DEBUG
#define SH_LOG(_f, _a...) \
printk("DOM%uP%u: SH_LOG(%d): " _f "\n", \
- current->domain->domain_id , current->processor, __LINE__ , ## _a )
+ current->domain->domain_id , smp_processor_id(), __LINE__ , ## _a )
#define SH_VLOG(_f, _a...) \
printk("DOM%uP%u: SH_VLOG(%d): " _f "\n", \
- current->domain->domain_id, current->processor, __LINE__ , ## _a )
+ current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
#else
#define SH_LOG(_f, _a...) ((void)0)
#define SH_VLOG(_f, _a...) ((void)0)
@@ -353,7 +353,7 @@
#if SHADOW_VVERBOSE_DEBUG
#define SH_VVLOG(_f, _a...) \
printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n", \
- current->domain->domain_id, current->processor, __LINE__ , ## _a )
+ current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
#else
#define SH_VVLOG(_f, _a...) ((void)0)
#endif
@@ -361,7 +361,7 @@
#if SHADOW_VVVERBOSE_DEBUG
#define SH_VVVLOG(_f, _a...) \
printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n", \
- current->domain->domain_id, current->processor, __LINE__ , ## _a )
+ current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
#else
#define SH_VVVLOG(_f, _a...) ((void)0)
#endif
@@ -369,7 +369,7 @@
#if FULLSHADOW_DEBUG
#define FSH_LOG(_f, _a...) \
printk("DOM%uP%u: FSH_LOG(%d): " _f "\n", \
- current->domain->domain_id, current->processor, __LINE__ , ## _a )
+ current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
#else
#define FSH_LOG(_f, _a...) ((void)0)
#endif
@@ -591,7 +591,7 @@
if ( need_flush )
{
perfc_incrc(update_hl2e_invlpg);
- flush_tlb_one_mask(v->domain->cpumask,
+ flush_tlb_one_mask(v->domain->domain_dirty_cpumask,
&linear_pg_table[l1_linear_offset(va)]);
}
}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx.h Mon Jan 9 11:22:17 2006
@@ -26,7 +26,7 @@
#include <asm/vmx_vmcs.h>
#include <asm/i387.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
extern int hvm_enabled;
@@ -38,7 +38,6 @@
extern void arch_vmx_do_launch(struct vcpu *);
extern void arch_vmx_do_resume(struct vcpu *);
-extern void arch_vmx_do_relaunch(struct vcpu *);
extern unsigned int cpu_rev;
@@ -506,7 +505,7 @@
static inline unsigned int vmx_get_vcpu_nr(struct domain *d)
{
- return d->arch.vmx_platform.nr_vcpu;
+ return d->arch.vmx_platform.nr_vcpus;
}
static inline shared_iopage_t *get_sp(struct domain *d)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_intercept.h
--- a/xen/include/asm-x86/vmx_intercept.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_intercept.h Mon Jan 9 11:22:17 2006
@@ -6,7 +6,7 @@
#include <xen/lib.h>
#include <xen/time.h>
#include <xen/errno.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#define MAX_IO_HANDLER 8
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_platform.h
--- a/xen/include/asm-x86/vmx_platform.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_platform.h Mon Jan 9 11:22:17 2006
@@ -33,10 +33,10 @@
(((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
#define operand_size(operand) \
- ((operand >> 24) & 0xFF)
+ ((operand >> 24) & 0xFF)
#define operand_index(operand) \
- ((operand >> 16) & 0xFF)
+ ((operand >> 16) & 0xFF)
/* for instruction.operand[].size */
#define BYTE 1
@@ -81,13 +81,13 @@
struct vmx_platform {
unsigned long shared_page_va;
- unsigned int nr_vcpu;
- unsigned int lapic_enable;
+ unsigned int nr_vcpus;
+ unsigned int apic_enabled;
struct vmx_virpit vmx_pit;
struct vmx_io_handler vmx_io_handler;
struct vmx_virpic vmx_pic;
- struct vmx_vioapic vmx_vioapic;
+ struct vmx_vioapic vmx_vioapic;
unsigned char round_info[256];
spinlock_t round_robin_lock;
int interrupt_request;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_vlapic.h
--- a/xen/include/asm-x86/vmx_vlapic.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_vlapic.h Mon Jan 9 11:22:17 2006
@@ -21,7 +21,7 @@
#define VMX_VLAPIC_H
#include <asm/msr.h>
-#include <public/io/ioreq.h>
+#include <public/hvm/ioreq.h>
#if defined(__i386__) || defined(__x86_64__)
static inline int __fls(uint32_t word)
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/vmx_vmcs.h Mon Jan 9 11:22:17 2006
@@ -23,7 +23,7 @@
#include <asm/vmx_cpu.h>
#include <asm/vmx_platform.h>
#include <asm/vmx_vlapic.h>
-#include <public/vmx_assist.h>
+#include <public/hvm/vmx_assist.h>
extern int start_vmx(void);
extern void stop_vmx(void);
@@ -86,7 +86,8 @@
#define PC_DEBUG_PORT 0x80
struct arch_vmx_struct {
- struct vmcs_struct *vmcs; /* VMCS pointer in virtual */
+ struct vmcs_struct *vmcs; /* VMCS pointer in virtual. */
+ unsigned int launch_cpu; /* VMCS is valid on this CPU. */
unsigned long flags; /* VMCS flags */
unsigned long cpu_cr0; /* copy of guest CR0 */
unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/x86_emulate.h Mon Jan 9 11:22:17 2006
@@ -18,10 +18,11 @@
* special treatment or emulation (*_emulated).
*
* The emulator assumes that an instruction accesses only one 'emulated memory'
- * location, and that this is one of its data operands. Instruction fetches and
+ * location, that this location is the given linear faulting address (cr2), and
+ * that this is one of the instruction's data operands. Instruction fetches and
* stack operations are assumed never to access emulated memory. The emulator
* automatically deduces which operand of a string-move operation is accessing
- * emulated memory, and requires that the other operand accesses normal memory.
+ * emulated memory, and assumes that the other operand accesses normal memory.
*
* NOTES:
* 1. The emulator isn't very smart about emulated vs. standard memory.
@@ -36,6 +37,7 @@
* then immediately bail.
* 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
* cmpxchg8b_emulated need support 8-byte accesses.
+ * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
*/
/* Access completed successfully: continue emulation as normal. */
#define X86EMUL_CONTINUE 0
@@ -141,14 +143,27 @@
struct cpu_user_regs;
+/* Execution mode, passed to the emulator. */
+#define X86EMUL_MODE_REAL 0 /* Real mode. */
+#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
+#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
+#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
+
+/* Host execution mode. */
+#if defined(__i386__)
+#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
+#elif defined(__x86_64__)
+#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
+#endif
+
/*
* x86_emulate_memop: Emulate an instruction that faulted attempting to
* read/write a 'special' memory area.
* @regs: Register state at time of fault.
- * @cr2: Linear faulting address.
+ * @cr2: Linear faulting address within an emulated/special memory area.
* @ops: Interface to access special memory.
- * @mode: Current execution mode, represented by the default size of memory
- * addresses, in bytes. Valid values are 2, 4 and 8 (x86/64 only).
+ * @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
+ * Returns -1 on failure, 0 on success.
*/
extern int
x86_emulate_memop(
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/public/arch-x86_32.h Mon Jan 9 11:22:17 2006
@@ -49,10 +49,15 @@
* machine->physical mapping table starts at this address, read-only.
*/
#ifdef CONFIG_X86_PAE
-# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#define __HYPERVISOR_VIRT_START 0xF5800000
#else
-# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#define __HYPERVISOR_VIRT_START 0xFC000000
#endif
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
#ifndef machine_to_phys_mapping
#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
#endif
@@ -137,7 +142,7 @@
unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
} arch_vcpu_info_t;
-#endif
+#endif /* !__ASSEMBLY__ */
#endif
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/public/arch-x86_64.h Mon Jan 9 11:22:17 2006
@@ -59,9 +59,12 @@
/* And the trap vector is... */
#define TRAP_INSTR "syscall"
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
+
#ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START (0xFFFF800000000000UL)
-#define HYPERVISOR_VIRT_END (0xFFFF880000000000UL)
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
#endif
/* Maximum number of virtual CPUs in multi-processor guests. */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/public/dom0_ops.h Mon Jan 9 11:22:17 2006
@@ -94,14 +94,14 @@
xen_domain_handle_t handle;
} dom0_getdomaininfo_t;
-#define DOM0_SETDOMAININFO 13
+#define DOM0_SETVCPUCONTEXT 13
typedef struct {
/* IN variables. */
domid_t domain;
uint32_t vcpu;
/* IN/OUT parameters */
vcpu_guest_context_t *ctxt;
-} dom0_setdomaininfo_t;
+} dom0_setvcpucontext_t;
#define DOM0_MSR 15
typedef struct {
@@ -163,13 +163,13 @@
/*
* Set which physical cpus a vcpu can execute on.
*/
-#define DOM0_PINCPUDOMAIN 20
+#define DOM0_SETVCPUAFFINITY 20
typedef struct {
/* IN variables. */
domid_t domain;
uint32_t vcpu;
cpumap_t cpumap;
-} dom0_pincpudomain_t;
+} dom0_setvcpuaffinity_t;
/* Get trace buffers machine base address */
#define DOM0_TBUFCONTROL 21
@@ -410,6 +410,21 @@
uint8_t enable;
} dom0_setdebugging_t;
+#define DOM0_IRQ_PERMISSION 46
+typedef struct {
+ domid_t domain; /* domain to be affected */
+ uint8_t pirq;
+ uint8_t allow_access; /* flag to specify enable/disable of IRQ access */
+} dom0_irq_permission_t;
+
+#define DOM0_IOMEM_PERMISSION 47
+typedef struct {
+ domid_t domain; /* domain to be affected */
+ unsigned long first_pfn; /* first page (physical page number) in range */
+ unsigned long nr_pfns; /* number of pages in range (>0) */
+ uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
+} dom0_iomem_permission_t;
+
typedef struct {
uint32_t cmd;
uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
@@ -421,13 +436,13 @@
dom0_getmemlist_t getmemlist;
dom0_schedctl_t schedctl;
dom0_adjustdom_t adjustdom;
- dom0_setdomaininfo_t setdomaininfo;
+ dom0_setvcpucontext_t setvcpucontext;
dom0_getdomaininfo_t getdomaininfo;
dom0_getpageframeinfo_t getpageframeinfo;
dom0_msr_t msr;
dom0_settime_t settime;
dom0_readconsole_t readconsole;
- dom0_pincpudomain_t pincpudomain;
+ dom0_setvcpuaffinity_t setvcpuaffinity;
dom0_tbufcontrol_t tbufcontrol;
dom0_physinfo_t physinfo;
dom0_sched_id_t sched_id;
@@ -448,6 +463,8 @@
dom0_max_vcpus_t max_vcpus;
dom0_setdomainhandle_t setdomainhandle;
dom0_setdebugging_t setdebugging;
+ dom0_irq_permission_t irq_permission;
+ dom0_iomem_permission_t iomem_permission;
uint8_t pad[128];
} u;
} dom0_op_t;
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/xen.h
--- a/xen/include/public/xen.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/public/xen.h Mon Jan 9 11:22:17 2006
@@ -426,6 +426,15 @@
typedef uint8_t xen_domain_handle_t[16];
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
#endif /* !__ASSEMBLY__ */
#endif /* __XEN_PUBLIC_XEN_H__ */
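For illustration (not part of this changeset): the two-level macro is the standard expand-then-paste idiom; the outer macro expands its argument before the inner one glues on the UL suffix, and the assembly variant leaves the literal bare:

    #define VIRT_BASE 0xF5800000                 /* hypothetical */
    unsigned long base = mk_unsigned_long(VIRT_BASE);
    /* C:        expands to 0xF5800000UL                           */
    /* Assembly: expands to 0xF5800000 (no C suffix allowed)       */
    /* A single-level "x ## UL" would paste "VIRT_BASEUL" instead  */
    /* of expanding VIRT_BASE first.                                */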
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/bitmap.h
--- a/xen/include/xen/bitmap.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/bitmap.h Mon Jan 9 11:22:17 2006
@@ -41,6 +41,8 @@
* bitmap_weight(src, nbits) Hamming Weight: number set bits
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
+ * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf
+ * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf
*/
/*
@@ -93,6 +95,10 @@
const unsigned long *bitmap2, int bits);
extern int __bitmap_weight(const unsigned long *bitmap, int bits);
+extern int bitmap_scnprintf(char *buf, unsigned int len,
+ const unsigned long *src, int nbits);
+extern int bitmap_scnlistprintf(char *buf, unsigned int len,
+ const unsigned long *src, int nbits);
extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
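For illustration (not part of this changeset): a hedged usage sketch of the two new formatting helpers declared above, with illustrative bit patterns:

    unsigned long mask[1] = { 0x2d };  /* bits 0, 2, 3 and 5 set */
    char buf[32];

    bitmap_scnprintf(buf, sizeof(buf), mask, 8);     /* hex words:  "2d"      */
    bitmap_scnlistprintf(buf, sizeof(buf), mask, 8); /* range list: "0,2-3,5" */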
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/compiler.h
--- a/xen/include/xen/compiler.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/compiler.h Mon Jan 9 11:22:17 2006
@@ -19,4 +19,10 @@
#define __attribute_used__ __attribute__((__unused__))
#endif
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define __must_check __attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
+
#endif /* __LINUX_COMPILER_H */
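For illustration (not part of this changeset): __must_check is meant to annotate functions whose failure must not be ignored; on gcc 3.4+ a caller that discards the result gets a warning, and on older compilers the attribute compiles away. Hypothetical function:

    int __must_check reserve_range(unsigned long s, unsigned long e);

    void caller(void)
    {
        reserve_range(0x3f8, 0x3ff);        /* warning: result ignored */
        if ( reserve_range(0x3f8, 0x3ff) )  /* OK: result checked */
            panic("reservation failed");
    }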
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/config.h
--- a/xen/include/xen/config.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/config.h Mon Jan 9 11:22:17 2006
@@ -43,4 +43,13 @@
#define __STR(...) #__VA_ARGS__
#define STR(...) __STR(__VA_ARGS__)
+#ifndef __ASSEMBLY__
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+#else /* __ASSEMBLY__ */
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+#endif /* !__ASSEMBLY__ */
+
#endif /* __XEN_CONFIG_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/cpumask.h Mon Jan 9 11:22:17 2006
@@ -8,8 +8,8 @@
* See detailed comments in the file xen/bitmap.h describing the
* data type on which these cpumasks are based.
*
- * For details of cpumask_scnprintf() and cpumask_parse(),
- * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ * For details of cpumask_scnprintf() and cpulist_scnprintf(),
+ * see bitmap_scnprintf() and bitmap_scnlistprintf() in lib/bitmap.c.
*
* The available cpumask operations are:
*
@@ -36,8 +36,8 @@
* void cpus_shift_right(dst, src, n) Shift right
* void cpus_shift_left(dst, src, n) Shift left
*
- * int first_cpu(mask) Number lowest set bit, or >= NR_CPUS
- * int next_cpu(cpu, mask) Next cpu past 'cpu', or >= NR_CPUS
+ * int first_cpu(mask) Number lowest set bit, or NR_CPUS
+ * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
*
* cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
* CPU_MASK_ALL Initializer - all bits set
@@ -45,7 +45,7 @@
* unsigned long *cpus_addr(mask) Array of unsigned long's in mask
*
* int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
- * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask
+ * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
*
* for_each_cpu_mask(cpu, mask) for-loop cpu over mask
*
@@ -207,13 +207,13 @@
#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
static inline int __first_cpu(const cpumask_t *srcp, int nbits)
{
- return find_first_bit(srcp->bits, nbits);
+ return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
}
#define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS)
static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
{
- return find_next_bit(srcp->bits, nbits, n+1);
+ return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
}
#define cpumask_of_cpu(cpu) \
@@ -259,7 +259,6 @@
#define cpus_addr(src) ((src).bits)
-/*
#define cpumask_scnprintf(buf, len, src) \
__cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
static inline int __cpumask_scnprintf(char *buf, int len,
@@ -268,14 +267,13 @@
return bitmap_scnprintf(buf, len, srcp->bits, nbits);
}
-#define cpumask_parse(ubuf, ulen, src) \
- __cpumask_parse((ubuf), (ulen), &(src), NR_CPUS)
-static inline int __cpumask_parse(const char __user *buf, int len,
- cpumask_t *dstp, int nbits)
-{
- return bitmap_parse(buf, len, dstp->bits, nbits);
-}
-*/
+#define cpulist_scnprintf(buf, len, src) \
+ __cpulist_scnprintf((buf), (len), &(src), NR_CPUS)
+static inline int __cpulist_scnprintf(char *buf, int len,
+ const cpumask_t *srcp, int nbits)
+{
+ return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
+}
#if NR_CPUS > 1
#define for_each_cpu_mask(cpu, mask) \
@@ -368,7 +366,7 @@
for_each_cpu_mask(cpu, (mask)) \
if (cpu_online(cpu)) \
break; \
- min_t(int, NR_CPUS, cpu); \
+ cpu; \
})
#define for_each_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map)
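For illustration (not part of this changeset): first_cpu()/next_cpu() now return exactly NR_CPUS when the mask is exhausted, which keeps the usual iteration pattern well defined, and cpulist_scnprintf() replaces the dropped cpumask_parse(). A sketch assuming this header's usual cpu_set()/cpus_clear() helpers:

    cpumask_t mask;
    char buf[64];
    int cpu;

    cpus_clear(mask);
    cpu_set(2, mask);
    cpu_set(5, mask);

    for ( cpu = first_cpu(mask); cpu < NR_CPUS; cpu = next_cpu(cpu, mask) )
        printk("cpu %d\n", cpu);               /* visits 2, then 5 */

    cpulist_scnprintf(buf, sizeof(buf), mask); /* buf == "2,5" */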
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/domain.h Mon Jan 9 11:22:17 2006
@@ -13,12 +13,10 @@
extern void free_vcpu_struct(struct vcpu *v);
-extern void arch_do_createdomain(struct vcpu *v);
+extern int arch_do_createdomain(struct vcpu *v);
-extern int arch_set_info_guest(
+extern int arch_set_info_guest(
struct vcpu *v, struct vcpu_guest_context *c);
-
-extern void vcpu_migrate_cpu(struct vcpu *v, int newcpu);
extern void free_perdomain_pt(struct domain *d);
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/lib.h Mon Jan 9 11:22:17 2006
@@ -53,10 +53,16 @@
/* vsprintf.c */
extern int sprintf(char * buf, const char * fmt, ...)
__attribute__ ((format (printf, 2, 3)));
-extern int vsprintf(char *buf, const char *, va_list);
+extern int vsprintf(char *buf, const char *, va_list)
+ __attribute__ ((format (printf, 2, 0)));
extern int snprintf(char * buf, size_t size, const char * fmt, ...)
__attribute__ ((format (printf, 3, 4)));
-extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+ __attribute__ ((format (printf, 3, 0)));
+extern int scnprintf(char * buf, size_t size, const char * fmt, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+ __attribute__ ((format (printf, 3, 0)));
long simple_strtol(
const char *cp,char **endp, unsigned int base);
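For illustration (not part of this changeset): the format attributes added above let gcc type-check variadic arguments against the format string, e.g.:

    void demo(char *buf, size_t n, int nr)
    {
        snprintf(buf, n, "%d pages", nr); /* OK */
        snprintf(buf, n, "%s pages", nr); /* warning: int passed for %s */
    }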
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/sched-if.h Mon Jan 9 11:22:17 2006
@@ -13,8 +13,8 @@
struct schedule_data {
spinlock_t schedule_lock; /* spinlock protecting curr */
- struct vcpu *curr; /* current task */
- struct vcpu *idle; /* idle task for this cpu */
+ struct vcpu *curr; /* current task */
+ struct vcpu *idle; /* idle task for this cpu */
void *sched_priv;
struct ac_timer s_timer; /* scheduling timer */
unsigned long tick; /* current periodic 'tick' */
@@ -39,6 +39,7 @@
void (*rem_task) (struct vcpu *);
void (*sleep) (struct vcpu *);
void (*wake) (struct vcpu *);
+ int (*set_affinity) (struct vcpu *, cpumask_t *);
struct task_slice (*do_schedule) (s_time_t);
int (*control) (struct sched_ctl_cmd *);
int (*adjdom) (struct domain *,
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/sched.h Mon Jan 9 11:22:17 2006
@@ -11,6 +11,7 @@
#include <xen/time.h>
#include <xen/ac_timer.h>
#include <xen/grant_table.h>
+#include <xen/rangeset.h>
#include <asm/domain.h>
extern unsigned long volatile jiffies;
@@ -50,8 +51,6 @@
int evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);
-#define CPUMAP_RUNANYWHERE 0xFFFFFFFF
-
struct vcpu
{
int vcpu_id;
@@ -79,7 +78,11 @@
atomic_t pausecnt;
- cpumap_t cpumap; /* which cpus this domain can run on */
+ /* Bitmask of CPUs on which this VCPU may run. */
+ cpumask_t cpu_affinity;
+
+ /* Bitmask of CPUs which are holding onto this VCPU's state. */
+ cpumask_t vcpu_dirty_cpumask;
struct arch_vcpu arch;
};
@@ -109,6 +112,9 @@
struct domain *next_in_list;
struct domain *next_in_hashbucket;
+
+ struct list_head rangesets;
+ spinlock_t rangesets_lock;
/* Event channel information. */
struct evtchn *evtchn[NR_EVTCHN_BUCKETS];
@@ -125,6 +131,10 @@
u16 pirq_to_evtchn[NR_PIRQS];
u32 pirq_mask[NR_PIRQS/32];
+ /* I/O capabilities (access to IRQs and memory-mapped I/O). */
+ struct rangeset *iomem_caps;
+ struct rangeset *irq_caps;
+
unsigned long domain_flags;
unsigned long vm_assist;
@@ -133,7 +143,7 @@
struct vcpu *vcpu[MAX_VIRT_CPUS];
/* Bitmask of CPUs which are holding onto this domain's state. */
- cpumask_t cpumask;
+ cpumask_t domain_dirty_cpumask;
struct arch_domain arch;
@@ -165,9 +175,9 @@
extern struct domain idle0_domain;
extern struct vcpu idle0_vcpu;
-extern struct vcpu *idle_task[NR_CPUS];
+extern struct vcpu *idle_domain[NR_CPUS];
#define IDLE_DOMAIN_ID (0x7FFFU)
-#define is_idle_task(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
+#define is_idle_domain(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
struct vcpu *alloc_vcpu(
struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
@@ -215,7 +225,7 @@
unsigned long image_start, unsigned long image_len,
unsigned long initrd_start, unsigned long initrd_len,
char *cmdline);
-extern int set_info_guest(struct domain *d, dom0_setdomaininfo_t *);
+extern int set_info_guest(struct domain *d, dom0_setvcpucontext_t *);
struct domain *find_domain_by_id(domid_t dom);
extern void domain_destruct(struct domain *d);
@@ -261,36 +271,27 @@
extern void sync_vcpu_execstate(struct vcpu *v);
/*
- * Called by the scheduler to switch to another VCPU. On entry, although
- * VCPUF_running is no longer asserted for @prev, its context is still running
- * on the local CPU and is not committed to memory. The local scheduler lock
- * is therefore still held, and interrupts are disabled, because the local CPU
- * is in an inconsistent state.
- *
- * The callee must ensure that the local CPU is no longer running in @prev's
- * context, and that the context is saved to memory, before returning.
- * Alternatively, if implementing lazy context switching, it suffices to ensure
- * that invoking sync_vcpu_execstate() will switch and commit @prev's state.
+ * Called by the scheduler to switch to another VCPU. This function must
+ * call context_saved(@prev) when the local CPU is no longer running in
+ * @prev's context, and that context is saved to memory. Alternatively, if
+ * implementing lazy context switching, it suffices to ensure that invoking
+ * sync_vcpu_execstate() will switch and commit @prev's state.
*/
extern void context_switch(
struct vcpu *prev,
struct vcpu *next);
/*
- * On some architectures (notably x86) it is not possible to entirely load
- * @next's context with interrupts disabled. These may implement a function to
- * finalise loading the new context after interrupts are re-enabled. This
- * function is not given @prev and is not permitted to access it.
- */
-extern void context_switch_finalise(
- struct vcpu *next);
+ * As described above, context_switch() must call this function when the
+ * local CPU is no longer running in @prev's context, and @prev's context is
+ * saved to memory. Alternatively, if implementing lazy context switching,
+ * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
+ */
+#define context_saved(prev) (clear_bit(_VCPUF_running, &(prev)->vcpu_flags))
/* Called by the scheduler to continue running the current VCPU. */
extern void continue_running(
struct vcpu *same);
-
-/* Is CPU 'cpu' idle right now? */
-int idle_cpu(int cpu);
void startup_cpu_idle_loop(void);
@@ -356,17 +357,11 @@
/* Currently running on a CPU? */
#define _VCPUF_running 3
#define VCPUF_running (1UL<<_VCPUF_running)
- /* Disables auto-migration between CPUs. */
-#define _VCPUF_cpu_pinned 4
-#define VCPUF_cpu_pinned (1UL<<_VCPUF_cpu_pinned)
- /* Domain migrated between CPUs. */
-#define _VCPUF_cpu_migrated 5
-#define VCPUF_cpu_migrated (1UL<<_VCPUF_cpu_migrated)
/* Initialization completed. */
-#define _VCPUF_initialised 6
+#define _VCPUF_initialised 4
#define VCPUF_initialised (1UL<<_VCPUF_initialised)
/* VCPU is not-runnable */
-#define _VCPUF_down 7
+#define _VCPUF_down 5
#define VCPUF_down (1UL<<_VCPUF_down)
/*
@@ -378,32 +373,25 @@
/* Is this domain privileged? */
#define _DOMF_privileged 1
#define DOMF_privileged (1UL<<_DOMF_privileged)
- /* May this domain do IO to physical devices? */
-#define _DOMF_physdev_access 2
-#define DOMF_physdev_access (1UL<<_DOMF_physdev_access)
/* Guest shut itself down for some reason. */
-#define _DOMF_shutdown 3
+#define _DOMF_shutdown 2
#define DOMF_shutdown (1UL<<_DOMF_shutdown)
- /* Guest is in process of shutting itself down (becomes DOMF_shutdown). */
-#define _DOMF_shuttingdown 4
-#define DOMF_shuttingdown (1UL<<_DOMF_shuttingdown)
/* Death rattle. */
-#define _DOMF_dying 5
+#define _DOMF_dying 3
#define DOMF_dying (1UL<<_DOMF_dying)
/* Domain is paused by controller software. */
-#define _DOMF_ctrl_pause 6
+#define _DOMF_ctrl_pause 4
#define DOMF_ctrl_pause (1UL<<_DOMF_ctrl_pause)
/* Domain is being debugged by controller software. */
-#define _DOMF_debugging 7
+#define _DOMF_debugging 5
#define DOMF_debugging (1UL<<_DOMF_debugging)
-static inline int domain_runnable(struct vcpu *v)
+static inline int vcpu_runnable(struct vcpu *v)
{
return ( (atomic_read(&v->pausecnt) == 0) &&
!(v->vcpu_flags & (VCPUF_blocked|VCPUF_down)) &&
- !(v->domain->domain_flags &
- (DOMF_shutdown|DOMF_shuttingdown|DOMF_ctrl_pause)) );
+ !(v->domain->domain_flags & (DOMF_shutdown|DOMF_ctrl_pause)) );
}
void vcpu_pause(struct vcpu *v);
@@ -414,6 +402,8 @@
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
+
static inline void vcpu_unblock(struct vcpu *v)
{
if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
@@ -422,8 +412,6 @@
#define IS_PRIV(_d) \
(test_bit(_DOMF_privileged, &(_d)->domain_flags))
-#define IS_CAPABLE_PHYSDEV(_d) \
- (test_bit(_DOMF_physdev_access, &(_d)->domain_flags))
#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))
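For illustration (not part of this changeset): a hedged sketch of the contract in the rewritten context_switch() comment above. The arch code commits @prev's state, announces it with context_saved() (which clears _VCPUF_running, per the macro above), and only then may the scheduler run @prev elsewhere. Helper names are hypothetical:

    void context_switch(struct vcpu *prev, struct vcpu *next)
    {
        arch_save_state(prev);   /* hypothetical: commit @prev to memory */
        context_saved(prev);     /* @prev may now be rescheduled anywhere */
        arch_load_state(next);   /* hypothetical: may re-enable interrupts */
    }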
diff -r 25e3c8668f1f -r 8af1199488d3 tools/guest-headers/Makefile
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/tools/guest-headers/Makefile Mon Jan 9 11:22:17 2006
@@ -0,0 +1,11 @@
+
+XEN_ROOT=../..
+linuxsparsetree = $(XEN_ROOT)/linux-2.6-xen-sparse
+
+check:
+
+install:
+ mkdir -p $(DESTDIR)/usr/include/xen/linux
+ install -m0644 $(linuxsparsetree)/include/asm-xen/linux-public/*.h $(DESTDIR)/usr/include/xen/linux
+
+clean:
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/process-linux-xen.c
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/process-linux-xen.c Mon Jan 9 11:22:17 2006
@@ -0,0 +1,848 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * 04/11/17 Ashok Raj <ashok.raj@xxxxxxxxx> Added CPU Hotplug Support
+ */
+#ifdef XEN
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/symbols.h>
+#include <xen/smp.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/unwind.h>
+#else
+#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */
+#include <linux/config.h>
+
+#include <linux/cpu.h>
+#include <linux/pm.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/personality.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/thread_info.h>
+#include <linux/unistd.h>
+#include <linux/efi.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/kprobes.h>
+
+#include <asm/cpu.h>
+#include <asm/delay.h>
+#include <asm/elf.h>
+#include <asm/ia32.h>
+#include <asm/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#include <asm/user.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#include "sigframe.h"
+
+void (*ia64_mark_idle)(int);
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+#endif
+
+void
+ia64_do_show_stack (struct unw_frame_info *info, void *arg)
+{
+ unsigned long ip, sp, bsp;
+ char buf[128]; /* don't make it so big that it overflows the stack! */
+
+ printk("\nCall Trace:\n");
+ do {
+ unw_get_ip(info, &ip);
+ if (ip == 0)
+ break;
+
+ unw_get_sp(info, &sp);
+ unw_get_bsp(info, &bsp);
+ snprintf(buf, sizeof(buf),
+ " [<%016lx>] %%s\n"
+ " sp=%016lx
bsp=%016lx\n",
+ ip, sp, bsp);
+ print_symbol(buf, ip);
+ } while (unw_unwind(info) >= 0);
+}
+
+void
+show_stack (struct task_struct *task, unsigned long *sp)
+{
+ if (!task)
+ unw_init_running(ia64_do_show_stack, NULL);
+ else {
+ struct unw_frame_info info;
+
+ unw_init_from_blocked_task(&info, task);
+ ia64_do_show_stack(&info, NULL);
+ }
+}
+
+#ifndef XEN
+void
+dump_stack (void)
+{
+ show_stack(NULL, NULL);
+}
+
+EXPORT_SYMBOL(dump_stack);
+#endif
+
+#ifdef XEN
+void
+show_registers(struct pt_regs *regs)
+#else
+void
+show_regs (struct pt_regs *regs)
+#endif
+{
+ unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+#ifndef XEN
+ print_modules();
+ printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid,
smp_processor_id(), current->comm);
+ printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n",
+ regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
+#else
+ printk("\nCPU %d\n", smp_processor_id());
+ printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
+ regs->cr_ipsr, regs->cr_ifs, ip);
+#endif
+ print_symbol("ip is at %s\n", ip);
+ printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+ regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+ printk("rnat: %016lx bsps: %016lx pr : %016lx\n",
+ regs->ar_rnat, regs->ar_bspstore, regs->pr);
+ printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+ regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+ printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+ printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6,
regs->b7);
+ printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
+ regs->f6.u.bits[1], regs->f6.u.bits[0],
+ regs->f7.u.bits[1], regs->f7.u.bits[0]);
+ printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
+ regs->f8.u.bits[1], regs->f8.u.bits[0],
+ regs->f9.u.bits[1], regs->f9.u.bits[0]);
+ printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+ regs->f10.u.bits[1], regs->f10.u.bits[0],
+ regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+ printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2,
regs->r3);
+ printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9,
regs->r10);
+ printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
regs->r12, regs->r13);
+ printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
regs->r15, regs->r16);
+ printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
regs->r18, regs->r19);
+ printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
regs->r21, regs->r22);
+ printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
regs->r24, regs->r25);
+ printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
regs->r27, regs->r28);
+ printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
regs->r30, regs->r31);
+
+ if (user_mode(regs)) {
+ /* print the stacked registers */
+ unsigned long val, *bsp, ndirty;
+ int i, sof, is_nat = 0;
+
+ sof = regs->cr_ifs & 0x7f; /* size of frame */
+ ndirty = (regs->loadrs >> 19);
+ bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
+ for (i = 0; i < sof; ++i) {
+ get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i));
+ printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
+ ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
+ }
+ } else
+ show_stack(NULL, NULL);
+}
+
+#ifndef XEN
+void
+do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+{
+ if (fsys_mode(current, &scr->pt)) {
+ /* defer signal-handling etc. until we return to privilege-level 0. */
+ if (!ia64_psr(&scr->pt)->lp)
+ ia64_psr(&scr->pt)->lp = 1;
+ return;
+ }
+
+#ifdef CONFIG_PERFMON
+ if (current->thread.pfm_needs_checking)
+ pfm_handle_work();
+#endif
+
+ /* deal with pending signal delivery */
+ if (test_thread_flag(TIF_SIGPENDING))
+ ia64_do_signal(oldset, scr, in_syscall);
+}
+
+static int pal_halt = 1;
+static int can_do_pal_halt = 1;
+
+static int __init nohalt_setup(char * str)
+{
+ pal_halt = can_do_pal_halt = 0;
+ return 1;
+}
+__setup("nohalt", nohalt_setup);
+
+void
+update_pal_halt_status(int status)
+{
+ can_do_pal_halt = pal_halt && status;
+}
+
+/*
+ * We use this if we don't have any better idle routine..
+ */
+void
+default_idle (void)
+{
+ local_irq_enable();
+ while (!need_resched())
+ if (can_do_pal_halt)
+ safe_halt();
+ else
+ cpu_relax();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+ extern void ia64_cpu_local_tick (void);
+ unsigned int this_cpu = smp_processor_id();
+
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ max_xtp();
+ local_irq_disable();
+ idle_domain_exit();
+ ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
+ /*
+ * The above is a point of no-return, the processor is
+ * expected to be in SAL loop now.
+ */
+ BUG();
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void cpu_idle_wait(void)
+{
+ unsigned int cpu, this_cpu = get_cpu();
+ cpumask_t map;
+
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+ put_cpu();
+
+ cpus_clear(map);
+ for_each_online_cpu(cpu) {
+ per_cpu(cpu_idle_state, cpu) = 1;
+ cpu_set(cpu, map);
+ }
+
+ __get_cpu_var(cpu_idle_state) = 0;
+
+ wmb();
+ do {
+ ssleep(1);
+ for_each_online_cpu(cpu) {
+ if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+ cpu_clear(cpu, map);
+ }
+ cpus_and(map, map, cpu_online_map);
+ } while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+void __attribute__((noreturn))
+cpu_idle (void)
+{
+ void (*mark_idle)(int) = ia64_mark_idle;
+
+ /* endless idle loop with no priority at all */
+ while (1) {
+#ifdef CONFIG_SMP
+ if (!need_resched())
+ min_xtp();
+#endif
+ while (!need_resched()) {
+ void (*idle)(void);
+
+ if (__get_cpu_var(cpu_idle_state))
+ __get_cpu_var(cpu_idle_state) = 0;
+
+ rmb();
+ if (mark_idle)
+ (*mark_idle)(1);
+
+ idle = pm_idle;
+ if (!idle)
+ idle = default_idle;
+ (*idle)();
+ }
+
+ if (mark_idle)
+ (*mark_idle)(0);
+
+#ifdef CONFIG_SMP
+ normal_xtp();
+#endif
+ schedule();
+ check_pgt_cache();
+ if (cpu_is_offline(smp_processor_id()))
+ play_dead();
+ }
+}
+
+void
+ia64_save_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+ unsigned long info;
+#endif
+
+ if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+ ia64_save_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+ if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+ pfm_save_regs(task);
+
+ info = __get_cpu_var(pfm_syst_info);
+ if (info & PFM_CPUINFO_SYST_WIDE)
+ pfm_syst_wide_update_task(task, info, 0);
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(ia64_task_regs(task)))
+ ia32_save_state(task);
+#endif
+}
+
+void
+ia64_load_extra (struct task_struct *task)
+{
+#ifdef CONFIG_PERFMON
+ unsigned long info;
+#endif
+
+ if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
+ ia64_load_debug_regs(&task->thread.dbr[0]);
+
+#ifdef CONFIG_PERFMON
+ if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+ pfm_load_regs(task);
+
+ info = __get_cpu_var(pfm_syst_info);
+ if (info & PFM_CPUINFO_SYST_WIDE)
+ pfm_syst_wide_update_task(task, info, 1);
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(ia64_task_regs(task)))
+ ia32_load_state(task);
+#endif
+}
+
+/*
+ * Copy the state of an ia-64 thread.
+ *
+ * We get here through the following call chain:
+ *
+ * from user-level: from kernel:
+ *
+ * <clone syscall> <some kernel call frames>
+ * sys_clone :
+ * do_fork do_fork
+ * copy_thread copy_thread
+ *
+ * This means that the stack layout is as follows:
+ *
+ * +---------------------+ (highest addr)
+ * | struct pt_regs |
+ * +---------------------+
+ * | struct switch_stack |
+ * +---------------------+
+ * | |
+ * | memory stack |
+ * | | <-- sp (lowest addr)
+ * +---------------------+
+ *
+ * Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an
+ * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
+ * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the
+ * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since
+ * the stack is page aligned and the page size is at least 4KB, this is always the case,
+ * so there is nothing to worry about.
+ */
+int
+copy_thread (int nr, unsigned long clone_flags,
+ unsigned long user_stack_base, unsigned long user_stack_size,
+ struct task_struct *p, struct pt_regs *regs)
+{
+ extern char ia64_ret_from_clone, ia32_ret_from_clone;
+ struct switch_stack *child_stack, *stack;
+ unsigned long rbs, child_rbs, rbs_size;
+ struct pt_regs *child_ptregs;
+ int retval = 0;
+
+#ifdef CONFIG_SMP
+ /*
+ * For SMP idle threads, fork_by_hand() calls do_fork with
+ * NULL regs.
+ */
+ if (!regs)
+ return 0;
+#endif
+
+ stack = ((struct switch_stack *) regs) - 1;
+
+ child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
+ child_stack = (struct switch_stack *) child_ptregs - 1;
+
+ /* copy parent's switch_stack & pt_regs to child: */
+ memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+ rbs = (unsigned long) current + IA64_RBS_OFFSET;
+ child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
+ rbs_size = stack->ar_bspstore - rbs;
+
+ /* copy the parent's register backing store to the child: */
+ memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+
+ if (likely(user_mode(child_ptregs))) {
+ if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
+ child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
+ if (user_stack_base) {
+ child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+ child_ptregs->ar_bspstore = user_stack_base;
+ child_ptregs->ar_rnat = 0;
+ child_ptregs->loadrs = 0;
+ }
+ } else {
+ /*
+ * Note: we simply preserve the relative position of
+ * the stack pointer here. There is no need to
+ * allocate a scratch area here, since that will have
+ * been taken care of by the caller of sys_clone()
+ * already.
+ */
+ child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
+ child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */
+ }
+ child_stack->ar_bspstore = child_rbs + rbs_size;
+ if (IS_IA32_PROCESS(regs))
+ child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
+ else
+ child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+ /* copy parts of thread_struct: */
+ p->thread.ksp = (unsigned long) child_stack - 16;
+
+ /* stop some PSR bits from being inherited.
+ * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+ * therefore we must specify them explicitly here and not include them in
+ * IA64_PSR_BITS_TO_CLEAR.
+ */
+ child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+ & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+ /*
+ * NOTE: The calling convention considers all floating point
+ * registers in the high partition (fph) to be scratch. Since
+ * the only way to get to this point is through a system call,
+ * we know that the values in fph are all dead. Hence, there
+ * is no need to inherit the fph state from the parent to the
+ * child and all we have to do is to make sure that
+ * IA64_THREAD_FPH_VALID is cleared in the child.
+ *
+ * XXX We could push this optimization a bit further by
+ * clearing IA64_THREAD_FPH_VALID on ANY system call.
+ * However, it's not clear this is worth doing. Also, it
+ * would be a slight deviation from the normal Linux system
+ * call behavior where scratch registers are preserved across
+ * system calls (unless used by the system call itself).
+ */
+# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
+ | IA64_THREAD_PM_VALID)
+# define THREAD_FLAGS_TO_SET 0
+ p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
+ | THREAD_FLAGS_TO_SET);
+ ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
+#ifdef CONFIG_IA32_SUPPORT
+ /*
+ * If we're cloning an IA32 task then save the IA32 extra
+ * state from the current task to the new task
+ */
+ if (IS_IA32_PROCESS(ia64_task_regs(current))) {
+ ia32_save_state(p);
+ if (clone_flags & CLONE_SETTLS)
+ retval = ia32_clone_tls(p, child_ptregs);
+
+ /* Copy partially mapped page list */
+ if (!retval)
+ retval = ia32_copy_partial_page_list(p, clone_flags);
+ }
+#endif
+
+#ifdef CONFIG_PERFMON
+ if (current->thread.pfm_context)
+ pfm_inherit(p, child_ptregs);
+#endif
+ return retval;
+}
+
+static void
+do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+ unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm;
+ elf_greg_t *dst = arg;
+ struct pt_regs *pt;
+ char nat;
+ int i;
+
+ memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ unw_get_sp(info, &sp);
+ pt = (struct pt_regs *) (sp + 16);
+
+ urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
+
+ if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
+ return;
+
+ ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
+ &ar_rnat);
+
+ /*
+ * coredump format:
+ * r0-r31
+ * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ * predicate registers (p0-p63)
+ * b0-b7
+ * ip cfm user-mask
+ * ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+ */
+
+ /* r0 is zero */
+ for (i = 1, mask = (1UL << i); i < 32; ++i) {
+ unw_get_gr(info, i, &dst[i], &nat);
+ if (nat)
+ nat_bits |= mask;
+ mask <<= 1;
+ }
+ dst[32] = nat_bits;
+ unw_get_pr(info, &dst[33]);
+
+ for (i = 0; i < 8; ++i)
+ unw_get_br(info, i, &dst[34 + i]);
+
+ unw_get_rp(info, &ip);
+ dst[42] = ip + ia64_psr(pt)->ri;
+ dst[43] = cfm;
+ dst[44] = pt->cr_ipsr & IA64_PSR_UM;
+
+ unw_get_ar(info, UNW_AR_RSC, &dst[45]);
+ /*
+ * For bsp and bspstore, unw_get_ar() would return the kernel
+ * addresses, but we need the user-level addresses instead:
+ */
+ dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */
+ dst[47] = pt->ar_bspstore;
+ dst[48] = ar_rnat;
+ unw_get_ar(info, UNW_AR_CCV, &dst[49]);
+ unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
+ unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
+ dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */
+ unw_get_ar(info, UNW_AR_LC, &dst[53]);
+ unw_get_ar(info, UNW_AR_EC, &dst[54]);
+ unw_get_ar(info, UNW_AR_CSD, &dst[55]);
+ unw_get_ar(info, UNW_AR_SSD, &dst[56]);
+}
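
The bare dst[] indices above implement the coredump layout documented in the comment. For readability, a sketch of that index map as named constants (the names are hypothetical; the kernel itself uses the raw indices):

    /* Index map for the elf_gregset_t filled in by do_copy_task_regs() */
    enum {
        DUMP_GR0         =  0,  /* r0..r31  -> dst[0..31]      */
        DUMP_NAT         = 32,  /* NaT bits for r0..r31        */
        DUMP_PR          = 33,  /* predicate registers p0..p63 */
        DUMP_BR0         = 34,  /* b0..b7   -> dst[34..41]     */
        DUMP_IP          = 42,
        DUMP_CFM         = 43,
        DUMP_UM          = 44,  /* cr.ipsr & IA64_PSR_UM       */
        DUMP_AR_RSC      = 45,
        DUMP_AR_BSP      = 46,  /* user rbs end, by convention */
        DUMP_AR_BSPSTORE = 47,
        DUMP_AR_RNAT     = 48,
        DUMP_AR_CCV      = 49,
        DUMP_AR_UNAT     = 50,
        DUMP_AR_FPSR     = 51,
        DUMP_AR_PFS      = 52,
        DUMP_AR_LC       = 53,
        DUMP_AR_EC       = 54,
        DUMP_AR_CSD      = 55,
        DUMP_AR_SSD      = 56,
    };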
+
+void
+do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
+{
+ elf_fpreg_t *dst = arg;
+ int i;
+
+ memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* f0 is 0.0, f1 is 1.0 */
+
+ for (i = 2; i < 32; ++i)
+ unw_get_fr(info, i, dst + i);
+
+ ia64_flush_fph(task);
+ if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
+ memcpy(dst + 32, task->thread.fph, 96*16);
+}
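
The magic `96*16` above is the size of the high floating-point partition: f32..f127 is 96 registers of 16 bytes each. A compile-time check makes the constant self-documenting (sketch; assumes a 16-byte register layout like struct ia64_fpreg):

    #include <assert.h>

    struct fpreg { unsigned long u[2]; };  /* 16 bytes, like struct ia64_fpreg */

    /* f32..f127: the "fph" partition is 96 registers of 16 bytes */
    static struct fpreg fph[96];
    static_assert(sizeof(fph) == 96 * 16, "fph partition is 1536 bytes");

    int main(void) { return 0; }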
+
+void
+do_copy_regs (struct unw_frame_info *info, void *arg)
+{
+ do_copy_task_regs(current, info, arg);
+}
+
+void
+do_dump_fpu (struct unw_frame_info *info, void *arg)
+{
+ do_dump_task_fpu(current, info, arg);
+}
+
+int
+dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
+{
+ struct unw_frame_info tcore_info;
+
+ if (current == task) {
+ unw_init_running(do_copy_regs, regs);
+ } else {
+ memset(&tcore_info, 0, sizeof(tcore_info));
+ unw_init_from_blocked_task(&tcore_info, task);
+ do_copy_task_regs(task, &tcore_info, regs);
+ }
+ return 1;
+}
+
+void
+ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
+{
+ unw_init_running(do_copy_regs, dst);
+}
+
+int
+dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
+{
+ struct unw_frame_info tcore_info;
+
+ if (current == task) {
+ unw_init_running(do_dump_fpu, dst);
+ } else {
+ memset(&tcore_info, 0, sizeof(tcore_info));
+ unw_init_from_blocked_task(&tcore_info, task);
+ do_dump_task_fpu(task, &tcore_info, dst);
+ }
+ return 1;
+}
+
+int
+dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
+{
+ unw_init_running(do_dump_fpu, dst);
+ return 1; /* f0-f31 are always valid so we always return 1 */
+}
+
+long
+sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp,
+ struct pt_regs *regs)
+{
+ char *fname;
+ int error;
+
+ fname = getname(filename);
+ error = PTR_ERR(fname);
+ if (IS_ERR(fname))
+ goto out;
+ error = do_execve(fname, argv, envp, regs);
+ putname(fname);
+out:
+ return error;
+}
+
+pid_t
+kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
+{
+ extern void start_kernel_thread (void);
+ unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
+ struct {
+ struct switch_stack sw;
+ struct pt_regs pt;
+ } regs;
+
+ memset(&regs, 0, sizeof(regs));
+ regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
+ regs.pt.r1 = helper_fptr[1]; /* set GP */
+ regs.pt.r9 = (unsigned long) fn; /* 1st argument */
+ regs.pt.r11 = (unsigned long) arg; /* 2nd argument */
+ /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */
+ regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+ regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */
+ regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
+ regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
+ regs.sw.pr = (1 << PRED_KERNEL_STACK);
+ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
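
kernel_thread() hand-assembles a switch_stack/pt_regs pair so that do_fork() resumes in start_kernel_thread, with fn and arg recovered from r9/r11 by kernel_thread_helper() below. A hypothetical caller, for illustration only (not part of the patch):

    static int worker(void *arg)
    {
        /* ... kernel-thread body ... */
        return 0;
    }

    /* somewhere in initialization code (sketch): */
    pid_t pid = kernel_thread(worker, NULL, CLONE_FS | CLONE_FILES);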
+
+/* This gets called from kernel_thread() via ia64_invoke_thread_helper(). */
+int
+kernel_thread_helper (int (*fn)(void *), void *arg)
+{
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(ia64_task_regs(current))) {
+ /* A kernel thread is always a 64-bit process. */
+ current->thread.map_base = DEFAULT_MAP_BASE;
+ current->thread.task_size = DEFAULT_TASK_SIZE;
+ ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
+ ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
+ }
+#endif
+ return (*fn)(arg);
+}
+
+/*
+ * Flush thread state. This is called when a thread does an execve().
+ */
+void
+flush_thread (void)
+{
+ /*
+ * Remove function-return probe instances associated with this task
+ * and put them back on the free list. Do not insert an exit probe for
+ * this function, it will be disabled by kprobe_flush_task if you do.
+ */
+ kprobe_flush_task(current);
+
+ /* drop floating-point and debug-register state if it exists: */
+ current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
+ ia64_drop_fpu(current);
+ if (IS_IA32_PROCESS(ia64_task_regs(current)))
+ ia32_drop_partial_page_list(current);
+}
+
+/*
+ * Clean up state associated with current thread. This is called when
+ * the thread calls exit().
+ */
+void
+exit_thread (void)
+{
+
+ /*
+ * Remove function-return probe instances associated with this task
+ * and put them back on the free list. Do not insert an exit probe for
+ * this function, it will be disabled by kprobe_flush_task if you do.
+ */
+ kprobe_flush_task(current);
+
+ ia64_drop_fpu(current);
+#ifdef CONFIG_PERFMON
+ /* if needed, stop monitoring and flush state to perfmon context */
+ if (current->thread.pfm_context)
+ pfm_exit_thread(current);
+
+ /* free debug register resources */
+ if (current->thread.flags & IA64_THREAD_DBG_VALID)
+ pfm_release_debug_registers(current);
+#endif
+ if (IS_IA32_PROCESS(ia64_task_regs(current)))
+ ia32_drop_partial_page_list(current);
+}
+
+unsigned long
+get_wchan (struct task_struct *p)
+{
+ struct unw_frame_info info;
+ unsigned long ip;
+ int count = 0;
+
+ /*
+ * Note: p may not be a blocked task (it could be current or
+ * another process running on some other CPU). Rather than
+ * trying to determine if p is really blocked, we just assume
+ * it's blocked and rely on the unwind routines to fail
+ * gracefully if the process wasn't really blocked after all.
+ * --davidm 99/12/15
+ */
+ unw_init_from_blocked_task(&info, p);
+ do {
+ if (unw_unwind(&info) < 0)
+ return 0;
+ unw_get_ip(&info, &ip);
+ if (!in_sched_functions(ip))
+ return ip;
+ } while (count++ < 16);
+ return 0;
+}
+
+void
+cpu_halt (void)
+{
+ pal_power_mgmt_info_u_t power_info[8];
+ unsigned long min_power;
+ int i, min_power_state;
+
+ if (ia64_pal_halt_info(power_info) != 0)
+ return;
+
+ min_power_state = 0;
+ min_power = power_info[0].pal_power_mgmt_info_s.power_consumption;
+ for (i = 1; i < 8; ++i)
+ if (power_info[i].pal_power_mgmt_info_s.im
+ && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) {
+ min_power = power_info[i].pal_power_mgmt_info_s.power_consumption;
+ min_power_state = i;
+ }
+
+ while (1)
+ ia64_pal_halt(min_power_state);
+}
+
+void
+machine_restart (char *restart_cmd)
+{
+ (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
+}
+
+void
+machine_halt (void)
+{
+ cpu_halt();
+}
+
+void
+machine_power_off (void)
+{
+ if (pm_power_off)
+ pm_power_off();
+ machine_halt();
+}
+#endif // !XEN
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind.c
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind.c Mon Jan 9 11:22:17 2006
@@ -0,0 +1,2332 @@
+/*
+ * Copyright (C) 1999-2004 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2003 Fenghua Yu <fenghua.yu@xxxxxxxxx>
+ * - Change pt_regs_off() to make it less dependent on pt_regs structure.
+ */
+/*
+ * This file implements call frame unwind support for the Linux
+ * kernel. Parsing and processing the unwind information is
+ * time-consuming, so this implementation translates the unwind
+ * descriptors into unwind scripts. These scripts are very simple
+ * (basically a sequence of assignments) and efficient to execute.
+ * They are cached for later re-use. Each script is specific for a
+ * given instruction pointer address and the set of predicate values
+ * that the script depends on (most unwind descriptors are
+ * unconditional and scripts often do not depend on predicates at
+ * all). This code is based on the unwind conventions described in
+ * the "IA-64 Software Conventions and Runtime Architecture" manual.
+ *
+ * SMP conventions:
+ * o updates to the global unwind data (in structure "unw") are serialized
+ * by the unw.lock spinlock
+ * o each unwind script has its own read-write lock; a thread must acquire
+ * a read lock before executing a script and must acquire a write lock
+ * before modifying a script
+ * o if both the unw.lock spinlock and a script's read-write lock must be
+ * acquired, then the read-write lock must be acquired first.
+ */
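
The comment above pins down a lock hierarchy: the global unw.lock is always taken before any per-script read-write lock. A sketch of the writer path that respects this order (this is the shape find_save_locs()/build_script() take later in this file):

    unsigned long flags;
    struct unw_script *scr;

    spin_lock_irqsave(&unw.lock, flags);   /* 1st: global unwind data    */
    scr = script_new(ip);                  /* returns with scr->lock
                                              write-held on success      */
    if (scr)
        write_unlock(&scr->lock);          /* release in reverse order   */
    spin_unlock_irqrestore(&unw.lock, flags);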
+#ifdef XEN
+#include <xen/types.h>
+#include <xen/elf.h>
+#include <xen/kernel.h>
+#include <xen/sched.h>
+#include <xen/xmalloc.h>
+#include <xen/spinlock.h>
+
+// work around
+#ifdef CONFIG_SMP
+#define write_trylock(lock) _raw_write_trylock(lock)
+#else
+#define write_trylock(lock) ({1;})
+#endif
+
+#else
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#endif
+
+#include <asm/unwind.h>
+
+#include <asm/delay.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "entry.h"
+#include "unwind_i.h"
+
+#define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */
+#define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE)
+
+#define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1)
+#define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE)
+
+#define UNW_STATS 0 /* WARNING: this disables interrupts for long time-spans!! */
+
+#ifdef UNW_DEBUG
+ static unsigned int unw_debug_level = UNW_DEBUG;
+# define UNW_DEBUG_ON(n) unw_debug_level >= n
+ /* Do not code a printk level, not all debug lines end in newline */
+# define UNW_DPRINT(n, ...) if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
+# define inline
+#else /* !UNW_DEBUG */
+# define UNW_DEBUG_ON(n) 0
+# define UNW_DPRINT(n, ...)
+#endif /* UNW_DEBUG */
+
+#if UNW_STATS
+# define STAT(x...) x
+#else
+# define STAT(x...)
+#endif
+
+#ifdef XEN
+#define alloc_reg_state() xmalloc(struct unw_reg_state)
+#define free_reg_state(usr) xfree(usr)
+#define alloc_labeled_state() xmalloc(struct unw_labeled_state)
+#define free_labeled_state(usr) xfree(usr)
+#else
+#define alloc_reg_state() kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC)
+#define free_reg_state(usr) kfree(usr)
+#define alloc_labeled_state() kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC)
+#define free_labeled_state(usr) kfree(usr)
+#endif
+
+typedef unsigned long unw_word;
+typedef unsigned char unw_hash_index_t;
+
+static struct {
+ spinlock_t lock; /* spinlock for unwind data */
+
+ /* list of unwind tables (one per load-module) */
+ struct unw_table *tables;
+
+ unsigned long r0; /* constant 0 for r0 */
+
+ /* table of registers that prologues can save (and order in which they're saved): */
+ const unsigned char save_order[8];
+
+ /* maps a preserved register index (preg_index) to corresponding switch_stack offset: */
+ unsigned short sw_off[sizeof(struct unw_frame_info) / 8];
+
+ unsigned short lru_head; /* index of least-recently used script */
+ unsigned short lru_tail; /* index of most-recently used script */
+
+ /* index into unw_frame_info for preserved register i */
+ unsigned short preg_index[UNW_NUM_REGS];
+
+ short pt_regs_offsets[32];
+
+ /* unwind table for the kernel: */
+ struct unw_table kernel_table;
+
+ /* unwind table describing the gate page (kernel code that is mapped into user space): */
+ size_t gate_table_size;
+ unsigned long *gate_table;
+
+ /* hash table that maps instruction pointer to script index: */
+ unsigned short hash[UNW_HASH_SIZE];
+
+ /* script cache: */
+ struct unw_script cache[UNW_CACHE_SIZE];
+
+# ifdef UNW_DEBUG
+ const char *preg_name[UNW_NUM_REGS];
+# endif
+# if UNW_STATS
+ struct {
+ struct {
+ int lookups;
+ int hinted_hits;
+ int normal_hits;
+ int collision_chain_traversals;
+ } cache;
+ struct {
+ unsigned long build_time;
+ unsigned long run_time;
+ unsigned long parse_time;
+ int builds;
+ int news;
+ int collisions;
+ int runs;
+ } script;
+ struct {
+ unsigned long init_time;
+ unsigned long unwind_time;
+ int inits;
+ int unwinds;
+ } api;
+ } stat;
+# endif
+} unw = {
+ .tables = &unw.kernel_table,
+ .lock = SPIN_LOCK_UNLOCKED,
+ .save_order = {
+ UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
+ UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
+ },
+ .preg_index = {
+ offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_GR */
+ offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_MEM */
+ offsetof(struct unw_frame_info, bsp_loc)/8,
+ offsetof(struct unw_frame_info, bspstore_loc)/8,
+ offsetof(struct unw_frame_info, pfs_loc)/8,
+ offsetof(struct unw_frame_info, rnat_loc)/8,
+ offsetof(struct unw_frame_info, psp)/8,
+ offsetof(struct unw_frame_info, rp_loc)/8,
+ offsetof(struct unw_frame_info, r4)/8,
+ offsetof(struct unw_frame_info, r5)/8,
+ offsetof(struct unw_frame_info, r6)/8,
+ offsetof(struct unw_frame_info, r7)/8,
+ offsetof(struct unw_frame_info, unat_loc)/8,
+ offsetof(struct unw_frame_info, pr_loc)/8,
+ offsetof(struct unw_frame_info, lc_loc)/8,
+ offsetof(struct unw_frame_info, fpsr_loc)/8,
+ offsetof(struct unw_frame_info, b1_loc)/8,
+ offsetof(struct unw_frame_info, b2_loc)/8,
+ offsetof(struct unw_frame_info, b3_loc)/8,
+ offsetof(struct unw_frame_info, b4_loc)/8,
+ offsetof(struct unw_frame_info, b5_loc)/8,
+ offsetof(struct unw_frame_info, f2_loc)/8,
+ offsetof(struct unw_frame_info, f3_loc)/8,
+ offsetof(struct unw_frame_info, f4_loc)/8,
+ offsetof(struct unw_frame_info, f5_loc)/8,
+ offsetof(struct unw_frame_info, fr_loc[16 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[17 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[18 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[19 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[20 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[21 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[22 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[23 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[24 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[25 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[26 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[27 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[28 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[29 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[30 - 16])/8,
+ offsetof(struct unw_frame_info, fr_loc[31 - 16])/8,
+ },
+ .pt_regs_offsets = {
+ [0] = -1,
+ offsetof(struct pt_regs, r1),
+ offsetof(struct pt_regs, r2),
+ offsetof(struct pt_regs, r3),
+ [4] = -1, [5] = -1, [6] = -1, [7] = -1,
+ offsetof(struct pt_regs, r8),
+ offsetof(struct pt_regs, r9),
+ offsetof(struct pt_regs, r10),
+ offsetof(struct pt_regs, r11),
+ offsetof(struct pt_regs, r12),
+ offsetof(struct pt_regs, r13),
+ offsetof(struct pt_regs, r14),
+ offsetof(struct pt_regs, r15),
+ offsetof(struct pt_regs, r16),
+ offsetof(struct pt_regs, r17),
+ offsetof(struct pt_regs, r18),
+ offsetof(struct pt_regs, r19),
+ offsetof(struct pt_regs, r20),
+ offsetof(struct pt_regs, r21),
+ offsetof(struct pt_regs, r22),
+ offsetof(struct pt_regs, r23),
+ offsetof(struct pt_regs, r24),
+ offsetof(struct pt_regs, r25),
+ offsetof(struct pt_regs, r26),
+ offsetof(struct pt_regs, r27),
+ offsetof(struct pt_regs, r28),
+ offsetof(struct pt_regs, r29),
+ offsetof(struct pt_regs, r30),
+ offsetof(struct pt_regs, r31),
+ },
+ .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 },
+#ifdef UNW_DEBUG
+ .preg_name = {
+ "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs",
"ar.rnat", "psp", "rp",
+ "r4", "r5", "r6", "r7",
+ "ar.unat", "pr", "ar.lc", "ar.fpsr",
+ "b1", "b2", "b3", "b4", "b5",
+ "f2", "f3", "f4", "f5",
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+ }
+#endif
+};
+
+static inline int
+read_only (void *addr)
+{
+ return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0);
+}
+
+/*
+ * Returns offset of rREG in struct pt_regs.
+ */
+static inline unsigned long
+pt_regs_off (unsigned long reg)
+{
+ short off = -1;
+
+ if (reg < ARRAY_SIZE(unw.pt_regs_offsets))
+ off = unw.pt_regs_offsets[reg];
+
+ if (off < 0) {
+ UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n",
__FUNCTION__, reg);
+ off = 0;
+ }
+ return (unsigned long) off;
+}
+
+static inline struct pt_regs *
+get_scratch_regs (struct unw_frame_info *info)
+{
+ if (!info->pt) {
+ /* This should not happen with valid unwind info. */
+ UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting
info->pt\n", __FUNCTION__);
+ if (info->flags & UNW_FLAG_INTERRUPT_FRAME)
+ info->pt = (unsigned long) ((struct pt_regs *)
info->psp - 1);
+ else
+ info->pt = info->sp - 16;
+ }
+ UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp,
info->pt);
+ return (struct pt_regs *) info->pt;
+}
+
+/* Unwind accessors. */
+
+int
+unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write)
+{
+ unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat;
+ struct unw_ireg *ireg;
+ struct pt_regs *pt;
+
+ if ((unsigned) regnum - 1 >= 127) {
+ if (regnum == 0 && !write) {
+ *val = 0; /* read r0 always returns 0 */
+ *nat = 0;
+ return 0;
+ }
+ UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n",
+ __FUNCTION__, regnum);
+ return -1;
+ }
+
+ if (regnum < 32) {
+ if (regnum >= 4 && regnum <= 7) {
+ /* access a preserved register */
+ ireg = &info->r4 + (regnum - 4);
+ addr = ireg->loc;
+ if (addr) {
+ nat_addr = addr + ireg->nat.off;
+ switch (ireg->nat.type) {
+ case UNW_NAT_VAL:
+ /* simulate getf.sig/setf.sig */
+ if (write) {
+ if (*nat) {
+ /* write NaTVal and be done with it */
+ addr[0] = 0;
+ addr[1] = 0x1fffe;
+ return 0;
+ }
+ addr[1] = 0x1003e;
+ } else {
+ if (addr[0] == 0 && addr[1] == 0x1fffe) {
+ /* return NaT and be done with it */
+ *val = 0;
+ *nat = 1;
+ return 0;
+ }
+ }
+ /* fall through */
+ case UNW_NAT_NONE:
+ dummy_nat = 0;
+ nat_addr = &dummy_nat;
+ break;
+
+ case UNW_NAT_MEMSTK:
+ nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+ break;
+
+ case UNW_NAT_REGSTK:
+ nat_addr = ia64_rse_rnat_addr(addr);
+ if ((unsigned long) addr < info->regstk.limit
+ || (unsigned long) addr >= info->regstk.top)
+ {
+ UNW_DPRINT(0, "unwind.%s: %p outside of regstk "
+ "[0x%lx-0x%lx)\n",
+ __FUNCTION__, (void *) addr,
+ info->regstk.limit,
+ info->regstk.top);
+ return -1;
+ }
+ if ((unsigned long) nat_addr >= info->regstk.top)
+ nat_addr = &info->sw->ar_rnat;
+ nat_mask = (1UL << ia64_rse_slot_num(addr));
+ break;
+ }
+ } else {
+ addr = &info->sw->r4 + (regnum - 4);
+ nat_addr = &info->sw->ar_unat;
+ nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+ }
+ } else {
+ /* access a scratch register */
+ pt = get_scratch_regs(info);
+ addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum));
+ if (info->pri_unat_loc)
+ nat_addr = info->pri_unat_loc;
+ else
+ nat_addr = &info->sw->caller_unat;
+ nat_mask = (1UL << ((long) addr & 0x1f8)/8);
+ }
+ } else {
+ /* access a stacked register */
+ addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32);
+ nat_addr = ia64_rse_rnat_addr(addr);
+ if ((unsigned long) addr < info->regstk.limit
+ || (unsigned long) addr >= info->regstk.top)
+ {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to access
register outside "
+ "of rbs\n", __FUNCTION__);
+ return -1;
+ }
+ if ((unsigned long) nat_addr >= info->regstk.top)
+ nat_addr = &info->sw->ar_rnat;
+ nat_mask = (1UL << ia64_rse_slot_num(addr));
+ }
+
+ if (write) {
+ if (read_only(addr)) {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to write
read-only location\n",
+ __FUNCTION__);
+ } else {
+ *addr = *val;
+ if (*nat)
+ *nat_addr |= nat_mask;
+ else
+ *nat_addr &= ~nat_mask;
+ }
+ } else {
+ if ((*nat_addr & nat_mask) == 0) {
+ *val = *addr;
+ *nat = 0;
+ } else {
+ *val = 0; /* if register is a NaT, *addr may contain kernel data! */
+ *nat = 1;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(unw_access_gr);
+
+int
+unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
+{
+ unsigned long *addr;
+ struct pt_regs *pt;
+
+ switch (regnum) {
+ /* scratch: */
+ case 0: pt = get_scratch_regs(info); addr = &pt->b0; break;
+ case 6: pt = get_scratch_regs(info); addr = &pt->b6; break;
+ case 7: pt = get_scratch_regs(info); addr = &pt->b7; break;
+
+ /* preserved: */
+ case 1: case 2: case 3: case 4: case 5:
+ addr = *(&info->b1_loc + (regnum - 1));
+ if (!addr)
+ addr = &info->sw->b1 + (regnum - 1);
+ break;
+
+ default:
+ UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n",
+ __FUNCTION__, regnum);
+ return -1;
+ }
+ if (write)
+ if (read_only(addr)) {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to write
read-only location\n",
+ __FUNCTION__);
+ } else
+ *addr = *val;
+ else
+ *val = *addr;
+ return 0;
+}
+EXPORT_SYMBOL(unw_access_br);
+
+int
+unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write)
+{
+ struct ia64_fpreg *addr = NULL;
+ struct pt_regs *pt;
+
+ if ((unsigned) (regnum - 2) >= 126) {
+ UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n",
+ __FUNCTION__, regnum);
+ return -1;
+ }
+
+ if (regnum <= 5) {
+ addr = *(&info->f2_loc + (regnum - 2));
+ if (!addr)
+ addr = &info->sw->f2 + (regnum - 2);
+ } else if (regnum <= 15) {
+ if (regnum <= 11) {
+ pt = get_scratch_regs(info);
+ addr = &pt->f6 + (regnum - 6);
+ }
+ else
+ addr = &info->sw->f12 + (regnum - 12);
+ } else if (regnum <= 31) {
+ addr = info->fr_loc[regnum - 16];
+ if (!addr)
+ addr = &info->sw->f16 + (regnum - 16);
+ } else {
+ struct task_struct *t = info->task;
+
+ if (write)
+ ia64_sync_fph(t);
+ else
+ ia64_flush_fph(t);
+#ifdef XEN
+ addr = t->arch._thread.fph + (regnum - 32);
+#else
+ addr = t->thread.fph + (regnum - 32);
+#endif
+ }
+
+ if (write)
+ if (read_only(addr)) {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to write
read-only location\n",
+ __FUNCTION__);
+ } else
+ *addr = *val;
+ else
+ *val = *addr;
+ return 0;
+}
+EXPORT_SYMBOL(unw_access_fr);
+
+int
+unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
+{
+ unsigned long *addr;
+ struct pt_regs *pt;
+
+ switch (regnum) {
+ case UNW_AR_BSP:
+ addr = info->bsp_loc;
+ if (!addr)
+ addr = &info->sw->ar_bspstore;
+ break;
+
+ case UNW_AR_BSPSTORE:
+ addr = info->bspstore_loc;
+ if (!addr)
+ addr = &info->sw->ar_bspstore;
+ break;
+
+ case UNW_AR_PFS:
+ addr = info->pfs_loc;
+ if (!addr)
+ addr = &info->sw->ar_pfs;
+ break;
+
+ case UNW_AR_RNAT:
+ addr = info->rnat_loc;
+ if (!addr)
+ addr = &info->sw->ar_rnat;
+ break;
+
+ case UNW_AR_UNAT:
+ addr = info->unat_loc;
+ if (!addr)
+ addr = &info->sw->caller_unat;
+ break;
+
+ case UNW_AR_LC:
+ addr = info->lc_loc;
+ if (!addr)
+ addr = &info->sw->ar_lc;
+ break;
+
+ case UNW_AR_EC:
+ if (!info->cfm_loc)
+ return -1;
+ if (write)
+ *info->cfm_loc =
+ (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52);
+ else
+ *val = (*info->cfm_loc >> 52) & 0x3f;
+ return 0;
+
+ case UNW_AR_FPSR:
+ addr = info->fpsr_loc;
+ if (!addr)
+ addr = &info->sw->ar_fpsr;
+ break;
+
+ case UNW_AR_RSC:
+ pt = get_scratch_regs(info);
+ addr = &pt->ar_rsc;
+ break;
+
+ case UNW_AR_CCV:
+ pt = get_scratch_regs(info);
+ addr = &pt->ar_ccv;
+ break;
+
+ case UNW_AR_CSD:
+ pt = get_scratch_regs(info);
+ addr = &pt->ar_csd;
+ break;
+
+ case UNW_AR_SSD:
+ pt = get_scratch_regs(info);
+ addr = &pt->ar_ssd;
+ break;
+
+ default:
+ UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n",
+ __FUNCTION__, regnum);
+ return -1;
+ }
+
+ if (write) {
+ if (read_only(addr)) {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to write
read-only location\n",
+ __FUNCTION__);
+ } else
+ *addr = *val;
+ } else
+ *val = *addr;
+ return 0;
+}
+EXPORT_SYMBOL(unw_access_ar);
+
+int
+unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write)
+{
+ unsigned long *addr;
+
+ addr = info->pr_loc;
+ if (!addr)
+ addr = &info->sw->pr;
+
+ if (write) {
+ if (read_only(addr)) {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to write
read-only location\n",
+ __FUNCTION__);
+ } else
+ *addr = *val;
+ } else
+ *val = *addr;
+ return 0;
+}
+EXPORT_SYMBOL(unw_access_pr);
+
+
+/* Routines to manipulate the state stack. */
+
+static inline void
+push (struct unw_state_record *sr)
+{
+ struct unw_reg_state *rs;
+
+ rs = alloc_reg_state();
+ if (!rs) {
+ printk(KERN_ERR "unwind: cannot stack reg state!\n");
+ return;
+ }
+ memcpy(rs, &sr->curr, sizeof(*rs));
+ sr->curr.next = rs;
+}
+
+static void
+pop (struct unw_state_record *sr)
+{
+ struct unw_reg_state *rs = sr->curr.next;
+
+ if (!rs) {
+ printk(KERN_ERR "unwind: stack underflow!\n");
+ return;
+ }
+ memcpy(&sr->curr, rs, sizeof(*rs));
+ free_reg_state(rs);
+}
+
+/* Make a copy of the state stack. Non-recursive to avoid stack overflows. */
+static struct unw_reg_state *
+dup_state_stack (struct unw_reg_state *rs)
+{
+ struct unw_reg_state *copy, *prev = NULL, *first = NULL;
+
+ while (rs) {
+ copy = alloc_reg_state();
+ if (!copy) {
+ printk(KERN_ERR "unwind.dup_state_stack: out of
memory\n");
+ return NULL;
+ }
+ memcpy(copy, rs, sizeof(*copy));
+ if (first)
+ prev->next = copy;
+ else
+ first = copy;
+ rs = rs->next;
+ prev = copy;
+ }
+ return first;
+}
+
+/* Free all stacked register states (but not RS itself). */
+static void
+free_state_stack (struct unw_reg_state *rs)
+{
+ struct unw_reg_state *p, *next;
+
+ for (p = rs->next; p != NULL; p = next) {
+ next = p->next;
+ free_reg_state(p);
+ }
+ rs->next = NULL;
+}
+
+/* Unwind decoder routines */
+
+static enum unw_register_index __attribute_const__
+decode_abreg (unsigned char abreg, int memory)
+{
+ switch (abreg) {
+ case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04);
+ case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22);
+ case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30);
+ case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41);
+ case 0x60: return UNW_REG_PR;
+ case 0x61: return UNW_REG_PSP;
+ case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR;
+ case 0x63: return UNW_REG_RP;
+ case 0x64: return UNW_REG_BSP;
+ case 0x65: return UNW_REG_BSPSTORE;
+ case 0x66: return UNW_REG_RNAT;
+ case 0x67: return UNW_REG_UNAT;
+ case 0x68: return UNW_REG_FPSR;
+ case 0x69: return UNW_REG_PFS;
+ case 0x6a: return UNW_REG_LC;
+ default:
+ break;
+ }
+ UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg);
+ return UNW_REG_LC;
+}
+
+static void
+set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val)
+{
+ reg->val = val;
+ reg->where = where;
+ if (reg->when == UNW_WHEN_NEVER)
+ reg->when = when;
+}
+
+static void
+alloc_spill_area (unsigned long *offp, unsigned long regsize,
+ struct unw_reg_info *lo, struct unw_reg_info *hi)
+{
+ struct unw_reg_info *reg;
+
+ for (reg = hi; reg >= lo; --reg) {
+ if (reg->where == UNW_WHERE_SPILL_HOME) {
+ reg->where = UNW_WHERE_PSPREL;
+ *offp -= regsize;
+ reg->val = *offp;
+ }
+ }
+}
+
+static inline void
+spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t)
+{
+ struct unw_reg_info *reg;
+
+ for (reg = *regp; reg <= lim; ++reg) {
+ if (reg->where == UNW_WHERE_SPILL_HOME) {
+ reg->when = t;
+ *regp = reg + 1;
+ return;
+ }
+ }
+ UNW_DPRINT(0, "unwind.%s: excess spill!\n", __FUNCTION__);
+}
+
+static inline void
+finish_prologue (struct unw_state_record *sr)
+{
+ struct unw_reg_info *reg;
+ unsigned long off;
+ int i;
+
+ /*
+ * First, resolve implicit register save locations (see Section "11.4.2.3 Rules
+ * for Using Unwind Descriptors", rule 3):
+ */
+ for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) {
+ reg = sr->curr.reg + unw.save_order[i];
+ if (reg->where == UNW_WHERE_GR_SAVE) {
+ reg->where = UNW_WHERE_GR;
+ reg->val = sr->gr_save_loc++;
+ }
+ }
+
+ /*
+ * Next, compute when the fp, general, and branch registers get
+ * saved. This must come before alloc_spill_area() because
+ * we need to know which registers are spilled to their home
+ * locations.
+ */
+ if (sr->imask) {
+ unsigned char kind, mask = 0, *cp = sr->imask;
+ int t;
+ static const unsigned char limit[3] = {
+ UNW_REG_F31, UNW_REG_R7, UNW_REG_B5
+ };
+ struct unw_reg_info *(regs[3]);
+
+ regs[0] = sr->curr.reg + UNW_REG_F2;
+ regs[1] = sr->curr.reg + UNW_REG_R4;
+ regs[2] = sr->curr.reg + UNW_REG_B1;
+
+ for (t = 0; t < sr->region_len; ++t) {
+ if ((t & 3) == 0)
+ mask = *cp++;
+ kind = (mask >> 2*(3-(t & 3))) & 3;
+ if (kind > 0)
+ spill_next_when(&regs[kind - 1], sr->curr.reg + limit[kind - 1],
+ sr->region_start + t);
+ }
+ }
+ /*
+ * Next, lay out the memory stack spill area:
+ */
+ if (sr->any_spills) {
+ off = sr->spill_offset;
+ alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31);
+ alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5);
+ alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7);
+ }
+}
+
+/*
+ * Region header descriptors.
+ */
+
+static void
+desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave,
+ struct unw_state_record *sr)
+{
+ int i, region_start;
+
+ if (!(sr->in_body || sr->first_region))
+ finish_prologue(sr);
+ sr->first_region = 0;
+
+ /* check if we're done: */
+ if (sr->when_target < sr->region_start + sr->region_len) {
+ sr->done = 1;
+ return;
+ }
+
+ region_start = sr->region_start + sr->region_len;
+
+ for (i = 0; i < sr->epilogue_count; ++i)
+ pop(sr);
+ sr->epilogue_count = 0;
+ sr->epilogue_start = UNW_WHEN_NEVER;
+
+ sr->region_start = region_start;
+ sr->region_len = rlen;
+ sr->in_body = body;
+
+ if (!body) {
+ push(sr);
+
+ for (i = 0; i < 4; ++i) {
+ if (mask & 0x8)
+ set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR,
+ sr->region_start + sr->region_len - 1, grsave++);
+ mask <<= 1;
+ }
+ sr->gr_save_loc = grsave;
+ sr->any_spills = 0;
+ sr->imask = NULL;
+ sr->spill_offset = 0x10; /* default to psp+16 */
+ }
+}
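
The `mask & 0x8` loop above walks the 4-bit prologue mask MSB-first, assigning consecutive GRs (starting at grsave) to the first four save_order registers (rp, ar.pfs, psp, pr). A standalone trace of that decoding, with illustrative example values:

    #include <stdio.h>

    int main(void)
    {
        static const char *save_order[4] = { "rp", "ar.pfs", "psp", "pr" };
        unsigned char mask = 0xa;   /* example: rp and psp saved */
        unsigned gr = 32;           /* example grsave value      */
        int i;

        for (i = 0; i < 4; ++i) {   /* MSB-first, as in desc_prologue() */
            if (mask & 0x8)
                printf("%s -> r%u\n", save_order[i], gr++);
            mask <<= 1;
        }
        return 0;                   /* prints: rp -> r32, psp -> r33 */
    }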
+
+/*
+ * Prologue descriptors.
+ */
+
+static inline void
+desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr)
+{
+ if (abi == 3 && context == 'i') {
+ sr->flags |= UNW_FLAG_INTERRUPT_FRAME;
+ UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __FUNCTION__);
+ }
+ else
+ UNW_DPRINT(0, "unwind%s: ignoring
unwabi(abi=0x%x,context=0x%x)\n",
+ __FUNCTION__, abi, context);
+}
+
+static inline void
+desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 5; ++i) {
+ if (brmask & 1)
+ set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR,
+ sr->region_start + sr->region_len - 1, gr++);
+ brmask >>= 1;
+ }
+}
+
+static inline void
+desc_br_mem (unsigned char brmask, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 5; ++i) {
+ if (brmask & 1) {
+ set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ brmask >>= 1;
+ }
+}
+
+static inline void
+desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 4; ++i) {
+ if ((grmask & 1) != 0) {
+ set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ grmask >>= 1;
+ }
+ for (i = 0; i < 20; ++i) {
+ if ((frmask & 1) != 0) {
+ int base = (i < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4;
+ set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ frmask >>= 1;
+ }
+}
+
+static inline void
+desc_fr_mem (unsigned char frmask, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 4; ++i) {
+ if ((frmask & 1) != 0) {
+ set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ frmask >>= 1;
+ }
+}
+
+static inline void
+desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 4; ++i) {
+ if ((grmask & 1) != 0)
+ set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR,
+ sr->region_start + sr->region_len - 1, gr++);
+ grmask >>= 1;
+ }
+}
+
+static inline void
+desc_gr_mem (unsigned char grmask, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 4; ++i) {
+ if ((grmask & 1) != 0) {
+ set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ grmask >>= 1;
+ }
+}
+
+static inline void
+desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr)
+{
+ set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE,
+ sr->region_start + min_t(int, t, sr->region_len - 1), 16*size);
+}
+
+static inline void
+desc_mem_stack_v (unw_word t, struct unw_state_record *sr)
+{
+ sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1);
+}
+
+static inline void
+desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr)
+{
+ set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst);
+}
+
+static inline void
+desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr)
+{
+ set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1,
+ 0x10 - 4*pspoff);
+}
+
+static inline void
+desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr)
+{
+ set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1,
+ 4*spoff);
+}
+
+static inline void
+desc_rp_br (unsigned char dst, struct unw_state_record *sr)
+{
+ sr->return_link_reg = dst;
+}
+
+static inline void
+desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr)
+{
+ struct unw_reg_info *reg = sr->curr.reg + regnum;
+
+ if (reg->where == UNW_WHERE_NONE)
+ reg->where = UNW_WHERE_GR_SAVE;
+ reg->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+}
+
+static inline void
+desc_spill_base (unw_word pspoff, struct unw_state_record *sr)
+{
+ sr->spill_offset = 0x10 - 4*pspoff;
+}
+
+static inline unsigned char *
+desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr)
+{
+ sr->imask = imaskp;
+ return imaskp + (2*sr->region_len + 7)/8;
+}
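
Each instruction in the region consumes two bits of the imask (the 2-bit "kind" decoded in finish_prologue() above), so the mask occupies ceil(2*region_len/8) bytes; `(2*sr->region_len + 7)/8` is exactly that ceiling in integer arithmetic. Worked examples:

    bytes(region_len) = (2*region_len + 7) / 8   /* integer division */
    bytes(13) = (26 + 7) / 8 = 4
    bytes(16) = (32 + 7) / 8 = 4
    bytes(17) = (34 + 7) / 8 = 5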
+
+/*
+ * Body descriptors.
+ */
+static inline void
+desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr)
+{
+ sr->epilogue_start = sr->region_start + sr->region_len - 1 - t;
+ sr->epilogue_count = ecount + 1;
+}
+
+static inline void
+desc_copy_state (unw_word label, struct unw_state_record *sr)
+{
+ struct unw_labeled_state *ls;
+
+ for (ls = sr->labeled_states; ls; ls = ls->next) {
+ if (ls->label == label) {
+ free_state_stack(&sr->curr);
+ memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr));
+ sr->curr.next = dup_state_stack(ls->saved_state.next);
+ return;
+ }
+ }
+ printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label);
+}
+
+static inline void
+desc_label_state (unw_word label, struct unw_state_record *sr)
+{
+ struct unw_labeled_state *ls;
+
+ ls = alloc_labeled_state();
+ if (!ls) {
+ printk(KERN_ERR "unwind.desc_label_state(): out of memory\n");
+ return;
+ }
+ ls->label = label;
+ memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state));
+ ls->saved_state.next = dup_state_stack(sr->curr.next);
+
+ /* insert into list of labeled states: */
+ ls->next = sr->labeled_states;
+ sr->labeled_states = ls;
+}
+
+/*
+ * General descriptors.
+ */
+
+static inline int
+desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr)
+{
+ if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1))
+ return 0;
+ if (qp > 0) {
+ if ((sr->pr_val & (1UL << qp)) == 0)
+ return 0;
+ sr->pr_mask |= (1UL << qp);
+ }
+ return 1;
+}
+
+static inline void
+desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr)
+{
+ struct unw_reg_info *r;
+
+ if (!desc_is_active(qp, t, sr))
+ return;
+
+ r = sr->curr.reg + decode_abreg(abreg, 0);
+ r->where = UNW_WHERE_NONE;
+ r->when = UNW_WHEN_NEVER;
+ r->val = 0;
+}
+
+static inline void
+desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x,
+ unsigned char ytreg, struct unw_state_record *sr)
+{
+ enum unw_where where = UNW_WHERE_GR;
+ struct unw_reg_info *r;
+
+ if (!desc_is_active(qp, t, sr))
+ return;
+
+ if (x)
+ where = UNW_WHERE_BR;
+ else if (ytreg & 0x80)
+ where = UNW_WHERE_FR;
+
+ r = sr->curr.reg + decode_abreg(abreg, 0);
+ r->where = where;
+ r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+ r->val = (ytreg & 0x7f);
+}
+
+static inline void
+desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff,
+ struct unw_state_record *sr)
+{
+ struct unw_reg_info *r;
+
+ if (!desc_is_active(qp, t, sr))
+ return;
+
+ r = sr->curr.reg + decode_abreg(abreg, 1);
+ r->where = UNW_WHERE_PSPREL;
+ r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+ r->val = 0x10 - 4*pspoff;
+}
+
+static inline void
+desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff,
+ struct unw_state_record *sr)
+{
+ struct unw_reg_info *r;
+
+ if (!desc_is_active(qp, t, sr))
+ return;
+
+ r = sr->curr.reg + decode_abreg(abreg, 1);
+ r->where = UNW_WHERE_SPREL;
+ r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
+ r->val = 4*spoff;
+}
+
+#define UNW_DEC_BAD_CODE(code) printk(KERN_ERR "unwind: unknown code 0x%02x\n", \
+ code);
+
+/*
+ * region headers:
+ */
+#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg)
+#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg)
+/*
+ * prologue descriptors:
+ */
+#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg)
+#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg)
+#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg)
+#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg)
+#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg)
+#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg)
+#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg)
+#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg)
+#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg)
+#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg)
+#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg)
+#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg)
+#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg)
+#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg)
+#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg)
+#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg)
+#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg))
+/*
+ * body descriptors:
+ */
+#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg)
+#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg)
+#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg)
+/*
+ * general unwind descriptors:
+ */
+#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) desc_spill_reg_p(p,t,a,x,y,arg)
+#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg)
+#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg)
+#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg)
+#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg)
+
+#include "unwind_decoder.c"
+
+
+/* Unwind scripts. */
+
+static inline unw_hash_index_t
+hash (unsigned long ip)
+{
+# define hashmagic 0x9e3779b97f4a7c16UL /* based on ((sqrt(5)-1)/2)*2^64 */
+
+ return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE);
+#undef hashmagic
+}
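
This is multiplicative (Fibonacci) hashing: ip is shifted right by 4 because IA-64 bundles are 16 bytes, multiplied by roughly 2^64 times the golden-ratio conjugate, and the top UNW_LOG_HASH_SIZE bits are kept as the bucket index. A runnable standalone demo (assumes an LP64 machine; UNW_LOG_HASH_SIZE = 8 per the definitions above):

    #include <stdio.h>

    #define UNW_LOG_HASH_SIZE 8    /* UNW_LOG_CACHE_SIZE + 1, as above */

    static unsigned long hash_ip(unsigned long ip)
    {
        return (ip >> 4) * 0x9e3779b97f4a7c16UL >> (64 - UNW_LOG_HASH_SIZE);
    }

    int main(void)
    {
        /* two adjacent bundle addresses land in well-spread buckets */
        printf("%lu\n", hash_ip(0xa000000000010000UL));
        printf("%lu\n", hash_ip(0xa000000000010010UL));
        return 0;
    }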
+
+static inline long
+cache_match (struct unw_script *script, unsigned long ip, unsigned long pr)
+{
+ read_lock(&script->lock);
+ if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0)
+ /* keep the read lock... */
+ return 1;
+ read_unlock(&script->lock);
+ return 0;
+}
+
+static inline struct unw_script *
+script_lookup (struct unw_frame_info *info)
+{
+ struct unw_script *script = unw.cache + info->hint;
+ unsigned short index;
+ unsigned long ip, pr;
+
+ if (UNW_DEBUG_ON(0))
+ return NULL; /* Always regenerate scripts in debug mode */
+
+ STAT(++unw.stat.cache.lookups);
+
+ ip = info->ip;
+ pr = info->pr;
+
+ if (cache_match(script, ip, pr)) {
+ STAT(++unw.stat.cache.hinted_hits);
+ return script;
+ }
+
+ index = unw.hash[hash(ip)];
+ if (index >= UNW_CACHE_SIZE)
+ return NULL;
+
+ script = unw.cache + index;
+ while (1) {
+ if (cache_match(script, ip, pr)) {
+ /* update hint; no locking required as single-word writes are atomic */
+ STAT(++unw.stat.cache.normal_hits);
+ unw.cache[info->prev_script].hint = script - unw.cache;
+ return script;
+ }
+ if (script->coll_chain >= UNW_HASH_SIZE)
+ return NULL;
+ script = unw.cache + script->coll_chain;
+ STAT(++unw.stat.cache.collision_chain_traversals);
+ }
+}
+
+/*
+ * On returning, a write lock for the SCRIPT is still being held.
+ */
+static inline struct unw_script *
+script_new (unsigned long ip)
+{
+ struct unw_script *script, *prev, *tmp;
+ unw_hash_index_t index;
+ unsigned short head;
+
+ STAT(++unw.stat.script.news);
+
+ /*
+ * Can't (easily) use cmpxchg() here because of ABA problem
+ * that is intrinsic in cmpxchg()...
+ */
+ head = unw.lru_head;
+ script = unw.cache + head;
+ unw.lru_head = script->lru_chain;
+
+ /*
+ * We'd deadlock here if we interrupted a thread that is holding a read lock on
+ * script->lock. Thus, if the write_trylock() fails, we simply bail out. The
+ * alternative would be to disable interrupts whenever we hold a read-lock, but
+ * that seems silly.
+ */
+ if (!write_trylock(&script->lock))
+ return NULL;
+
+ /* re-insert script at the tail of the LRU chain: */
+ unw.cache[unw.lru_tail].lru_chain = head;
+ unw.lru_tail = head;
+
+ /* remove the old script from the hash table (if it's there): */
+ if (script->ip) {
+ index = hash(script->ip);
+ tmp = unw.cache + unw.hash[index];
+ prev = NULL;
+ while (1) {
+ if (tmp == script) {
+ if (prev)
+ prev->coll_chain = tmp->coll_chain;
+ else
+ unw.hash[index] = tmp->coll_chain;
+ break;
+ } else
+ prev = tmp;
+ if (tmp->coll_chain >= UNW_CACHE_SIZE)
+ /* old script wasn't in the hash-table */
+ break;
+ tmp = unw.cache + tmp->coll_chain;
+ }
+ }
+
+ /* enter new script in the hash table */
+ index = hash(ip);
+ script->coll_chain = unw.hash[index];
+ unw.hash[index] = script - unw.cache;
+
+ script->ip = ip; /* set new IP while we're holding the locks */
+
+ STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions);
+
+ script->flags = 0;
+ script->hint = 0;
+ script->count = 0;
+ return script;
+}
+
+static void
+script_finalize (struct unw_script *script, struct unw_state_record *sr)
+{
+ script->pr_mask = sr->pr_mask;
+ script->pr_val = sr->pr_val;
+ /*
+ * We could down-grade our write-lock on script->lock here but
+ * the rwlock API doesn't offer atomic lock downgrading, so
+ * we'll just keep the write-lock and release it later when
+ * we're done using the script.
+ */
+}
+
+static inline void
+script_emit (struct unw_script *script, struct unw_insn insn)
+{
+ if (script->count >= UNW_MAX_SCRIPT_LEN) {
+ UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u
instructions!\n",
+ __FUNCTION__, UNW_MAX_SCRIPT_LEN);
+ return;
+ }
+ script->insn[script->count++] = insn;
+}
+
+static inline void
+emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script)
+{
+ struct unw_reg_info *r = sr->curr.reg + i;
+ enum unw_insn_opcode opc;
+ struct unw_insn insn;
+ unsigned long val = 0;
+
+ switch (r->where) {
+ case UNW_WHERE_GR:
+ if (r->val >= 32) {
+ /* register got spilled to a stacked register */
+ opc = UNW_INSN_SETNAT_TYPE;
+ val = UNW_NAT_REGSTK;
+ } else
+ /* register got spilled to a scratch register */
+ opc = UNW_INSN_SETNAT_MEMSTK;
+ break;
+
+ case UNW_WHERE_FR:
+ opc = UNW_INSN_SETNAT_TYPE;
+ val = UNW_NAT_VAL;
+ break;
+
+ case UNW_WHERE_BR:
+ opc = UNW_INSN_SETNAT_TYPE;
+ val = UNW_NAT_NONE;
+ break;
+
+ case UNW_WHERE_PSPREL:
+ case UNW_WHERE_SPREL:
+ opc = UNW_INSN_SETNAT_MEMSTK;
+ break;
+
+ default:
+ UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for
where = %u\n",
+ __FUNCTION__, r->where);
+ return;
+ }
+ insn.opc = opc;
+ insn.dst = unw.preg_index[i];
+ insn.val = val;
+ script_emit(script, insn);
+}
+
+static void
+compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
+{
+ struct unw_reg_info *r = sr->curr.reg + i;
+ enum unw_insn_opcode opc;
+ unsigned long val, rval;
+ struct unw_insn insn;
+ long need_nat_info;
+
+ if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target)
+ return;
+
+ opc = UNW_INSN_MOVE;
+ val = rval = r->val;
+ need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7);
+
+ switch (r->where) {
+ case UNW_WHERE_GR:
+ if (rval >= 32) {
+ opc = UNW_INSN_MOVE_STACKED;
+ val = rval - 32;
+ } else if (rval >= 4 && rval <= 7) {
+ if (need_nat_info) {
+ opc = UNW_INSN_MOVE2;
+ need_nat_info = 0;
+ }
+ val = unw.preg_index[UNW_REG_R4 + (rval - 4)];
+ } else if (rval == 0) {
+ opc = UNW_INSN_MOVE_CONST;
+ val = 0;
+ } else {
+ /* register got spilled to a scratch register */
+ opc = UNW_INSN_MOVE_SCRATCH;
+ val = pt_regs_off(rval);
+ }
+ break;
+
+ case UNW_WHERE_FR:
+ if (rval <= 5)
+ val = unw.preg_index[UNW_REG_F2 + (rval - 2)];
+ else if (rval >= 16 && rval <= 31)
+ val = unw.preg_index[UNW_REG_F16 + (rval - 16)];
+ else {
+ opc = UNW_INSN_MOVE_SCRATCH;
+ if (rval <= 11)
+ val = offsetof(struct pt_regs, f6) + 16*(rval - 6);
+ else
+ UNW_DPRINT(0, "unwind.%s: kernel may not touch
f%lu\n",
+ __FUNCTION__, rval);
+ }
+ break;
+
+ case UNW_WHERE_BR:
+ if (rval >= 1 && rval <= 5)
+ val = unw.preg_index[UNW_REG_B1 + (rval - 1)];
+ else {
+ opc = UNW_INSN_MOVE_SCRATCH;
+ if (rval == 0)
+ val = offsetof(struct pt_regs, b0);
+ else if (rval == 6)
+ val = offsetof(struct pt_regs, b6);
+ else
+ val = offsetof(struct pt_regs, b7);
+ }
+ break;
+
+ case UNW_WHERE_SPREL:
+ opc = UNW_INSN_ADD_SP;
+ break;
+
+ case UNW_WHERE_PSPREL:
+ opc = UNW_INSN_ADD_PSP;
+ break;
+
+ default:
+ UNW_DPRINT(0, "unwind%s: register %u has unexpected `where'
value of %u\n",
+ __FUNCTION__, i, r->where);
+ break;
+ }
+ insn.opc = opc;
+ insn.dst = unw.preg_index[i];
+ insn.val = val;
+ script_emit(script, insn);
+ if (need_nat_info)
+ emit_nat_info(sr, i, script);
+
+ if (i == UNW_REG_PSP) {
+ /*
+ * info->psp must contain the _value_ of the previous
+ * sp, not its save location. We get this by
+ * dereferencing the value we just stored in
+ * info->psp:
+ */
+ insn.opc = UNW_INSN_LOAD;
+ insn.dst = insn.val = unw.preg_index[UNW_REG_PSP];
+ script_emit(script, insn);
+ }
+}
+
+static inline const struct unw_table_entry *
+lookup (struct unw_table *table, unsigned long rel_ip)
+{
+ const struct unw_table_entry *e = NULL;
+ unsigned long lo, hi, mid;
+
+ /* do a binary search for right entry: */
+ for (lo = 0, hi = table->length; lo < hi; ) {
+ mid = (lo + hi) / 2;
+ e = &table->array[mid];
+ if (rel_ip < e->start_offset)
+ hi = mid;
+ else if (rel_ip >= e->end_offset)
+ lo = mid + 1;
+ else
+ break;
+ }
+ if (rel_ip < e->start_offset || rel_ip >= e->end_offset)
+ return NULL;
+ return e;
+}
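
lookup() is a standard binary search over address-sorted, half-open [start_offset, end_offset) ranges; note that, as in the kernel code, e is only meaningful when the table is non-empty. A standalone rendering of the same loop over a toy table:

    #include <stdio.h>

    struct entry { unsigned long start, end; };  /* half-open [start, end) */

    static const struct entry *find(const struct entry *a, unsigned long n,
                                    unsigned long ip)
    {
        unsigned long lo = 0, hi = n, mid;

        while (lo < hi) {
            mid = (lo + hi) / 2;
            if (ip < a[mid].start)
                hi = mid;
            else if (ip >= a[mid].end)
                lo = mid + 1;
            else
                return &a[mid];     /* start <= ip < end */
        }
        return NULL;
    }

    int main(void)
    {
        static const struct entry tab[] =
            { {0x10, 0x40}, {0x40, 0x90}, {0xa0, 0xf0} };
        const struct entry *e = find(tab, 3, 0x50);
        printf("%s\n", e ? "hit" : "miss");  /* hit: 0x40 <= 0x50 < 0x90 */
        return 0;
    }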
+
+/*
+ * Build an unwind script that unwinds from state OLD_STATE to the
+ * entrypoint of the function that called OLD_STATE.
+ */
+static inline struct unw_script *
+build_script (struct unw_frame_info *info)
+{
+ const struct unw_table_entry *e = NULL;
+ struct unw_script *script = NULL;
+ struct unw_labeled_state *ls, *next;
+ unsigned long ip = info->ip;
+ struct unw_state_record sr;
+ struct unw_table *table;
+ struct unw_reg_info *r;
+ struct unw_insn insn;
+ u8 *dp, *desc_end;
+ u64 hdr;
+ int i;
+ STAT(unsigned long start, parse_start;)
+
+ STAT(++unw.stat.script.builds; start = ia64_get_itc());
+
+ /* build state record */
+ memset(&sr, 0, sizeof(sr));
+ for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
+ r->when = UNW_WHEN_NEVER;
+ sr.pr_val = info->pr;
+
+ UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip);
+ script = script_new(ip);
+ if (!script) {
+ UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n",
__FUNCTION__);
+ STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+ return NULL;
+ }
+ unw.cache[info->prev_script].hint = script - unw.cache;
+
+ /* search the kernel's and the modules' unwind tables for IP: */
+
+ STAT(parse_start = ia64_get_itc());
+
+ for (table = unw.tables; table; table = table->next) {
+ if (ip >= table->start && ip < table->end) {
+ e = lookup(table, ip - table->segment_base);
+ break;
+ }
+ }
+ if (!e) {
+ /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */
+ UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n",
+ __FUNCTION__, ip, unw.cache[info->prev_script].ip);
+ sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+ sr.curr.reg[UNW_REG_RP].when = -1;
+ sr.curr.reg[UNW_REG_RP].val = 0;
+ compile_reg(&sr, UNW_REG_RP, script);
+ script_finalize(script, &sr);
+ STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+ STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+ return script;
+ }
+
+ sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16
+ + (ip & 0xfUL));
+ hdr = *(u64 *) (table->segment_base + e->info_offset);
+ dp = (u8 *) (table->segment_base + e->info_offset + 8);
+ desc_end = dp + 8*UNW_LENGTH(hdr);
+
+ while (!sr.done && dp < desc_end)
+ dp = unw_decode(dp, sr.in_body, &sr);
+
+ if (sr.when_target > sr.epilogue_start) {
+ /*
+ * sp has been restored and all values on the memory stack below
+ * psp also have been restored.
+ */
+ sr.curr.reg[UNW_REG_PSP].val = 0;
+ sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE;
+ sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER;
+ for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
+ if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10)
+ || r->where == UNW_WHERE_SPREL)
+ {
+ r->val = 0;
+ r->where = UNW_WHERE_NONE;
+ r->when = UNW_WHEN_NEVER;
+ }
+ }
+
+ script->flags = sr.flags;
+
+ /*
+ * If RP didn't get saved, generate entry for the return link
+ * register.
+ */
+ if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) {
+ sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+ sr.curr.reg[UNW_REG_RP].when = -1;
+ sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg;
+ UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx
where=%d val=0x%lx\n",
+ __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where,
+ sr.curr.reg[UNW_REG_RP].val);
+ }
+
+#ifdef UNW_DEBUG
+ UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n",
+ __FUNCTION__, table->segment_base + e->start_offset, sr.when_target);
+ for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) {
+ if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) {
+ UNW_DPRINT(1, " %s <- ", unw.preg_name[r -
sr.curr.reg]);
+ switch (r->where) {
+ case UNW_WHERE_GR: UNW_DPRINT(1, "r%lu",
r->val); break;
+ case UNW_WHERE_FR: UNW_DPRINT(1, "f%lu",
r->val); break;
+ case UNW_WHERE_BR: UNW_DPRINT(1, "b%lu",
r->val); break;
+ case UNW_WHERE_SPREL: UNW_DPRINT(1,
"[sp+0x%lx]", r->val); break;
+ case UNW_WHERE_PSPREL: UNW_DPRINT(1,
"[psp+0x%lx]", r->val); break;
+ case UNW_WHERE_NONE:
+ UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r -
sr.curr.reg], r->val);
+ break;
+
+ default:
+ UNW_DPRINT(1, "BADWHERE(%d)", r->where);
+ break;
+ }
+ UNW_DPRINT(1, "\t\t%d\n", r->when);
+ }
+ }
+#endif
+
+ STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+
+ /* translate state record into unwinder instructions: */
+
+ /*
+ * First, set psp if we're dealing with a fixed-size frame;
+ * subsequent instructions may depend on this value.
+ */
+ if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when
+ && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE)
+ && sr.curr.reg[UNW_REG_PSP].val != 0) {
+ /* new psp is sp plus frame size */
+ insn.opc = UNW_INSN_ADD;
+ insn.dst = offsetof(struct unw_frame_info, psp)/8;
+ insn.val = sr.curr.reg[UNW_REG_PSP].val; /* frame size */
+ script_emit(script, insn);
+ }
+
+ /* determine where the primary UNaT is: */
+ if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
+ i = UNW_REG_PRI_UNAT_MEM;
+ else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when)
+ i = UNW_REG_PRI_UNAT_GR;
+ else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
+ i = UNW_REG_PRI_UNAT_MEM;
+ else
+ i = UNW_REG_PRI_UNAT_GR;
+
+ compile_reg(&sr, i, script);
+
+ for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i)
+ compile_reg(&sr, i, script);
+
+ /* free labeled register states & stack: */
+
+ STAT(parse_start = ia64_get_itc());
+ for (ls = sr.labeled_states; ls; ls = next) {
+ next = ls->next;
+ free_state_stack(&ls->saved_state);
+ free_labeled_state(ls);
+ }
+ free_state_stack(&sr.curr);
+ STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
+
+ script_finalize(script, &sr);
+ STAT(unw.stat.script.build_time += ia64_get_itc() - start);
+ return script;
+}
+
+/*
+ * Apply the unwinding actions represented by OPS and update SR to
+ * reflect the state that existed upon entry to the function that this
+ * unwinder represents.
+ */
+static inline void
+run_script (struct unw_script *script, struct unw_frame_info *state)
+{
+ struct unw_insn *ip, *limit, next_insn;
+ unsigned long opc, dst, val, off;
+ unsigned long *s = (unsigned long *) state;
+ STAT(unsigned long start;)
+
+ STAT(++unw.stat.script.runs; start = ia64_get_itc());
+ state->flags = script->flags;
+ ip = script->insn;
+ limit = script->insn + script->count;
+ next_insn = *ip;
+
+ while (ip++ < limit) {
+ opc = next_insn.opc;
+ dst = next_insn.dst;
+ val = next_insn.val;
+ next_insn = *ip;
+
+ redo:
+ switch (opc) {
+ case UNW_INSN_ADD:
+ s[dst] += val;
+ break;
+
+ case UNW_INSN_MOVE2:
+ if (!s[val])
+ goto lazy_init;
+ s[dst+1] = s[val+1];
+ s[dst] = s[val];
+ break;
+
+ case UNW_INSN_MOVE:
+ if (!s[val])
+ goto lazy_init;
+ s[dst] = s[val];
+ break;
+
+ case UNW_INSN_MOVE_SCRATCH:
+ if (state->pt) {
+ s[dst] = (unsigned long) get_scratch_regs(state) + val;
+ } else {
+ s[dst] = 0;
+ UNW_DPRINT(0, "unwind.%s: no state->pt,
dst=%ld, val=%ld\n",
+ __FUNCTION__, dst, val);
+ }
+ break;
+
+ case UNW_INSN_MOVE_CONST:
+ if (val == 0)
+ s[dst] = (unsigned long) &unw.r0;
+ else {
+ s[dst] = 0;
+ UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST
bad val=%ld\n",
+ __FUNCTION__, val);
+ }
+ break;
+
+
+ case UNW_INSN_MOVE_STACKED:
+ s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp,
+ val);
+ break;
+
+ case UNW_INSN_ADD_PSP:
+ s[dst] = state->psp + val;
+ break;
+
+ case UNW_INSN_ADD_SP:
+ s[dst] = state->sp + val;
+ break;
+
+ case UNW_INSN_SETNAT_MEMSTK:
+ if (!state->pri_unat_loc)
+ state->pri_unat_loc = &state->sw->caller_unat;
+ /* register off. is a multiple of 8, so the least 3 bits (type) are 0 */
+ s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK;
+ break;
+
+ case UNW_INSN_SETNAT_TYPE:
+ s[dst+1] = val;
+ break;
+
+ case UNW_INSN_LOAD:
+#ifdef UNW_DEBUG
+ if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0
+#ifndef XEN
+ || s[val] < TASK_SIZE
+#endif
+ )
+ {
+ UNW_DPRINT(0, "unwind.%s: rejecting bad
psp=0x%lx\n",
+ __FUNCTION__, s[val]);
+ break;
+ }
+#endif
+ s[dst] = *(unsigned long *) s[val];
+ break;
+ }
+ }
+ STAT(unw.stat.script.run_time += ia64_get_itc() - start);
+ return;
+
+ lazy_init:
+ off = unw.sw_off[val];
+ s[val] = (unsigned long) state->sw + off;
+ if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7))
+ /*
+ * We're initializing a general register: init NaT info, too. Note that
+ * the offset is a multiple of 8 which gives us the 3 bits needed for
+ * the type field.
+ */
+ s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK;
+ goto redo;
+}
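
run_script() is a tiny interpreter: the frame-info structure is treated as an array of words and each unw_insn is an (opcode, dst, val) triple applied to it. A stripped-down standalone model of the dispatch loop, covering only two opcodes for illustration:

    #include <stdio.h>

    enum { OP_ADD, OP_MOVE };               /* tiny subset, illustrative only */
    struct insn { unsigned opc, dst, val; };

    static void run(const struct insn *ip, int count, unsigned long *s)
    {
        const struct insn *limit = ip + count;

        for (; ip < limit; ++ip) {
            switch (ip->opc) {
            case OP_ADD:  s[ip->dst] += ip->val;   break;  /* s[dst] += val   */
            case OP_MOVE: s[ip->dst] = s[ip->val]; break;  /* s[dst] = s[val] */
            }
        }
    }

    int main(void)
    {
        unsigned long state[4] = { 100, 0, 7, 0 };
        struct insn script[] = { { OP_ADD, 0, 16 }, { OP_MOVE, 1, 2 } };

        run(script, 2, state);
        printf("%lu %lu\n", state[0], state[1]);  /* prints: 116 7 */
        return 0;
    }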
+
+static int
+find_save_locs (struct unw_frame_info *info)
+{
+ int have_write_lock = 0;
+ struct unw_script *scr;
+ unsigned long flags = 0;
+
+ if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf))
+#ifndef XEN
+ || info->ip < TASK_SIZE
+#endif
+ ) {
+ /* don't let obviously bad addresses pollute the cache */
+ /* FIXME: should really be level 0 but it occurs too often. KAO */
+ UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __FUNCTION__, info->ip);
+ info->rp_loc = NULL;
+ return -1;
+ }
+
+ scr = script_lookup(info);
+ if (!scr) {
+ spin_lock_irqsave(&unw.lock, flags);
+ scr = build_script(info);
+ if (!scr) {
+ spin_unlock_irqrestore(&unw.lock, flags);
+ UNW_DPRINT(0,
+ "unwind.%s: failed to locate/build unwind
script for ip %lx\n",
+ __FUNCTION__, info->ip);
+ return -1;
+ }
+ have_write_lock = 1;
+ }
+ info->hint = scr->hint;
+ info->prev_script = scr - unw.cache;
+
+ run_script(scr, info);
+
+ if (have_write_lock) {
+ write_unlock(&scr->lock);
+ spin_unlock_irqrestore(&unw.lock, flags);
+ } else
+ read_unlock(&scr->lock);
+ return 0;
+}
+
+int
+unw_unwind (struct unw_frame_info *info)
+{
+ unsigned long prev_ip, prev_sp, prev_bsp;
+ unsigned long ip, pr, num_regs;
+ STAT(unsigned long start, flags;)
+ int retval;
+
+ STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc());
+
+ prev_ip = info->ip;
+ prev_sp = info->sp;
+ prev_bsp = info->bsp;
+
+ /* restore the ip */
+ if (!info->rp_loc) {
+ /* FIXME: should really be level 0 but it occurs too often. KAO */
+ UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n",
+ __FUNCTION__, info->ip);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return -1;
+ }
+ ip = info->ip = *info->rp_loc;
+ if (ip < GATE_ADDR) {
+ UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n",
__FUNCTION__, ip);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start;
local_irq_restore(flags));
+ return -1;
+ }
+
+ /* restore the cfm: */
+ if (!info->pfs_loc) {
+ UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n",
__FUNCTION__);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start;
local_irq_restore(flags));
+ return -1;
+ }
+ info->cfm_loc = info->pfs_loc;
+
+ /* restore the bsp: */
+ pr = info->pr;
+ num_regs = 0;
+ if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) {
+ info->pt = info->sp + 16;
+ if ((pr & (1UL << PRED_NON_SYSCALL)) != 0)
+ num_regs = *info->cfm_loc & 0x7f; /* size of frame */
+ info->pfs_loc =
+ (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs));
+ UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __FUNCTION__, info->pt);
+ } else
+ num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */
+ info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs);
+ if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) {
+ UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range
[0x%lx-0x%lx]\n",
+ __FUNCTION__, info->bsp, info->regstk.limit,
info->regstk.top);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start;
local_irq_restore(flags));
+ return -1;
+ }
+
+ /* restore the sp: */
+ info->sp = info->psp;
+ if (info->sp < info->memstk.top || info->sp > info->memstk.limit) {
+ UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range
[0x%lx-0x%lx]\n",
+ __FUNCTION__, info->sp, info->memstk.top,
info->memstk.limit);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start;
local_irq_restore(flags));
+ return -1;
+ }
+
+ if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) {
+ UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n",
+ __FUNCTION__, ip);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return -1;
+ }
+
+ /* as we unwind, the saved ar.unat becomes the primary unat: */
+ info->pri_unat_loc = info->unat_loc;
+
+ /* finally, restore the predicates: */
+ unw_get_pr(info, &info->pr);
+
+ retval = find_save_locs(info);
+ STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
+ return retval;
+}
+EXPORT_SYMBOL(unw_unwind);
+
+int
+unw_unwind_to_user (struct unw_frame_info *info)
+{
+ unsigned long ip, sp, pr = 0;
+
+ while (unw_unwind(info) >= 0) {
+ unw_get_sp(info, &sp);
+ if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
+ < IA64_PT_REGS_SIZE) {
+ UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel
stack\n",
+ __FUNCTION__);
+ break;
+ }
+ if (unw_is_intr_frame(info) &&
+ (pr & (1UL << PRED_USER_STACK)))
+ return 0;
+ if (unw_get_pr (info, &pr) < 0) {
+ unw_get_rp(info, &ip);
+ UNW_DPRINT(0, "unwind.%s: failed to read "
+ "predicate register (ip=0x%lx)\n",
+ __FUNCTION__, ip);
+ return -1;
+ }
+ }
+ unw_get_ip(info, &ip);
+ UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
+ __FUNCTION__, ip);
+ return -1;
+}
+EXPORT_SYMBOL(unw_unwind_to_user);
+
+static void
+init_frame_info (struct unw_frame_info *info, struct task_struct *t,
+ struct switch_stack *sw, unsigned long stktop)
+{
+ unsigned long rbslimit, rbstop, stklimit;
+ STAT(unsigned long start, flags;)
+
+ STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc());
+
+ /*
+ * Subtle stuff here: we _could_ unwind through the switch_stack frame but we
+ * don't want to do that because it would be slow as each preserved register would
+ * have to be processed. Instead, what we do here is zero out the frame info and
+ * start the unwind process at the function that created the switch_stack frame.
+ * When a preserved value in switch_stack needs to be accessed, run_script() will
+ * initialize the appropriate pointer on demand.
+ */
+ memset(info, 0, sizeof(*info));
+
+ rbslimit = (unsigned long) t + IA64_RBS_OFFSET;
+ rbstop = sw->ar_bspstore;
+ if (rbstop - (unsigned long) t >= IA64_STK_OFFSET)
+ rbstop = rbslimit;
+
+ stklimit = (unsigned long) t + IA64_STK_OFFSET;
+ if (stktop <= rbstop)
+ stktop = rbstop;
+
+ info->regstk.limit = rbslimit;
+ info->regstk.top = rbstop;
+ info->memstk.limit = stklimit;
+ info->memstk.top = stktop;
+ info->task = t;
+ info->sw = sw;
+ info->sp = info->psp = stktop;
+ info->pr = sw->pr;
+ UNW_DPRINT(3, "unwind.%s:\n"
+ " task 0x%lx\n"
+ " rbs = [0x%lx-0x%lx)\n"
+ " stk = [0x%lx-0x%lx)\n"
+ " pr 0x%lx\n"
+ " sw 0x%lx\n"
+ " sp 0x%lx\n",
+ __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
+ info->pr, (unsigned long) info->sw, info->sp);
+ STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags));
+}
+
+void
+unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw)
+{
+ unsigned long sol;
+
+ init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16);
+ info->cfm_loc = &sw->ar_pfs;
+ sol = (*info->cfm_loc >> 7) & 0x7f;
+ info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol);
+ info->ip = sw->b0;
+ UNW_DPRINT(3, "unwind.%s:\n"
+ " bsp 0x%lx\n"
+ " sol 0x%lx\n"
+ " ip 0x%lx\n",
+ __FUNCTION__, info->bsp, sol, info->ip);
+ find_save_locs(info);
+}
+
+EXPORT_SYMBOL(unw_init_frame_info);
+
+void
+unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t)
+{
+#ifdef XEN
+ struct switch_stack *sw = (struct switch_stack *) (t->arch._thread.ksp + 16);
+#else
+ struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
+#endif
+
+ UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__);
+ unw_init_frame_info(info, t, sw);
+}
+EXPORT_SYMBOL(unw_init_from_blocked_task);
+
+static void
+init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base,
+ unsigned long gp, const void *table_start, const void *table_end)
+{
+ const struct unw_table_entry *start = table_start, *end = table_end;
+
+ table->name = name;
+ table->segment_base = segment_base;
+ table->gp = gp;
+ table->start = segment_base + start[0].start_offset;
+ table->end = segment_base + end[-1].end_offset;
+ table->array = start;
+ table->length = end - start;
+}
+
+#ifndef XEN
+void *
+unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp,
+ const void *table_start, const void *table_end)
+{
+ const struct unw_table_entry *start = table_start, *end = table_end;
+ struct unw_table *table;
+ unsigned long flags;
+
+ if (end - start <= 0) {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty
unwind table\n",
+ __FUNCTION__);
+ return NULL;
+ }
+
+ table = kmalloc(sizeof(*table), GFP_USER);
+ if (!table)
+ return NULL;
+
+ init_unwind_table(table, name, segment_base, gp, table_start, table_end);
+
+ spin_lock_irqsave(&unw.lock, flags);
+ {
+ /* keep kernel unwind table at the front (it's searched most commonly): */
+ table->next = unw.tables->next;
+ unw.tables->next = table;
+ }
+ spin_unlock_irqrestore(&unw.lock, flags);
+
+ return table;
+}
+
+void
+unw_remove_unwind_table (void *handle)
+{
+ struct unw_table *table, *prev;
+ struct unw_script *tmp;
+ unsigned long flags;
+ long index;
+
+ if (!handle) {
+ UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove
non-existent unwind table\n",
+ __FUNCTION__);
+ return;
+ }
+
+ table = handle;
+ if (table == &unw.kernel_table) {
+ UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind
table is a "
+ "no-can-do!\n", __FUNCTION__);
+ return;
+ }
+
+ spin_lock_irqsave(&unw.lock, flags);
+ {
+ /* first, delete the table: */
+
+ for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next)
+ if (prev->next == table)
+ break;
+ if (!prev) {
+ UNW_DPRINT(0, "unwind.%s: failed to find unwind table
%p\n",
+ __FUNCTION__, (void *) table);
+ spin_unlock_irqrestore(&unw.lock, flags);
+ return;
+ }
+ prev->next = table->next;
+ }
+ spin_unlock_irqrestore(&unw.lock, flags);
+
+ /* next, remove hash table entries for this table */
+
+ for (index = 0; index < UNW_HASH_SIZE; ++index) { /* hash[] has UNW_HASH_SIZE entries; <= would overrun */
+ tmp = unw.cache + unw.hash[index];
+ if (unw.hash[index] >= UNW_CACHE_SIZE
+ || tmp->ip < table->start || tmp->ip >= table->end)
+ continue;
+
+ write_lock(&tmp->lock);
+ {
+ if (tmp->ip >= table->start && tmp->ip < table->end) {
+ unw.hash[index] = tmp->coll_chain;
+ tmp->ip = 0;
+ }
+ }
+ write_unlock(&tmp->lock);
+ }
+
+ kfree(table);
+}
+
+static int __init
+create_gate_table (void)
+{
+ const struct unw_table_entry *entry, *start, *end;
+ unsigned long *lp, segbase = GATE_ADDR;
+ size_t info_size, size;
+ char *info;
+ Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+ int i;
+
+ for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr)
+ if (phdr->p_type == PT_IA_64_UNWIND) {
+ punw = phdr;
+ break;
+ }
+
+ if (!punw) {
+ printk("%s: failed to find gate DSO's unwind table!\n",
__FUNCTION__);
+ return 0;
+ }
+
+ start = (const struct unw_table_entry *) punw->p_vaddr;
+ end = (struct unw_table_entry *) ((char *) start + punw->p_memsz);
+ size = 0;
+
+ unw_add_unwind_table("linux-gate.so", segbase, 0, start, end);
+
+ for (entry = start; entry < end; ++entry)
+ size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
+ size += 8; /* reserve space for "end of table" marker */
+
+ unw.gate_table = kmalloc(size, GFP_KERNEL);
+ if (!unw.gate_table) {
+ unw.gate_table_size = 0;
+ printk(KERN_ERR "%s: unable to create unwind data for gate
page!\n", __FUNCTION__);
+ return 0;
+ }
+ unw.gate_table_size = size;
+
+ lp = unw.gate_table;
+ info = (char *) unw.gate_table + size;
+
+ for (entry = start; entry < end; ++entry, lp += 3) {
+ info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
+ info -= info_size;
+ memcpy(info, (char *) segbase + entry->info_offset, info_size);
+
+ lp[0] = segbase + entry->start_offset; /* start */
+ lp[1] = segbase + entry->end_offset; /* end */
+ lp[2] = info - (char *) unw.gate_table; /* info */
+ }
+ *lp = 0; /* end-of-table marker */
+ return 0;
+}
+
+__initcall(create_gate_table);
+#endif // !XEN
+
+void __init
+unw_init (void)
+{
+ extern char __gp[];
+ extern void unw_hash_index_t_is_too_narrow (void);
+ long i, off;
+
+ if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
+ unw_hash_index_t_is_too_narrow();
+
+ unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT);
+ unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
+ unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
+ unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
+ unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT);
+ unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
+ unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
+ unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
+ for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8)
+ unw.sw_off[unw.preg_index[i]] = off;
+ for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8)
+ unw.sw_off[unw.preg_index[i]] = off;
+ for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16)
+ unw.sw_off[unw.preg_index[i]] = off;
+ for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16)
+ unw.sw_off[unw.preg_index[i]] = off;
+
+ for (i = 0; i < UNW_CACHE_SIZE; ++i) {
+ if (i > 0)
+ unw.cache[i].lru_chain = (i - 1);
+ unw.cache[i].coll_chain = -1;
+ rwlock_init(&unw.cache[i].lock);
+ }
+ unw.lru_head = UNW_CACHE_SIZE - 1;
+ unw.lru_tail = 0;
+
+ init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned
long) __gp,
+ __start_unwind, __end_unwind);
+}
+
+/*
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ * This system call has been deprecated. The new and improved way to get
+ * at the kernel's unwind info is via the gate DSO. The address of the
+ * ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR.
+ *
+ * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
+ *
+ * This system call copies the unwind data into the buffer pointed to by BUF and returns
+ * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data
+ * or if BUF is NULL, nothing is copied, but the system call still returns the size of the
+ * unwind data.
+ *
+ * The first portion of the unwind data contains an unwind table and the rest contains the
+ * associated unwind info (in no particular order). The unwind table consists of a table
+ * of entries of the form:
+ *
+ * u64 start; (64-bit address of start of function)
+ * u64 end; (64-bit address of end of function)
+ * u64 info; (BUF-relative offset to unwind info)
+ *
+ * The end of the unwind table is indicated by an entry with a START address of zero.
+ *
+ * Please see the IA-64 Software Conventions and Runtime Architecture manual for details
+ * on the format of the unwind info.
+ *
+ * ERRORS
+ * EFAULT BUF points outside your accessible address space.
+ */
+asmlinkage long
+sys_getunwind (void __user *buf, size_t buf_size)
+{
+ if (buf && buf_size >= unw.gate_table_size)
+ if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0)
+ return -EFAULT;
+ return unw.gate_table_size;
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind_decoder.c
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind_decoder.c Mon Jan 9 11:22:17 2006
@@ -0,0 +1,459 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * Generic IA-64 unwind info decoder.
+ *
+ * This file is used both by the Linux kernel and objdump. Please keep
+ * the two copies of this file in sync.
+ *
+ * You need to customize the decoder by defining the following
+ * macros/constants before including this file:
+ *
+ * Types:
+ * unw_word Unsigned integer type with at least 64 bits
+ *
+ * Register names:
+ * UNW_REG_BSP
+ * UNW_REG_BSPSTORE
+ * UNW_REG_FPSR
+ * UNW_REG_LC
+ * UNW_REG_PFS
+ * UNW_REG_PR
+ * UNW_REG_RNAT
+ * UNW_REG_PSP
+ * UNW_REG_RP
+ * UNW_REG_UNAT
+ *
+ * Decoder action macros:
+ * UNW_DEC_BAD_CODE(code)
+ * UNW_DEC_ABI(fmt,abi,context,arg)
+ * UNW_DEC_BR_GR(fmt,brmask,gr,arg)
+ * UNW_DEC_BR_MEM(fmt,brmask,arg)
+ * UNW_DEC_COPY_STATE(fmt,label,arg)
+ * UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
+ * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
+ * UNW_DEC_FR_MEM(fmt,frmask,arg)
+ * UNW_DEC_GR_GR(fmt,grmask,gr,arg)
+ * UNW_DEC_GR_MEM(fmt,grmask,arg)
+ * UNW_DEC_LABEL_STATE(fmt,label,arg)
+ * UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
+ * UNW_DEC_MEM_STACK_V(fmt,t,arg)
+ * UNW_DEC_PRIUNAT_GR(fmt,r,arg)
+ * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
+ * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
+ * UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg)
+ * UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg)
+ * UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
+ * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
+ * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
+ * UNW_DEC_REG_REG(fmt,src,dst,arg)
+ * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
+ * UNW_DEC_REG_WHEN(fmt,reg,t,arg)
+ * UNW_DEC_RESTORE(fmt,t,abreg,arg)
+ * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
+ * UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
+ * UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
+ * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
+ * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
+ * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
+ * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
+ * UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ */
+
+static unw_word
+unw_decode_uleb128 (unsigned char **dpp)
+{
+ unsigned shift = 0;
+ unw_word byte, result = 0;
+ unsigned char *bp = *dpp;
+
+ while (1)
+ {
+ byte = *bp++;
+ result |= (byte & 0x7f) << shift;
+ if ((byte & 0x80) == 0)
+ break;
+ shift += 7;
+ }
+ *dpp = bp;
+ return result;
+}
+
+static unsigned char *
+unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char byte1, abreg;
+ unw_word t, off;
+
+ byte1 = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ off = unw_decode_uleb128 (&dp);
+ abreg = (byte1 & 0x7f);
+ if (byte1 & 0x80)
+ UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
+ else
+ UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char byte1, byte2, abreg, x, ytreg;
+ unw_word t;
+
+ byte1 = *dp++; byte2 = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ abreg = (byte1 & 0x7f);
+ ytreg = byte2;
+ x = (byte1 >> 7) & 1;
+ if ((byte1 & 0x80) == 0 && ytreg == 0)
+ UNW_DEC_RESTORE(X2, t, abreg, arg);
+ else
+ UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char byte1, byte2, abreg, qp;
+ unw_word t, off;
+
+ byte1 = *dp++; byte2 = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ off = unw_decode_uleb128 (&dp);
+
+ qp = (byte1 & 0x3f);
+ abreg = (byte2 & 0x7f);
+
+ if (byte1 & 0x80)
+ UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
+ else
+ UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg;
+ unw_word t;
+
+ byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+ t = unw_decode_uleb128 (&dp);
+
+ qp = (byte1 & 0x3f);
+ abreg = (byte2 & 0x7f);
+ x = (byte2 >> 7) & 1;
+ ytreg = byte3;
+
+ if ((byte2 & 0x80) == 0 && byte3 == 0)
+ UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
+ else
+ UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
+{
+ int body = (code & 0x20) != 0;
+ unw_word rlen;
+
+ rlen = (code & 0x1f);
+ UNW_DEC_PROLOGUE(R1, body, rlen, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char byte1, mask, grsave;
+ unw_word rlen;
+
+ byte1 = *dp++;
+
+ mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+ grsave = (byte1 & 0x7f);
+ rlen = unw_decode_uleb128 (&dp);
+ UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word rlen;
+
+ rlen = unw_decode_uleb128 (&dp);
+ UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char brmask = (code & 0x1f);
+
+ UNW_DEC_BR_MEM(P1, brmask, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
+{
+ if ((code & 0x10) == 0)
+ {
+ unsigned char byte1 = *dp++;
+
+ UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
+ (byte1 & 0x7f), arg);
+ }
+ else if ((code & 0x08) == 0)
+ {
+ unsigned char byte1 = *dp++, r, dst;
+
+ r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+ dst = (byte1 & 0x7f);
+ switch (r)
+ {
+ case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
+ case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
+ case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
+ case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
+ case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
+ case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
+ case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
+ case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
+ case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
+ case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
+ case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
+ case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ else if ((code & 0x7) == 0)
+ UNW_DEC_SPILL_MASK(P4, dp, arg);
+ else if ((code & 0x7) == 1)
+ {
+ unw_word grmask, frmask, byte1, byte2, byte3;
+
+ byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+ grmask = ((byte1 >> 4) & 0xf);
+ frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
+ UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
+ }
+ else
+ UNW_DEC_BAD_CODE(code);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
+{
+ int gregs = (code & 0x10) != 0;
+ unsigned char mask = (code & 0x0f);
+
+ if (gregs)
+ UNW_DEC_GR_MEM(P6, mask, arg);
+ else
+ UNW_DEC_FR_MEM(P6, mask, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char r, byte1, byte2;
+ unw_word t, size;
+
+ if ((code & 0x10) == 0)
+ {
+ r = (code & 0xf);
+ t = unw_decode_uleb128 (&dp);
+ switch (r)
+ {
+ case 0:
+ size = unw_decode_uleb128 (&dp);
+ UNW_DEC_MEM_STACK_F(P7, t, size, arg);
+ break;
+
+ case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
+ case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
+ case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
+ case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
+ case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
+ case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
+ case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
+ case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
+ case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
+ case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
+ case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
+ case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
+ case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
+ case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
+ case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ else
+ {
+ switch (code & 0xf)
+ {
+ case 0x0: /* p8 */
+ {
+ r = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ switch (r)
+ {
+ case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
+ case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
+ case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
+ case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
+ case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
+ case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
+ case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
+ case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
+ case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
+ case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
+ case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
+ case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
+ case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
+ case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
+ case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
+ case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ break;
+
+ case 0x1:
+ byte1 = *dp++; byte2 = *dp++;
+ UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
+ break;
+
+ case 0xf: /* p10 */
+ byte1 = *dp++; byte2 = *dp++;
+ UNW_DEC_ABI(P10, byte1, byte2, arg);
+ break;
+
+ case 0x9:
+ return unw_decode_x1 (dp, code, arg);
+
+ case 0xa:
+ return unw_decode_x2 (dp, code, arg);
+
+ case 0xb:
+ return unw_decode_x3 (dp, code, arg);
+
+ case 0xc:
+ return unw_decode_x4 (dp, code, arg);
+
+ default:
+ UNW_DEC_BAD_CODE(code);
+ break;
+ }
+ }
+ return dp;
+}
+
+static unsigned char *
+unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word label = (code & 0x1f);
+
+ if ((code & 0x20) != 0)
+ UNW_DEC_COPY_STATE(B1, label, arg);
+ else
+ UNW_DEC_LABEL_STATE(B1, label, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word t;
+
+ t = unw_decode_uleb128 (&dp);
+ UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word t, ecount, label;
+
+ if ((code & 0x10) == 0)
+ {
+ t = unw_decode_uleb128 (&dp);
+ ecount = unw_decode_uleb128 (&dp);
+ UNW_DEC_EPILOGUE(B3, t, ecount, arg);
+ }
+ else if ((code & 0x07) == 0)
+ {
+ label = unw_decode_uleb128 (&dp);
+ if ((code & 0x08) != 0)
+ UNW_DEC_COPY_STATE(B4, label, arg);
+ else
+ UNW_DEC_LABEL_STATE(B4, label, arg);
+ }
+ else
+ switch (code & 0x7)
+ {
+ case 1: return unw_decode_x1 (dp, code, arg);
+ case 2: return unw_decode_x2 (dp, code, arg);
+ case 3: return unw_decode_x3 (dp, code, arg);
+ case 4: return unw_decode_x4 (dp, code, arg);
+ default: UNW_DEC_BAD_CODE(code); break;
+ }
+ return dp;
+}
+
+typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
+
+static unw_decoder unw_decode_table[2][8] =
+{
+ /* prologue table: */
+ {
+ unw_decode_r1, /* 0 */
+ unw_decode_r1,
+ unw_decode_r2,
+ unw_decode_r3,
+ unw_decode_p1, /* 4 */
+ unw_decode_p2_p5,
+ unw_decode_p6,
+ unw_decode_p7_p10
+ },
+ {
+ unw_decode_r1, /* 0 */
+ unw_decode_r1,
+ unw_decode_r2,
+ unw_decode_r3,
+ unw_decode_b1, /* 4 */
+ unw_decode_b1,
+ unw_decode_b2,
+ unw_decode_b3_x4
+ }
+};
+
+/*
+ * Decode one descriptor and return address of next descriptor.
+ */
+static inline unsigned char *
+unw_decode (unsigned char *dp, int inside_body, void *arg)
+{
+ unw_decoder decoder;
+ unsigned char code;
+
+ code = *dp++;
+ decoder = unw_decode_table[inside_body][code >> 5];
+ dp = (*decoder) (dp, code, arg);
+ return dp;
+}
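
unw_decode_uleb128() above is a standard little-endian base-128 decoder; a
worked example, with bytes chosen purely for illustration:

    /* Decoding the byte sequence 0xE5 0x8E 0x26:
     *   (0xE5 & 0x7f) << 0  = 0x00065   (bit 7 set: continue)
     *   (0x8E & 0x7f) << 7  = 0x00700   (bit 7 set: continue)
     *   (0x26 & 0x7f) << 14 = 0x98000   (bit 7 clear: stop)
     * result = 0x98000 | 0x700 | 0x65 = 0x98765 = 624485,
     * and *dpp is advanced past the three bytes consumed.
     */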
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/ia64/linux-xen/unwind_i.h
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/arch/ia64/linux-xen/unwind_i.h Mon Jan 9 11:22:17 2006
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * Kernel unwind support.
+ */
+
+#define UNW_VER(x) ((x) >> 48)
+#define UNW_FLAG_MASK 0x0000ffff00000000
+#define UNW_FLAG_OSMASK 0x0000f00000000000
+#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L)
+#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L)
+#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL)
+
+enum unw_register_index {
+ /* primary unat: */
+ UNW_REG_PRI_UNAT_GR,
+ UNW_REG_PRI_UNAT_MEM,
+
+ /* register stack */
+ UNW_REG_BSP, /* register stack pointer */
+ UNW_REG_BSPSTORE,
+ UNW_REG_PFS, /* previous function state */
+ UNW_REG_RNAT,
+ /* memory stack */
+ UNW_REG_PSP, /* previous memory stack pointer */
+ /* return pointer: */
+ UNW_REG_RP,
+
+ /* preserved registers: */
+ UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7,
+ UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
+ UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5,
+ UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5,
+ UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19,
+ UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23,
+ UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27,
+ UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31,
+ UNW_NUM_REGS
+};
+
+struct unw_info_block {
+ u64 header;
+ u64 desc[0]; /* unwind descriptors */
+ /* personality routine and language-specific data follow behind descriptors */
+};
+
+struct unw_table {
+ struct unw_table *next; /* must be first member! */
+ const char *name;
+ unsigned long gp; /* global pointer for this load-module */
+ unsigned long segment_base; /* base for offsets in the unwind table entries */
+ unsigned long start;
+ unsigned long end;
+ const struct unw_table_entry *array;
+ unsigned long length;
+};
+
+enum unw_where {
+ UNW_WHERE_NONE, /* register isn't saved at all */
+ UNW_WHERE_GR, /* register is saved in a general register */
+ UNW_WHERE_FR, /* register is saved in a floating-point register */
+ UNW_WHERE_BR, /* register is saved in a branch register */
+ UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */
+ UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */
+ /*
+ * At the end of each prologue these locations get resolved to
+ * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively:
+ */
+ UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */
+ UNW_WHERE_GR_SAVE /* register is saved in next general register */
+};
+
+#define UNW_WHEN_NEVER 0x7fffffff
+
+struct unw_reg_info {
+ unsigned long val; /* save location: register number or offset */
+ enum unw_where where; /* where the register gets saved */
+ int when; /* when the register gets saved */
+};
+
+struct unw_reg_state {
+ struct unw_reg_state *next; /* next (outer) element on state stack */
+ struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */
+};
+
+struct unw_labeled_state {
+ struct unw_labeled_state *next; /* next labeled state (or NULL) */
+ unsigned long label; /* label for this state */
+ struct unw_reg_state saved_state;
+};
+
+struct unw_state_record {
+ unsigned int first_region : 1; /* is this the first region? */
+ unsigned int done : 1; /* are we done scanning descriptors? */
+ unsigned int any_spills : 1; /* got any register spills? */
+ unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? */
+ unsigned long flags; /* see UNW_FLAG_* in unwind.h */
+
+ u8 *imask; /* imask of spill_mask record or NULL */
+ unsigned long pr_val; /* predicate values */
+ unsigned long pr_mask; /* predicate mask */
+ long spill_offset; /* psp-relative offset for spill base */
+ int region_start;
+ int region_len;
+ int epilogue_start;
+ int epilogue_count;
+ int when_target;
+
+ u8 gr_save_loc; /* next general register to use for saving a register */
+ u8 return_link_reg; /* branch register in which the return link is passed */
+
+ struct unw_labeled_state *labeled_states; /* list of all labeled states */
+ struct unw_reg_state curr; /* current state */
+};
+
+enum unw_nat_type {
+ UNW_NAT_NONE, /* NaT not represented */
+ UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */
+ UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */
+ UNW_NAT_REGSTK /* NaT is in rnat */
+};
+
+enum unw_insn_opcode {
+ UNW_INSN_ADD, /* s[dst] += val */
+ UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */
+ UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */
+ UNW_INSN_MOVE, /* s[dst] = s[val] */
+ UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */
+ UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */
+ UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK;
+ s[dst+1].nat.off = *s.pri_unat - s[dst] */
+ UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */
+ UNW_INSN_LOAD, /* s[dst] = *s[val] */
+ UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */
+ UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */
+};
+
+struct unw_insn {
+ unsigned int opc : 4;
+ unsigned int dst : 9;
+ signed int val : 19;
+};
+
+/*
+ * Preserved general static registers (r4-r7) give rise to two script
+ * instructions; everything else yields at most one instruction; at
+ * the end of the script, the psp gets popped, accounting for one more
+ * instruction.
+ */
+#define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5)
+
+struct unw_script {
+ unsigned long ip; /* ip this script is for */
+ unsigned long pr_mask; /* mask of predicates script depends on */
+ unsigned long pr_val; /* predicate values this script is for */
+ rwlock_t lock;
+ unsigned int flags; /* see UNW_FLAG_* in unwind.h */
+ unsigned short lru_chain; /* used for least-recently-used chain */
+ unsigned short coll_chain; /* used for hash collisions */
+ unsigned short hint; /* hint for next script to try (or -1) */
+ unsigned short count; /* number of instructions in script */
+ struct unw_insn insn[UNW_MAX_SCRIPT_LEN];
+};
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_32/xen.lds.S
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/x86_32/xen.lds.S Mon Jan 9 11:22:17 2006
@@ -0,0 +1,85 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
+ * Modified for i386 Xen by Keir Fraser
+ */
+
+#include <xen/config.h>
+#include <asm/page.h>
+#undef ENTRY
+#undef ALIGN
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(start)
+PHDRS
+{
+ text PT_LOAD ;
+}
+SECTIONS
+{
+ . = 0xFF000000 + 0x100000;
+ _text = .; /* Text and read-only data */
+ .text : {
+ *(.text)
+ *(.fixup)
+ *(.gnu.warning)
+ } :text =0x9090
+ .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
+
+ _etext = .; /* End of text section */
+
+ .rodata : { *(.rodata) *(.rodata.*) } :text
+
+ . = ALIGN(32); /* Exception table */
+ __start___ex_table = .;
+ __ex_table : { *(__ex_table) } :text
+ __stop___ex_table = .;
+
+ . = ALIGN(32); /* Pre-exception table */
+ __start___pre_ex_table = .;
+ __pre_ex_table : { *(__pre_ex_table) } :text
+ __stop___pre_ex_table = .;
+
+ .data : { /* Data */
+ *(.data)
+ CONSTRUCTORS
+ } :text
+
+ . = ALIGN(4096); /* Init code and data */
+ __init_begin = .;
+ .text.init : { *(.text.init) } :text
+ .data.init : { *(.data.init) } :text
+ . = ALIGN(32);
+ __setup_start = .;
+ .setup.init : { *(.setup.init) } :text
+ __setup_end = .;
+ __initcall_start = .;
+ .initcall.init : { *(.initcall.init) } :text
+ __initcall_end = .;
+ . = ALIGN(STACK_SIZE);
+ __init_end = .;
+
+ __bss_start = .; /* BSS */
+ .bss : {
+ *(.bss.stack_aligned)
+ *(.bss.page_aligned)
+ *(.bss)
+ } :text
+ _end = . ;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.text.exit)
+ *(.data.exit)
+ *(.exitcall.exit)
+ }
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_64/xen.lds.S
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/arch/x86/x86_64/xen.lds.S Mon Jan 9 11:22:17 2006
@@ -0,0 +1,83 @@
+/* Excerpts written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> */
+/* Modified for x86-64 Xen by Keir Fraser */
+
+#include <xen/config.h>
+#include <asm/page.h>
+#undef ENTRY
+#undef ALIGN
+
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(start)
+PHDRS
+{
+ text PT_LOAD ;
+}
+SECTIONS
+{
+ . = 0xFFFF830000100000;
+ _text = .; /* Text and read-only data */
+ .text : {
+ *(.text)
+ *(.fixup)
+ *(.gnu.warning)
+ } :text = 0x9090
+ .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
+
+ _etext = .; /* End of text section */
+
+ .rodata : { *(.rodata) *(.rodata.*) } :text
+
+ . = ALIGN(32); /* Exception table */
+ __start___ex_table = .;
+ __ex_table : { *(__ex_table) } :text
+ __stop___ex_table = .;
+
+ . = ALIGN(32); /* Pre-exception table */
+ __start___pre_ex_table = .;
+ __pre_ex_table : { *(__pre_ex_table) } :text
+ __stop___pre_ex_table = .;
+
+ .data : { /* Data */
+ *(.data)
+ CONSTRUCTORS
+ } :text
+
+ . = ALIGN(4096); /* Init code and data */
+ __init_begin = .;
+ .text.init : { *(.text.init) } :text
+ .data.init : { *(.data.init) } :text
+ . = ALIGN(32);
+ __setup_start = .;
+ .setup.init : { *(.setup.init) } :text
+ __setup_end = .;
+ __initcall_start = .;
+ .initcall.init : { *(.initcall.init) } :text
+ __initcall_end = .;
+ . = ALIGN(STACK_SIZE);
+ __init_end = .;
+
+ __bss_start = .; /* BSS */
+ .bss : {
+ *(.bss.stack_aligned)
+ *(.bss.page_aligned)
+ *(.bss)
+ } :text
+ _end = . ;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.text.exit)
+ *(.data.exit)
+ *(.exitcall.exit)
+ }
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/common/rangeset.c
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/common/rangeset.c Mon Jan 9 11:22:17 2006
@@ -0,0 +1,399 @@
+/******************************************************************************
+ * rangeset.c
+ *
+ * Creation, maintenance and automatic destruction of per-domain sets of
+ * numeric ranges.
+ *
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#include <xen/sched.h>
+#include <xen/rangeset.h>
+
+/* An inclusive range [s,e] and pointer to next range in ascending order. */
+struct range {
+ struct list_head list;
+ unsigned long s, e;
+};
+
+struct rangeset {
+ /* Owning domain and threaded list of rangesets. */
+ struct list_head rangeset_list;
+ struct domain *domain;
+
+ /* Ordered list of ranges contained in this set, and protecting lock. */
+ struct list_head range_list;
+ spinlock_t lock;
+
+ /* Pretty-printing name. */
+ char name[32];
+
+ /* RANGESETF flags. */
+ unsigned int flags;
+};
+
+/*****************************
+ * Private range functions hide the underlying linked-list implementation.
+ */
+
+/* Find highest range lower than or containing s. NULL if no such range. */
+static struct range *find_range(
+ struct rangeset *r, unsigned long s)
+{
+ struct range *x = NULL, *y;
+
+ list_for_each_entry ( y, &r->range_list, list )
+ {
+ if ( y->s > s )
+ break;
+ x = y;
+ }
+
+ return x;
+}
+
+/* Return the lowest range in the set r, or NULL if r is empty. */
+static struct range *first_range(
+ struct rangeset *r)
+{
+ if ( list_empty(&r->range_list) )
+ return NULL;
+ return list_entry(r->range_list.next, struct range, list);
+}
+
+/* Return range following x in ascending order, or NULL if x is the highest. */
+static struct range *next_range(
+ struct rangeset *r, struct range *x)
+{
+ if ( x->list.next == &r->range_list )
+ return NULL;
+ return list_entry(x->list.next, struct range, list);
+}
+
+/* Insert range y after range x in r. Insert as first range if x is NULL. */
+static void insert_range(
+ struct rangeset *r, struct range *x, struct range *y)
+{
+ list_add(&y->list, (x != NULL) ? &x->list : &r->range_list);
+}
+
+/* Remove a range from its list and free it. */
+static void destroy_range(
+ struct range *x)
+{
+ list_del(&x->list);
+ xfree(x);
+}
+
+/*****************************
+ * Core public functions
+ */
+
+int rangeset_add_range(
+ struct rangeset *r, unsigned long s, unsigned long e)
+{
+ struct range *x, *y;
+ int rc = 0;
+
+ spin_lock(&r->lock);
+
+ x = find_range(r, s);
+ y = find_range(r, e);
+
+ if ( x == y )
+ {
+ if ( (x == NULL) || ((x->e < s) && ((x->e + 1) != s)) )
+ {
+ x = xmalloc(struct range);
+ if ( x == NULL )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ x->s = s;
+ x->e = e;
+
+ insert_range(r, y, x);
+ }
+ else if ( x->e < e )
+ x->e = e;
+ }
+ else
+ {
+ if ( x == NULL )
+ {
+ x = first_range(r);
+ x->s = s;
+ }
+ else if ( (x->e < s) && ((x->e + 1) != s) )
+ {
+ x = next_range(r, x);
+ x->s = s;
+ }
+
+ x->e = (y->e > e) ? y->e : e;
+
+ for ( ; ; )
+ {
+ y = next_range(r, x);
+ if ( (y == NULL) || (y->e > x->e) )
+ break;
+ destroy_range(y);
+ }
+ }
+
+ y = next_range(r, x);
+ if ( (y != NULL) && ((x->e + 1) == y->s) )
+ {
+ x->e = y->e;
+ destroy_range(y);
+ }
+
+ out:
+ spin_unlock(&r->lock);
+ return rc;
+}
+
+int rangeset_remove_range(
+ struct rangeset *r, unsigned long s, unsigned long e)
+{
+ struct range *x, *y, *t;
+ int rc = 0;
+
+ spin_lock(&r->lock);
+
+ x = find_range(r, s);
+ y = find_range(r, e);
+
+ if ( x == y )
+ {
+ if ( (x == NULL) || (x->e < s) )
+ goto out;
+
+ if ( (x->s < s) && (x->e > e) )
+ {
+ y = xmalloc(struct range);
+ if ( y == NULL )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ y->s = e + 1;
+ y->e = x->e;
+ x->e = s - 1;
+
+ insert_range(r, x, y);
+ }
+ else if ( (x->s == s) && (x->e <= e) )
+ destroy_range(x);
+ else if ( x->s == s )
+ x->s = e + 1;
+ else if ( x->e <= e )
+ x->e = s - 1;
+ }
+ else
+ {
+ if ( x == NULL )
+ x = first_range(r);
+
+ if ( x->s < s )
+ {
+ x->e = s - 1;
+ x = next_range(r, x);
+ }
+
+ while ( x != y )
+ {
+ t = x;
+ x = next_range(r, x);
+ destroy_range(t);
+ }
+
+ x->s = e + 1;
+ if ( x->s > x->e )
+ destroy_range(x);
+ }
+
+ out:
+ spin_unlock(&r->lock);
+ return rc;
+}
+
+int rangeset_contains_range(
+ struct rangeset *r, unsigned long s, unsigned long e)
+{
+ struct range *x;
+ int contains;
+
+ spin_lock(&r->lock);
+ x = find_range(r, s);
+ contains = (x && (x->e >= e));
+ spin_unlock(&r->lock);
+
+ return contains;
+}
+
+int rangeset_add_singleton(
+ struct rangeset *r, unsigned long s)
+{
+ return rangeset_add_range(r, s, s);
+}
+
+int rangeset_remove_singleton(
+ struct rangeset *r, unsigned long s)
+{
+ return rangeset_remove_range(r, s, s);
+}
+
+int rangeset_contains_singleton(
+ struct rangeset *r, unsigned long s)
+{
+ return rangeset_contains_range(r, s, s);
+}
+
+int rangeset_is_empty(
+ struct rangeset *r)
+{
+ return list_empty(&r->range_list);
+}
+
+struct rangeset *rangeset_new(
+ struct domain *d, char *name, unsigned int flags)
+{
+ struct rangeset *r;
+
+ r = xmalloc(struct rangeset);
+ if ( r == NULL )
+ return NULL;
+
+ spin_lock_init(&r->lock);
+ INIT_LIST_HEAD(&r->range_list);
+
+ BUG_ON(flags & ~RANGESETF_prettyprint_hex);
+ r->flags = flags;
+
+ if ( name != NULL )
+ {
+ strncpy(r->name, name, sizeof(r->name));
+ r->name[sizeof(r->name)-1] = '\0';
+ }
+ else
+ {
+ sprintf(r->name, "(no name)");
+ }
+
+ if ( (r->domain = d) != NULL )
+ {
+ spin_lock(&d->rangesets_lock);
+ list_add(&r->rangeset_list, &d->rangesets);
+ spin_unlock(&d->rangesets_lock);
+ }
+
+ return r;
+}
+
+void rangeset_destroy(
+ struct rangeset *r)
+{
+ struct range *x;
+
+ if ( r == NULL )
+ return;
+
+ if ( r->domain != NULL )
+ {
+ spin_lock(&r->domain->rangesets_lock);
+ list_del(&r->rangeset_list);
+ spin_unlock(&r->domain->rangesets_lock);
+ }
+
+ while ( (x = first_range(r)) != NULL )
+ destroy_range(x);
+
+ xfree(r);
+}
+
+void rangeset_domain_initialise(
+ struct domain *d)
+{
+ INIT_LIST_HEAD(&d->rangesets);
+ spin_lock_init(&d->rangesets_lock);
+}
+
+void rangeset_domain_destroy(
+ struct domain *d)
+{
+ struct rangeset *r;
+
+ while ( !list_empty(&d->rangesets) )
+ {
+ r = list_entry(d->rangesets.next, struct rangeset, rangeset_list);
+
+ BUG_ON(r->domain != d);
+ r->domain = NULL;
+ list_del(&r->rangeset_list);
+
+ rangeset_destroy(r);
+ }
+}
+
+/*****************************
+ * Pretty-printing functions
+ */
+
+static void print_limit(struct rangeset *r, unsigned long s)
+{
+ printk((r->flags & RANGESETF_prettyprint_hex) ? "%lx" : "%lu", s);
+}
+
+void rangeset_printk(
+ struct rangeset *r)
+{
+ int nr_printed = 0;
+ struct range *x;
+
+ spin_lock(&r->lock);
+
+ printk("%-10s {", r->name);
+
+ for ( x = first_range(r); x != NULL; x = next_range(r, x) )
+ {
+ if ( nr_printed++ )
+ printk(",");
+ printk(" ");
+ print_limit(r, x->s);
+ if ( x->s != x->e )
+ {
+ printk("-");
+ print_limit(r, x->e);
+ }
+ }
+
+ printk(" }");
+
+ spin_unlock(&r->lock);
+}
+
+void rangeset_domain_printk(
+ struct domain *d)
+{
+ struct rangeset *r;
+
+ printk("Rangesets belonging to domain %u:\n", d->domain_id);
+
+ spin_lock(&d->rangesets_lock);
+
+ if ( list_empty(&d->rangesets) )
+ printk(" None\n");
+
+ list_for_each_entry ( r, &d->rangesets, rangeset_list )
+ {
+ printk(" ");
+ rangeset_printk(r);
+ printk("\n");
+ }
+
+ spin_unlock(&d->rangesets_lock);
+}
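
As a usage illustration of the add/remove semantics implemented above (a
hypothetical sequence; error returns elided for brevity, and the set contents
shown in the comments follow from the merge/split logic in rangeset_add_range()
and rangeset_remove_range()):

    struct rangeset *r = rangeset_new(d, "example", 0);

    rangeset_add_range(r, 0, 3);        /* { 0-3 } */
    rangeset_add_range(r, 5, 10);       /* { 0-3, 5-10 } */
    rangeset_add_singleton(r, 4);       /* { 0-10 }: adjacent ranges merge */
    rangeset_remove_range(r, 2, 6);     /* { 0-1, 7-10 }: the range is split */
    rangeset_contains_range(r, 7, 9);   /* returns 1 */
    rangeset_contains_range(r, 1, 8);   /* returns 0: spans a hole */
    rangeset_destroy(r);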
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-ia64/iocap.h
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-ia64/iocap.h Mon Jan 9 11:22:17 2006
@@ -0,0 +1,10 @@
+/******************************************************************************
+ * iocap.h
+ *
+ * Architecture-specific per-domain I/O capabilities.
+ */
+
+#ifndef __IA64_IOCAP_H__
+#define __IA64_IOCAP_H__
+
+#endif /* __IA64_IOCAP_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/iocap.h
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/include/asm-x86/iocap.h Mon Jan 9 11:22:17 2006
@@ -0,0 +1,20 @@
+/******************************************************************************
+ * iocap.h
+ *
+ * Architecture-specific per-domain I/O capabilities.
+ */
+
+#ifndef __X86_IOCAP_H__
+#define __X86_IOCAP_H__
+
+#define ioports_permit_access(d, s, e) \
+ rangeset_add_range((d)->arch.ioport_caps, s, e)
+#define ioports_deny_access(d, s, e) \
+ rangeset_remove_range((d)->arch.ioport_caps, s, e)
+#define ioports_access_permitted(d, s, e) \
+ rangeset_contains_range((d)->arch.ioport_caps, s, e)
+
+#define cache_flush_permitted(d) \
+ (!rangeset_is_empty((d)->iomem_caps))
+
+#endif /* __X86_IOCAP_H__ */
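
A sketch of how a control path might use these wrappers (hypothetical snippet;
the port numbers are just the VGA register window, and the error paths are
illustrative):

    /* Grant domain d direct access to the VGA I/O ports 0x3C0-0x3DF. */
    if ( ioports_permit_access(d, 0x3c0, 0x3df) != 0 )
        return -ENOMEM;     /* underlying rangeset_add_range() failed */

    /* Later, before forwarding a guest port access of 'bytes' bytes: */
    if ( !ioports_access_permitted(d, port, port + bytes - 1) )
        return -EPERM;      /* hypothetical rejection path */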
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/hvm_info_table.h
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/include/public/hvm/hvm_info_table.h Mon Jan 9 11:22:17 2006
@@ -0,0 +1,24 @@
+/******************************************************************************
+ * hvm/hvm_info_table.h
+ *
+ * HVM parameter and information table, written into guest memory map.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+
+#define HVM_INFO_PFN 0x09F
+#define HVM_INFO_OFFSET 0x800
+#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
+
+struct hvm_info_table {
+ char signature[8]; /* "HVM INFO" */
+ uint32_t length;
+ uint8_t checksum;
+ uint8_t acpi_enabled;
+ uint8_t apic_enabled;
+ uint8_t pad[1];
+ uint32_t nr_vcpus;
+};
+
+#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
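
The header leaves the checksum convention implicit; assuming the ACPI-style
rule that all bytes of the table (checksum field included) sum to zero mod 256
-- an assumption, not something this header specifies -- a consumer could
validate it as:

    /* Sketch only: assumes ACPI-style "bytes sum to zero" checksumming. */
    static int hvm_info_checksum_ok(const struct hvm_info_table *t)
    {
        const uint8_t *p = (const uint8_t *)t;
        uint8_t sum = 0;
        uint32_t i;

        for ( i = 0; i < t->length; i++ )
            sum += p[i];       /* wraps mod 256 via uint8_t arithmetic */
        return sum == 0;
    }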
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/ioreq.h
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/include/public/hvm/ioreq.h Mon Jan 9 11:22:17 2006
@@ -0,0 +1,90 @@
+/*
+ * ioreq.h: I/O request definitions for device models
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef _IOREQ_H_
+#define _IOREQ_H_
+
+#define IOREQ_READ 1
+#define IOREQ_WRITE 0
+
+#define STATE_INVALID 0
+#define STATE_IOREQ_READY 1
+#define STATE_IOREQ_INPROCESS 2
+#define STATE_IORESP_READY 3
+#define STATE_IORESP_HOOK 4
+
+#define IOREQ_TYPE_PIO 0 /* pio */
+#define IOREQ_TYPE_COPY 1 /* mmio ops */
+#define IOREQ_TYPE_AND 2
+#define IOREQ_TYPE_OR 3
+#define IOREQ_TYPE_XOR 4
+
+/*
+ * VMExit dispatcher should cooperate with instruction decoder to
+ * prepare this structure and notify service OS and DM by sending
+ * virq
+ */
+typedef struct {
+ uint64_t addr; /* physical address */
+ uint64_t size; /* size in bytes */
+ uint64_t count; /* for rep prefixes */
+ union {
+ uint64_t data; /* data */
+ void *pdata; /* pointer to data */
+ } u;
+ uint8_t state:4;
+ uint8_t pdata_valid:1; /* if 1, use pdata above */
+ uint8_t dir:1; /* 1=read, 0=write */
+ uint8_t df:1;
+ uint8_t type; /* I/O type */
+} ioreq_t;
+
+#define MAX_VECTOR 256
+#define BITS_PER_BYTE 8
+#define INTR_LEN (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
+#define INTR_LEN_32 (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
+
+typedef struct {
+ uint16_t pic_elcr;
+ uint16_t pic_irr;
+ uint16_t pic_last_irr;
+ uint16_t pic_clear_irr;
+ int eport; /* Event channel port */
+} global_iodata_t;
+
+typedef struct {
+ ioreq_t vp_ioreq;
+} vcpu_iodata_t;
+
+typedef struct {
+ global_iodata_t sp_global;
+ vcpu_iodata_t vcpu_iodata[1];
+} shared_iopage_t;
+
+#endif /* _IOREQ_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
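
The STATE_* constants above form a simple request/response handshake between
Xen and a device model. In outline -- a sketch with hypothetical handler names;
real device models are notified via the event channel port in global_iodata_t
rather than by polling:

    ioreq_t *p = &shared_page->vcpu_iodata[vcpu].vp_ioreq;

    if ( p->state == STATE_IOREQ_READY )
    {
        p->state = STATE_IOREQ_INPROCESS;
        if ( p->type == IOREQ_TYPE_PIO )
            handle_pio(p);               /* hypothetical PIO emulator */
        else
            handle_mmio(p);              /* hypothetical MMIO emulator */
        p->state = STATE_IORESP_READY;   /* result handed back to Xen */
    }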
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/hvm/vmx_assist.h
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/include/public/hvm/vmx_assist.h Mon Jan 9 11:22:17 2006
@@ -0,0 +1,97 @@
+/*
+ * vmx_assist.h: Context definitions for the VMXASSIST world switch.
+ *
+ * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
+ * Copyright (c) 2005, International Business Machines Corporation.
+ */
+
+#ifndef _VMX_ASSIST_H_
+#define _VMX_ASSIST_H_
+
+#define VMXASSIST_BASE 0xD0000
+#define VMXASSIST_MAGIC 0x17101966
+#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
+
+#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
+#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
+
+#ifndef __ASSEMBLY__
+
+union vmcs_arbytes {
+ struct arbyte_fields {
+ unsigned int seg_type : 4,
+ s : 1,
+ dpl : 2,
+ p : 1,
+ reserved0 : 4,
+ avl : 1,
+ reserved1 : 1,
+ default_ops_size: 1,
+ g : 1,
+ null_bit : 1,
+ reserved2 : 15;
+ } fields;
+ unsigned int bytes;
+};
+
+/*
+ * World switch state
+ */
+typedef struct vmx_assist_context {
+ uint32_t eip; /* execution pointer */
+ uint32_t esp; /* stack pointer */
+ uint32_t eflags; /* flags register */
+ uint32_t cr0;
+ uint32_t cr3; /* page table directory */
+ uint32_t cr4;
+ uint32_t idtr_limit; /* idt */
+ uint32_t idtr_base;
+ uint32_t gdtr_limit; /* gdt */
+ uint32_t gdtr_base;
+ uint32_t cs_sel; /* cs selector */
+ uint32_t cs_limit;
+ uint32_t cs_base;
+ union vmcs_arbytes cs_arbytes;
+ uint32_t ds_sel; /* ds selector */
+ uint32_t ds_limit;
+ uint32_t ds_base;
+ union vmcs_arbytes ds_arbytes;
+ uint32_t es_sel; /* es selector */
+ uint32_t es_limit;
+ uint32_t es_base;
+ union vmcs_arbytes es_arbytes;
+ uint32_t ss_sel; /* ss selector */
+ uint32_t ss_limit;
+ uint32_t ss_base;
+ union vmcs_arbytes ss_arbytes;
+ uint32_t fs_sel; /* fs selector */
+ uint32_t fs_limit;
+ uint32_t fs_base;
+ union vmcs_arbytes fs_arbytes;
+ uint32_t gs_sel; /* gs selector */
+ uint32_t gs_limit;
+ uint32_t gs_base;
+ union vmcs_arbytes gs_arbytes;
+ uint32_t tr_sel; /* task selector */
+ uint32_t tr_limit;
+ uint32_t tr_base;
+ union vmcs_arbytes tr_arbytes;
+ uint32_t ldtr_sel; /* ldtr selector */
+ uint32_t ldtr_limit;
+ uint32_t ldtr_base;
+ union vmcs_arbytes ldtr_arbytes;
+} vmx_assist_context_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _VMX_ASSIST_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
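
A small sketch of how a loader might sanity-check a VMXASSIST image using the
constants above (guest_read_u32() is a hypothetical guest-physical read helper,
not part of this header):

    /* After copying the blob to VMXASSIST_BASE in guest memory: */
    if ( guest_read_u32(v, VMXASSIST_MAGIC_OFFSET) != VMXASSIST_MAGIC )
        return -EINVAL;   /* image does not carry the expected magic */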
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/iocap.h
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/iocap.h Mon Jan 9 11:22:17 2006
@@ -0,0 +1,34 @@
+/******************************************************************************
+ * iocap.h
+ *
+ * Per-domain I/O capabilities.
+ */
+
+#ifndef __XEN_IOCAP_H__
+#define __XEN_IOCAP_H__
+
+#include <xen/rangeset.h>
+#include <asm/iocap.h>
+
+#define iomem_permit_access(d, s, e) \
+ rangeset_add_range((d)->iomem_caps, s, e)
+#define iomem_deny_access(d, s, e) \
+ rangeset_remove_range((d)->iomem_caps, s, e)
+#define iomem_access_permitted(d, s, e) \
+ rangeset_contains_range((d)->iomem_caps, s, e)
+
+#define irq_permit_access(d, i) \
+ rangeset_add_singleton((d)->irq_caps, i)
+#define irq_deny_access(d, i) \
+ rangeset_remove_singleton((d)->irq_caps, i)
+#define irqs_permit_access(d, s, e) \
+ rangeset_add_range((d)->irq_caps, s, e)
+#define irqs_deny_access(d, s, e) \
+ rangeset_remove_range((d)->irq_caps, s, e)
+#define irq_access_permitted(d, i) \
+ rangeset_contains_singleton((d)->irq_caps, i)
+
+#define multipage_allocation_permitted(d) \
+ (!rangeset_is_empty((d)->iomem_caps))
+
+#endif /* __XEN_IOCAP_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/xen/rangeset.h
--- /dev/null Mon Jan 9 11:19:55 2006
+++ b/xen/include/xen/rangeset.h Mon Jan 9 11:22:17 2006
@@ -0,0 +1,71 @@
+/******************************************************************************
+ * rangeset.h
+ *
+ * Creation, maintenance and automatic destruction of per-domain sets of
+ * numeric ranges.
+ *
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#ifndef __XEN_RANGESET_H__
+#define __XEN_RANGESET_H__
+
+struct domain;
+struct rangeset;
+
+/*
+ * Initialise/destroy per-domain rangeset information.
+ *
+ * It is invalid to create or destroy a rangeset belonging to a domain @d
+ * before rangeset_domain_initialise(d) returns or after calling
+ * rangeset_domain_destroy(d).
+ */
+void rangeset_domain_initialise(
+ struct domain *d);
+void rangeset_domain_destroy(
+ struct domain *d);
+
+/*
+ * Create/destroy a rangeset. Optionally attach to specified domain @d for
+ * auto-destruction when the domain dies. A name may be specified, for use
+ * in debug pretty-printing, and various RANGESETF flags (defined below).
+ *
+ * It is invalid to perform any operation on a rangeset @r after calling
+ * rangeset_destroy(r).
+ */
+struct rangeset *rangeset_new(
+ struct domain *d, char *name, unsigned int flags);
+void rangeset_destroy(
+ struct rangeset *r);
+
+/* Flags for passing to rangeset_new(). */
+ /* Pretty-print range limits in hexadecimal. */
+#define _RANGESETF_prettyprint_hex 0
+#define RANGESETF_prettyprint_hex (1U << _RANGESETF_prettyprint_hex)
+
+int __must_check rangeset_is_empty(
+ struct rangeset *r);
+
+/* Add/remove/query a numeric range. */
+int __must_check rangeset_add_range(
+ struct rangeset *r, unsigned long s, unsigned long e);
+int __must_check rangeset_remove_range(
+ struct rangeset *r, unsigned long s, unsigned long e);
+int __must_check rangeset_contains_range(
+ struct rangeset *r, unsigned long s, unsigned long e);
+
+/* Add/remove/query a single number. */
+int __must_check rangeset_add_singleton(
+ struct rangeset *r, unsigned long s);
+int __must_check rangeset_remove_singleton(
+ struct rangeset *r, unsigned long s);
+int __must_check rangeset_contains_singleton(
+ struct rangeset *r, unsigned long s);
+
+/* Rangeset pretty printing. */
+void rangeset_printk(
+ struct rangeset *r);
+void rangeset_domain_printk(
+ struct domain *d);
+
+#endif /* __XEN_RANGESET_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_32/xen.lds
--- a/xen/arch/x86/x86_32/xen.lds Mon Jan 9 11:19:55 2006
+++ /dev/null Mon Jan 9 11:22:17 2006
@@ -1,79 +0,0 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
- * Modified for i386 Xen by Keir Fraser
- */
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(start)
-PHDRS
-{
- text PT_LOAD ;
-}
-SECTIONS
-{
- . = 0xFF000000 + 0x100000;
- _text = .; /* Text and read-only data */
- .text : {
- *(.text)
- *(.fixup)
- *(.gnu.warning)
- } :text =0x9090
- .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
-
- _etext = .; /* End of text section */
-
- .rodata : { *(.rodata) *(.rodata.*) } :text
-
- . = ALIGN(32); /* Exception table */
- __start___ex_table = .;
- __ex_table : { *(__ex_table) } :text
- __stop___ex_table = .;
-
- . = ALIGN(32); /* Pre-exception table */
- __start___pre_ex_table = .;
- __pre_ex_table : { *(__pre_ex_table) } :text
- __stop___pre_ex_table = .;
-
- .data : { /* Data */
- *(.data)
- CONSTRUCTORS
- } :text
-
- . = ALIGN(4096); /* Init code and data */
- __init_begin = .;
- .text.init : { *(.text.init) } :text
- .data.init : { *(.data.init) } :text
- . = ALIGN(32);
- __setup_start = .;
- .setup.init : { *(.setup.init) } :text
- __setup_end = .;
- __initcall_start = .;
- .initcall.init : { *(.initcall.init) } :text
- __initcall_end = .;
- . = ALIGN(8192);
- __init_end = .;
-
- __bss_start = .; /* BSS */
- .bss : {
- *(.bss.twopage_aligned)
- *(.bss.page_aligned)
- *(.bss)
- } :text
- _end = . ;
-
- /* Sections to be discarded */
- /DISCARD/ : {
- *(.text.exit)
- *(.data.exit)
- *(.exitcall.exit)
- }
-
- /* Stabs debugging sections. */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-}
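
For readers unfamiliar with the __start___ex_table/__stop___ex_table pairs this script lays down: they are ordinary linker-defined symbols bracketing a table that C code can walk directly. A minimal sketch, assuming a conventional two-word entry layout — the struct and function below are illustrative, not part of this patch:

/* Illustrative: walk a linker-delimited exception table. */
struct exception_table_entry {
    unsigned long insn;    /* faulting instruction address */
    unsigned long fixup;   /* address to resume at */
};

extern struct exception_table_entry __start___ex_table[];
extern struct exception_table_entry __stop___ex_table[];

static unsigned long search_ex_table(unsigned long addr)
{
    const struct exception_table_entry *e;

    for ( e = __start___ex_table; e < __stop___ex_table; e++ )
        if ( e->insn == addr )
            return e->fixup;

    return 0;   /* no fixup registered for this address */
}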
diff -r 25e3c8668f1f -r 8af1199488d3 xen/arch/x86/x86_64/xen.lds
--- a/xen/arch/x86/x86_64/xen.lds Mon Jan 9 11:19:55 2006
+++ /dev/null Mon Jan 9 11:22:17 2006
@@ -1,77 +0,0 @@
-/* Excerpts written by Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> */
-/* Modified for x86-64 Xen by Keir Fraser */
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(start)
-PHDRS
-{
- text PT_LOAD ;
-}
-SECTIONS
-{
- . = 0xFFFF830000100000;
- _text = .; /* Text and read-only data */
- .text : {
- *(.text)
- *(.fixup)
- *(.gnu.warning)
- } :text = 0x9090
- .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
-
- _etext = .; /* End of text section */
-
- .rodata : { *(.rodata) *(.rodata.*) } :text
-
- . = ALIGN(32); /* Exception table */
- __start___ex_table = .;
- __ex_table : { *(__ex_table) } :text
- __stop___ex_table = .;
-
- . = ALIGN(32); /* Pre-exception table */
- __start___pre_ex_table = .;
- __pre_ex_table : { *(__pre_ex_table) } :text
- __stop___pre_ex_table = .;
-
- .data : { /* Data */
- *(.data)
- CONSTRUCTORS
- } :text
-
- . = ALIGN(4096); /* Init code and data */
- __init_begin = .;
- .text.init : { *(.text.init) } :text
- .data.init : { *(.data.init) } :text
- . = ALIGN(32);
- __setup_start = .;
- .setup.init : { *(.setup.init) } :text
- __setup_end = .;
- __initcall_start = .;
- .initcall.init : { *(.initcall.init) } :text
- __initcall_end = .;
- . = ALIGN(8192);
- __init_end = .;
-
- __bss_start = .; /* BSS */
- .bss : {
- *(.bss.twopage_aligned)
- *(.bss.page_aligned)
- *(.bss)
- } :text
- _end = . ;
-
- /* Sections to be discarded */
- /DISCARD/ : {
- *(.text.exit)
- *(.data.exit)
- *(.exitcall.exit)
- }
-
- /* Stabs debugging sections. */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-}
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/asm-x86/physdev.h
--- a/xen/include/asm-x86/physdev.h Mon Jan 9 11:19:55 2006
+++ /dev/null Mon Jan 9 11:22:17 2006
@@ -1,17 +0,0 @@
-/******************************************************************************
- * physdev.h
- */
-
-#ifndef __XEN_PHYSDEV_H__
-#define __XEN_PHYSDEV_H__
-
-#include <public/physdev.h>
-
-void physdev_modify_ioport_access_range(
- struct domain *d, int enable, int port, int num );
-void physdev_destroy_state(struct domain *d);
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn);
-long do_physdev_op(physdev_op_t *uop);
-void physdev_init_dom0(struct domain *d);
-
-#endif /* __XEN_PHYSDEV_H__ */
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/io/ioreq.h
--- a/xen/include/public/io/ioreq.h Mon Jan 9 11:19:55 2006
+++ /dev/null Mon Jan 9 11:22:17 2006
@@ -1,91 +0,0 @@
-/*
- * ioreq.h: I/O request definitions for device models
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef _IOREQ_H_
-#define _IOREQ_H_
-
-#define IOREQ_READ 1
-#define IOREQ_WRITE 0
-
-#define STATE_INVALID 0
-#define STATE_IOREQ_READY 1
-#define STATE_IOREQ_INPROCESS 2
-#define STATE_IORESP_READY 3
-#define STATE_IORESP_HOOK 4
-
-#define IOREQ_TYPE_PIO 0 /* pio */
-#define IOREQ_TYPE_COPY 1 /* mmio ops */
-#define IOREQ_TYPE_AND 2
-#define IOREQ_TYPE_OR 3
-#define IOREQ_TYPE_XOR 4
-
-/*
- * The VMExit dispatcher should cooperate with the instruction decoder to
- * prepare this structure, and should notify the service OS and DM by
- * sending a virq.
- */
-typedef struct {
- uint64_t addr; /* physical address */
- uint64_t size; /* size in bytes */
- uint64_t count; /* for rep prefixes */
- union {
- uint64_t data; /* data */
- void *pdata; /* pointer to data */
- } u;
- uint8_t state:4;
- uint8_t pdata_valid:1; /* if 1, use pdata above */
- uint8_t dir:1; /* 1=read, 0=write */
- uint8_t df:1;
- uint8_t type; /* I/O type */
-} ioreq_t;
-
-#define MAX_VECTOR 256
-#define BITS_PER_BYTE 8
-#define INTR_LEN (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t)))
-#define INTR_LEN_32 (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t)))
-
-typedef struct {
- uint16_t pic_elcr;
- uint16_t pic_irr;
- uint16_t pic_last_irr;
- uint16_t pic_clear_irr;
- int eport; /* Event channel port */
-} global_iodata_t;
-
-typedef struct {
- ioreq_t vp_ioreq;
- unsigned long vp_intr[INTR_LEN];
-} vcpu_iodata_t;
-
-typedef struct {
- global_iodata_t sp_global;
- vcpu_iodata_t vcpu_iodata[1];
-} shared_iopage_t;
-
-#endif /* _IOREQ_H_ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
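
The STATE_* constants in the header above describe a simple handshake: the hypervisor posts a request in STATE_IOREQ_READY, the device model marks it STATE_IOREQ_INPROCESS while emulating, and parks the result in STATE_IORESP_READY. A hedged sketch of the consumer side — handle_pio() and handle_mmio() are placeholder names, not functions from this tree:

/* Illustrative device-model loop body for one vcpu's I/O page. */
static void service_ioreq(vcpu_iodata_t *vio)
{
    ioreq_t *req = &vio->vp_ioreq;

    if ( req->state != STATE_IOREQ_READY )
        return;   /* nothing posted for this vcpu */

    req->state = STATE_IOREQ_INPROCESS;

    switch ( req->type )
    {
    case IOREQ_TYPE_PIO:
        handle_pio(req);    /* placeholder: port-I/O emulation */
        break;
    case IOREQ_TYPE_COPY:
        handle_mmio(req);   /* placeholder: MMIO emulation */
        break;
    default:
        break;
    }

    req->state = STATE_IORESP_READY;   /* hand the result back */
}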
diff -r 25e3c8668f1f -r 8af1199488d3 xen/include/public/vmx_assist.h
--- a/xen/include/public/vmx_assist.h Mon Jan 9 11:19:55 2006
+++ /dev/null Mon Jan 9 11:22:17 2006
@@ -1,97 +0,0 @@
-/*
- * vmx_assist.h: Context definitions for the VMXASSIST world switch.
- *
- * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
- * Copyright (c) 2005, International Business Machines Corporation.
- */
-
-#ifndef _VMX_ASSIST_H_
-#define _VMX_ASSIST_H_
-
-#define VMXASSIST_BASE 0xD0000
-#define VMXASSIST_MAGIC 0x17101966
-#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
-
-#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
-#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
-
-#ifndef __ASSEMBLY__
-
-union vmcs_arbytes {
- struct arbyte_fields {
- unsigned int seg_type : 4,
- s : 1,
- dpl : 2,
- p : 1,
- reserved0 : 4,
- avl : 1,
- reserved1 : 1,
- default_ops_size: 1,
- g : 1,
- null_bit : 1,
- reserved2 : 15;
- } fields;
- unsigned int bytes;
-};
-
-/*
- * World switch state
- */
-typedef struct vmx_assist_context {
- uint32_t eip; /* execution pointer */
- uint32_t esp; /* stack pointer */
- uint32_t eflags; /* flags register */
- uint32_t cr0;
- uint32_t cr3; /* page table directory */
- uint32_t cr4;
- uint32_t idtr_limit; /* idt */
- uint32_t idtr_base;
- uint32_t gdtr_limit; /* gdt */
- uint32_t gdtr_base;
- uint32_t cs_sel; /* cs selector */
- uint32_t cs_limit;
- uint32_t cs_base;
- union vmcs_arbytes cs_arbytes;
- uint32_t ds_sel; /* ds selector */
- uint32_t ds_limit;
- uint32_t ds_base;
- union vmcs_arbytes ds_arbytes;
- uint32_t es_sel; /* es selector */
- uint32_t es_limit;
- uint32_t es_base;
- union vmcs_arbytes es_arbytes;
- uint32_t ss_sel; /* ss selector */
- uint32_t ss_limit;
- uint32_t ss_base;
- union vmcs_arbytes ss_arbytes;
- uint32_t fs_sel; /* fs selector */
- uint32_t fs_limit;
- uint32_t fs_base;
- union vmcs_arbytes fs_arbytes;
- uint32_t gs_sel; /* gs selector */
- uint32_t gs_limit;
- uint32_t gs_base;
- union vmcs_arbytes gs_arbytes;
- uint32_t tr_sel; /* task selector */
- uint32_t tr_limit;
- uint32_t tr_base;
- union vmcs_arbytes tr_arbytes;
- uint32_t ldtr_sel; /* ldtr selector */
- uint32_t ldtr_limit;
- uint32_t ldtr_base;
- union vmcs_arbytes ldtr_arbytes;
-} vmx_assist_context_t;
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _VMX_ASSIST_H_ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
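
Since VMXASSIST_MAGIC and the two context words sit at fixed guest-physical offsets, a caller can probe for a loaded VMXASSIST image before attempting a world switch. A minimal sketch — read_guest_u32() stands in for whatever guest-memory accessor the caller actually has:

/* Illustrative: check whether VMXASSIST is installed in the guest. */
static int vmxassist_present(void)
{
    uint32_t new_ctx, old_ctx;

    if ( read_guest_u32(VMXASSIST_MAGIC_OFFSET) != VMXASSIST_MAGIC )
        return 0;   /* magic word absent: not installed */

    /* Guest-physical addresses of the vmx_assist_context_t blocks. */
    new_ctx = read_guest_u32(VMXASSIST_NEW_CONTEXT);
    old_ctx = read_guest_u32(VMXASSIST_OLD_CONTEXT);

    return (new_ctx != 0) && (old_ctx != 0);
}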
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog