# HG changeset patch
# User Hollis Blanchard <hollisb@xxxxxxxxxx>
# Node ID 156a0963a1aed529e5c5517e7153b0ad64d99276
# Parent d3e181fa238b93c616bd010edd45f707c359cf99
# Parent c191c649cdb387e7ec573d218c9581c639c87700
merge
---
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 14
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 7
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 27
linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 288 ++++++++--
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 26
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 173 ++++--
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 14
linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 7
linux-2.6-xen-sparse/include/xen/public/privcmd.h | 16
tools/debugger/libxendebug/xendebug.c | 2
tools/firmware/vmxassist/vm86.c | 65 ++
tools/ioemu/hw/cirrus_vga.c | 12
tools/ioemu/vl.c | 15
tools/libxc/xc_core.c | 8
tools/libxc/xc_domain.c | 10
tools/libxc/xc_hvm_build.c | 6
tools/libxc/xc_ia64_stubs.c | 12
tools/libxc/xc_linux.c | 2
tools/libxc/xc_linux_build.c | 58 +-
tools/libxc/xc_linux_restore.c | 210 ++++++-
tools/libxc/xc_linux_save.c | 51 +
tools/libxc/xc_load_aout9.c | 4
tools/libxc/xc_load_bin.c | 4
tools/libxc/xc_load_elf.c | 19
tools/libxc/xc_private.c | 62 +-
tools/libxc/xenctrl.h | 19
tools/libxc/xg_private.h | 7
tools/libxc/xg_save_restore.h | 12
tools/tests/test_x86_emulator.c | 131 ++--
xen/arch/x86/domain.c | 21
xen/arch/x86/domain_build.c | 3
xen/arch/x86/hvm/vmx/vmx.c | 22
xen/arch/x86/hvm/vmx/x86_32/exits.S | 35 -
xen/arch/x86/hvm/vmx/x86_64/exits.S | 71 +-
xen/arch/x86/mm.c | 15
xen/arch/x86/x86_32/asm-offsets.c | 2
xen/arch/x86/x86_32/entry.S | 5
xen/arch/x86/x86_32/traps.c | 6
xen/arch/x86/x86_64/asm-offsets.c | 3
xen/arch/x86/x86_64/entry.S | 10
xen/arch/x86/x86_64/traps.c | 12
xen/arch/x86/x86_emulate.c | 4
xen/common/kernel.c | 5
xen/common/keyhandler.c | 5
xen/common/memory.c | 20
xen/include/public/arch-ia64.h | 3
xen/include/public/arch-x86_32.h | 19
xen/include/public/arch-x86_64.h | 21
xen/include/public/callback.h | 15
xen/include/public/dom0_ops.h | 56 -
xen/include/public/grant_table.h | 2
xen/include/public/io/netif.h | 4
xen/include/public/io/ring.h | 16
xen/include/public/memory.h | 10
xen/include/public/xen.h | 22
55 files changed, 1228 insertions(+), 460 deletions(-)
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Tue Jun 06 13:25:31 2006 -0500
@@ -558,15 +558,11 @@ void __init paging_init(void)
kmap_init();
- if (!xen_feature(XENFEAT_auto_translated_physmap) ||
- xen_start_info->shared_info >= xen_start_info->nr_pages) {
- /* Switch to the real shared_info page, and clear the
- * dummy page. */
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
- HYPERVISOR_shared_info =
- (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
- }
+ /* Switch to the real shared_info page, and clear the
+ * dummy page. */
+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
/* Setup mapping of lower 1st MB */
for (i = 0; i < NR_FIX_ISAMAPS; i++)
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue Jun 06 13:25:31 2006 -0500
@@ -665,13 +665,6 @@ void __init setup_arch(char **cmdline_p)
setup_xen_features();
- if (xen_feature(XENFEAT_auto_translated_physmap) &&
- xen_start_info->shared_info < xen_start_info->nr_pages) {
- HYPERVISOR_shared_info =
- (shared_info_t *)__va(xen_start_info->shared_info);
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
- }
-
HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_writable_pagetables);
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue Jun 06 13:25:31 2006 -0500
@@ -666,7 +666,18 @@ void __meminit init_memory_mapping(unsig
set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
}
- BUG_ON(!after_bootmem && start_pfn != table_end);
+ if (!after_bootmem) {
+ BUG_ON(start_pfn != table_end);
+ /*
+ * Destroy the temporary mappings created above. Prevents
+ * overlap with modules area (if init mapping is very big).
+ */
+ start = __START_KERNEL_map + (table_start << PAGE_SHIFT);
+ end = __START_KERNEL_map + (table_end << PAGE_SHIFT);
+ for (; start < end; start += PAGE_SIZE)
+ WARN_ON(HYPERVISOR_update_va_mapping(
+ start, __pte_ma(0), 0));
+ }
__flush_tlb_all();
}
@@ -752,15 +763,11 @@ void __init paging_init(void)
free_area_init_node(0, NODE_DATA(0), zones,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
- if (!xen_feature(XENFEAT_auto_translated_physmap) ||
- xen_start_info->shared_info >= xen_start_info->nr_pages) {
- /* Switch to the real shared_info page, and clear the
- * dummy page. */
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
- HYPERVISOR_shared_info =
- (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
- }
+ /* Switch to the real shared_info page, and clear the
+ * dummy page. */
+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
init_mm.context.pinned = 1;
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue Jun 06 13:25:31 2006 -0500
@@ -458,6 +458,9 @@ inline static void net_tx_action_dealloc
dc = dealloc_cons;
dp = dealloc_prod;
+ /* Ensure we see all indexes enqueued by netif_idx_release(). */
+ smp_rmb();
+
/*
* Free up any grants we have finished using
*/
@@ -487,6 +490,177 @@ inline static void net_tx_action_dealloc
}
}
+static void netbk_tx_err(netif_t *netif, RING_IDX end)
+{
+ RING_IDX cons = netif->tx.req_cons;
+
+ do {
+ netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons);
+ make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+ } while (++cons < end);
+ netif->tx.req_cons = cons;
+ netif_schedule_work(netif);
+ netif_put(netif);
+}
+
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
+ int work_to_do)
+{
+ netif_tx_request_t *first = txp;
+ RING_IDX cons = netif->tx.req_cons;
+ int frags = 1;
+
+ while (txp->flags & NETTXF_more_data) {
+ if (frags >= work_to_do) {
+ DPRINTK("Need more frags\n");
+ return -frags;
+ }
+
+ txp = RING_GET_REQUEST(&netif->tx, cons + frags);
+ if (txp->size > first->size) {
+ DPRINTK("Frags galore\n");
+ return -frags;
+ }
+
+ first->size -= txp->size;
+ frags++;
+
+ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+ DPRINTK("txp->offset: %x, size: %u\n",
+ txp->offset, txp->size);
+ return -frags;
+ }
+ }
+
+ return frags;
+}
+
+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+ struct sk_buff *skb,
+ gnttab_map_grant_ref_t *mop)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+ netif_tx_request_t *txp;
+ unsigned long pending_idx = *((u16 *)skb->data);
+ RING_IDX cons = netif->tx.req_cons + 1;
+ int i, start;
+
+ /* Skip first skb fragment if it is on same page as header fragment. */
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < shinfo->nr_frags; i++) {
+ txp = RING_GET_REQUEST(&netif->tx, cons++);
+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+
+ gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txp->gref, netif->domid);
+
+ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+ netif_get(netif);
+ pending_tx_info[pending_idx].netif = netif;
+ frags[i].page = (void *)pending_idx;
+ }
+
+ return mop;
+}
+
+static int netbk_tx_check_mop(struct sk_buff *skb,
+ gnttab_map_grant_ref_t **mopp)
+{
+ gnttab_map_grant_ref_t *mop = *mopp;
+ int pending_idx = *((u16 *)skb->data);
+ netif_t *netif = pending_tx_info[pending_idx].netif;
+ netif_tx_request_t *txp;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ int i, err, start;
+
+ /* Check status of header. */
+ err = mop->status;
+ if (unlikely(err)) {
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ netif_put(netif);
+ } else {
+ set_phys_to_machine(
+ __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+ grant_tx_handle[pending_idx] = mop->handle;
+ }
+
+ /* Skip first skb fragment if it is on same page as header fragment. */
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < nr_frags; i++) {
+ int j, newerr;
+
+ pending_idx = (unsigned long)shinfo->frags[i].page;
+
+ /* Check error status: if okay then remember grant handle. */
+ newerr = (++mop)->status;
+ if (likely(!newerr)) {
+ set_phys_to_machine(
+ __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+ grant_tx_handle[pending_idx] = mop->handle;
+ /* Had a previous error? Invalidate this fragment. */
+ if (unlikely(err))
+ netif_idx_release(pending_idx);
+ continue;
+ }
+
+ /* Error on this fragment: respond to client with an error. */
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ netif_put(netif);
+
+ /* Not the first error? Preceding frags already invalidated. */
+ if (err)
+ continue;
+
+ /* First error: invalidate header and preceding fragments. */
+ pending_idx = *((u16 *)skb->data);
+ netif_idx_release(pending_idx);
+ for (j = start; j < i; j++) {
+ pending_idx = (unsigned long)shinfo->frags[i].page;
+ netif_idx_release(pending_idx);
+ }
+
+ /* Remember the error: invalidate all subsequent fragments. */
+ err = newerr;
+ }
+
+ *mopp = mop + 1;
+ return err;
+}
+
+static void netbk_fill_frags(struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ int i;
+
+ for (i = 0; i < nr_frags; i++) {
+ skb_frag_t *frag = shinfo->frags + i;
+ netif_tx_request_t *txp;
+ unsigned long pending_idx;
+
+ pending_idx = (unsigned long)frag->page;
+ txp = &pending_tx_info[pending_idx].req;
+ frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+ frag->size = txp->size;
+ frag->page_offset = txp->offset;
+
+ skb->len += txp->size;
+ skb->data_len += txp->size;
+ skb->truesize += txp->size;
+ }
+}
+
/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
@@ -504,7 +678,7 @@ static void net_tx_action(unsigned long
net_tx_action_dealloc();
mop = tx_map_ops;
- while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+ while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&net_schedule_list)) {
/* Get a netif from the list with work to do. */
ent = net_schedule_list.next;
@@ -552,38 +726,44 @@ static void net_tx_action(unsigned long
}
netif->remaining_credit -= txreq.size;
- netif->tx.req_cons++;
-
- netif_schedule_work(netif);
-
- if (unlikely(txreq.size < ETH_HLEN) ||
- unlikely(txreq.size > ETH_FRAME_LEN)) {
+ ret = netbk_count_requests(netif, &txreq, work_to_do);
+ if (unlikely(ret < 0)) {
+ netbk_tx_err(netif, i - ret);
+ continue;
+ }
+ i += ret;
+
+ if (unlikely(ret > MAX_SKB_FRAGS + 1)) {
+ DPRINTK("Too many frags\n");
+ netbk_tx_err(netif, i);
+ continue;
+ }
+
+ if (unlikely(txreq.size < ETH_HLEN)) {
DPRINTK("Bad packet size: %d\n", txreq.size);
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
+ netbk_tx_err(netif, i);
continue;
}
/* No crossing a page as the payload mustn't fragment. */
- if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
txreq.offset, txreq.size,
(txreq.offset &~PAGE_MASK) + txreq.size);
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
+ netbk_tx_err(netif, i);
continue;
}
pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
- data_len = (txreq.size > PKT_PROT_LEN) ?
+ data_len = (txreq.size > PKT_PROT_LEN &&
+ ret < MAX_SKB_FRAGS + 1) ?
PKT_PROT_LEN : txreq.size;
skb = alloc_skb(data_len+16, GFP_ATOMIC);
if (unlikely(skb == NULL)) {
DPRINTK("Can't allocate a skb in start_xmit.\n");
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
+ netbk_tx_err(netif, i);
break;
}
@@ -600,9 +780,23 @@ static void net_tx_action(unsigned long
pending_tx_info[pending_idx].netif = netif;
*((u16 *)skb->data) = pending_idx;
+ __skb_put(skb, data_len);
+
+ skb_shinfo(skb)->nr_frags = ret - 1;
+ if (data_len < txreq.size) {
+ skb_shinfo(skb)->nr_frags++;
+ skb_shinfo(skb)->frags[0].page =
+ (void *)(unsigned long)pending_idx;
+ }
+
__skb_queue_tail(&tx_queue, skb);
pending_cons++;
+
+ mop = netbk_get_requests(netif, skb, mop);
+
+ netif->tx.req_cons = i;
+ netif_schedule_work(netif);
if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
break;
@@ -617,75 +811,56 @@ static void net_tx_action(unsigned long
mop = tx_map_ops;
while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+ netif_tx_request_t *txp;
+
pending_idx = *((u16 *)skb->data);
netif = pending_tx_info[pending_idx].netif;
- memcpy(&txreq, &pending_tx_info[pending_idx].req,
- sizeof(txreq));
+ txp = &pending_tx_info[pending_idx].req;
/* Check the remap error code. */
- if (unlikely(mop->status)) {
+ if (unlikely(netbk_tx_check_mop(skb, &mop))) {
printk(KERN_ALERT "#### netback grant fails\n");
- make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
- netif_put(netif);
+ skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
- mop++;
- pending_ring[MASK_PEND_IDX(pending_prod++)] =
- pending_idx;
continue;
}
- set_phys_to_machine(
- __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
- FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
- grant_tx_handle[pending_idx] = mop->handle;
-
- data_len = (txreq.size > PKT_PROT_LEN) ?
- PKT_PROT_LEN : txreq.size;
-
- __skb_put(skb, data_len);
+
+ data_len = skb->len;
memcpy(skb->data,
- (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
+ (void *)(MMAP_VADDR(pending_idx)|txp->offset),
data_len);
- if (data_len < txreq.size) {
+ if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
- skb_shinfo(skb)->frags[0].page =
- virt_to_page(MMAP_VADDR(pending_idx));
- skb_shinfo(skb)->frags[0].size =
- txreq.size - data_len;
- skb_shinfo(skb)->frags[0].page_offset =
- txreq.offset + data_len;
- skb_shinfo(skb)->nr_frags = 1;
+ txp->offset += data_len;
+ txp->size -= data_len;
} else {
/* Schedule a response immediately. */
netif_idx_release(pending_idx);
}
-
- skb->data_len = txreq.size - data_len;
- skb->len += skb->data_len;
- skb->truesize += skb->data_len;
-
- skb->dev = netif->dev;
- skb->protocol = eth_type_trans(skb, skb->dev);
/*
* Old frontends do not assert data_validated but we
* can infer it from csum_blank so test both flags.
*/
- if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+ if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->proto_data_valid = 1;
} else {
skb->ip_summed = CHECKSUM_NONE;
skb->proto_data_valid = 0;
}
- skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank);
-
- netif->stats.rx_bytes += txreq.size;
+ skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+ netbk_fill_frags(skb);
+
+ skb->dev = netif->dev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ netif->stats.rx_bytes += skb->len;
netif->stats.rx_packets++;
netif_rx(skb);
netif->dev->last_rx = jiffies;
-
- mop++;
}
}
@@ -695,7 +870,10 @@ static void netif_idx_release(u16 pendin
unsigned long flags;
spin_lock_irqsave(&_lock, flags);
- dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
+ dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ smp_wmb();
+ dealloc_prod++;
spin_unlock_irqrestore(&_lock, flags);
tasklet_schedule(&net_tx_tasklet);
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue Jun 06 13:25:31 2006 -0500
@@ -69,6 +69,8 @@ static int netback_probe(struct xenbus_d
static int netback_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
+ const char *message;
+ xenbus_transaction_t xbt;
int err;
struct backend_info *be = kzalloc(sizeof(struct backend_info),
GFP_KERNEL);
@@ -86,6 +88,27 @@ static int netback_probe(struct xenbus_d
if (err)
goto fail;
+ do {
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto fail;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ } while (err == -EAGAIN);
+
+ if (err) {
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto fail;
+ }
+
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err) {
goto fail;
@@ -93,6 +116,9 @@ static int netback_probe(struct xenbus_d
return 0;
+abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "%s", message);
fail:
DPRINTK("failed");
netback_remove(dev);
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Jun 06 13:25:31 2006 -0500
@@ -45,6 +45,7 @@
#include <linux/bitops.h>
#include <linux/ethtool.h>
#include <linux/in.h>
+#include <linux/if_ether.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/arp.h>
@@ -173,6 +174,11 @@ static void xennet_sysfs_delif(struct ne
#define xennet_sysfs_delif(dev) do { } while(0)
#endif
+static inline int xennet_can_sg(struct net_device *dev)
+{
+ return dev->features & NETIF_F_SG;
+}
+
/**
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffers for communication with the backend, and
@@ -307,8 +313,6 @@ again:
goto destroy_ring;
}
- xenbus_switch_state(dev, XenbusStateConnected);
-
return 0;
abort_transaction:
@@ -370,12 +374,9 @@ static int setup_device(struct xenbus_de
goto fail;
memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
- network_connect(netdev);
info->irq = bind_evtchn_to_irqhandler(
info->evtchn, netif_int, SA_SAMPLE_RANDOM, netdev->name,
netdev);
- (void)send_fake_arp(netdev);
- show_device(info);
return 0;
@@ -391,15 +392,24 @@ static void backend_changed(struct xenbu
static void backend_changed(struct xenbus_device *dev,
enum xenbus_state backend_state)
{
+ struct netfront_info *np = dev->data;
+ struct net_device *netdev = np->netdev;
+
DPRINTK("\n");
switch (backend_state) {
case XenbusStateInitialising:
- case XenbusStateInitWait:
case XenbusStateInitialised:
case XenbusStateConnected:
case XenbusStateUnknown:
case XenbusStateClosed:
+ break;
+
+ case XenbusStateInitWait:
+ network_connect(netdev);
+ xenbus_switch_state(dev, XenbusStateConnected);
+ (void)send_fake_arp(netdev);
+ show_device(np);
break;
case XenbusStateClosing:
@@ -452,13 +462,17 @@ static int network_open(struct net_devic
return 0;
}
+static inline int netfront_tx_slot_available(struct netfront_info *np)
+{
+ return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1;
+}
+
static inline void network_maybe_wake_tx(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
if (unlikely(netif_queue_stopped(dev)) &&
- !RING_FULL(&np->tx) &&
- !gnttab_empty_grant_references(&np->gref_tx_head) &&
+ netfront_tx_slot_available(np) &&
likely(netif_running(dev)))
netif_wake_queue(dev);
}
@@ -485,7 +499,7 @@ static void network_tx_buf_gc(struct net
printk(KERN_ALERT "network_tx_buf_gc: warning "
"-- grant still in use by backend "
"domain.\n");
- break; /* bail immediately */
+ BUG();
}
gnttab_end_foreign_access_ref(
np->grant_tx_ref[id], GNTMAP_readonly);
@@ -638,36 +652,95 @@ static void network_alloc_rx_buffers(str
RING_PUSH_REQUESTS(&np->rx);
}
+static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+ struct netif_tx_request *tx)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ char *data = skb->data;
+ unsigned long mfn;
+ RING_IDX prod = np->tx.req_prod_pvt;
+ int frags = skb_shinfo(skb)->nr_frags;
+ unsigned int offset = offset_in_page(data);
+ unsigned int len = skb_headlen(skb);
+ unsigned int id;
+ grant_ref_t ref;
+ int i;
+
+ while (len > PAGE_SIZE - offset) {
+ tx->size = PAGE_SIZE - offset;
+ tx->flags |= NETTXF_more_data;
+ len -= tx->size;
+ data += tx->size;
+ offset = 0;
+
+ id = get_id_from_freelist(np->tx_skbs);
+ np->tx_skbs[id] = skb_get(skb);
+ tx = RING_GET_REQUEST(&np->tx, prod++);
+ tx->id = id;
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+ BUG_ON((signed short)ref < 0);
+
+ mfn = virt_to_mfn(data);
+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+ mfn, GNTMAP_readonly);
+
+ tx->gref = np->grant_tx_ref[id] = ref;
+ tx->offset = offset;
+ tx->size = len;
+ tx->flags = 0;
+ }
+
+ for (i = 0; i < frags; i++) {
+ skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+
+ tx->flags |= NETTXF_more_data;
+
+ id = get_id_from_freelist(np->tx_skbs);
+ np->tx_skbs[id] = skb_get(skb);
+ tx = RING_GET_REQUEST(&np->tx, prod++);
+ tx->id = id;
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+ BUG_ON((signed short)ref < 0);
+
+ mfn = pfn_to_mfn(page_to_pfn(frag->page));
+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+ mfn, GNTMAP_readonly);
+
+ tx->gref = np->grant_tx_ref[id] = ref;
+ tx->offset = frag->page_offset;
+ tx->size = frag->size;
+ tx->flags = 0;
+ }
+
+ np->tx.req_prod_pvt = prod;
+}
static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
unsigned short id;
struct netfront_info *np = netdev_priv(dev);
struct netif_tx_request *tx;
+ char *data = skb->data;
RING_IDX i;
grant_ref_t ref;
unsigned long mfn;
int notify;
-
- if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
- PAGE_SIZE)) {
- struct sk_buff *nskb;
- nskb = __dev_alloc_skb(skb->len, GFP_ATOMIC|__GFP_NOWARN);
- if (unlikely(nskb == NULL))
- goto drop;
- skb_put(nskb, skb->len);
- memcpy(nskb->data, skb->data, skb->len);
- /* Copy only the header fields we use in this driver. */
- nskb->dev = skb->dev;
- nskb->ip_summed = skb->ip_summed;
- nskb->proto_data_valid = skb->proto_data_valid;
- dev_kfree_skb(skb);
- skb = nskb;
+ int frags = skb_shinfo(skb)->nr_frags;
+ unsigned int offset = offset_in_page(data);
+ unsigned int len = skb_headlen(skb);
+
+ frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
+ if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
+ printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
+ frags);
+ dump_stack();
+ goto drop;
}
spin_lock_irq(&np->tx_lock);
- if (unlikely(!netif_carrier_ok(dev))) {
+ if (unlikely(!netif_carrier_ok(dev) ||
+ (frags > 1 && !xennet_can_sg(dev)))) {
spin_unlock_irq(&np->tx_lock);
goto drop;
}
@@ -682,12 +755,12 @@ static int network_start_xmit(struct sk_
tx->id = id;
ref = gnttab_claim_grant_reference(&np->gref_tx_head);
BUG_ON((signed short)ref < 0);
- mfn = virt_to_mfn(skb->data);
+ mfn = virt_to_mfn(data);
gnttab_grant_foreign_access_ref(
ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
tx->gref = np->grant_tx_ref[id] = ref;
- tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
- tx->size = skb->len;
+ tx->offset = offset;
+ tx->size = len;
tx->flags = 0;
if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
@@ -696,14 +769,17 @@ static int network_start_xmit(struct sk_
tx->flags |= NETTXF_data_validated;
np->tx.req_prod_pvt = i + 1;
+
+ xennet_make_frags(skb, dev, tx);
+ tx->size = skb->len;
+
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
if (notify)
notify_remote_via_irq(np->irq);
network_tx_buf_gc(dev);
- if (RING_FULL(&np->tx) ||
- gnttab_empty_grant_references(&np->gref_tx_head))
+ if (!netfront_tx_slot_available(np))
netif_stop_queue(dev);
spin_unlock_irq(&np->tx_lock);
@@ -963,12 +1039,46 @@ static struct net_device_stats *network_
return &np->stats;
}
+static int xennet_change_mtu(struct net_device *dev, int mtu)
+{
+ int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
+
+static int xennet_set_sg(struct net_device *dev, u32 data)
+{
+ if (data) {
+ struct netfront_info *np = netdev_priv(dev);
+ int val;
+
+ if (xenbus_scanf(XBT_NULL, np->xbdev->otherend, "feature-sg",
+ "%d", &val) < 0)
+ val = 0;
+ if (!val)
+ return -ENOSYS;
+ } else if (dev->mtu > ETH_DATA_LEN)
+ dev->mtu = ETH_DATA_LEN;
+
+ return ethtool_op_set_sg(dev, data);
+}
+
+static void xennet_set_features(struct net_device *dev)
+{
+ xennet_set_sg(dev, 1);
+}
+
static void network_connect(struct net_device *dev)
{
struct netfront_info *np;
int i, requeue_idx;
struct netif_tx_request *tx;
struct sk_buff *skb;
+
+ xennet_set_features(dev);
np = netdev_priv(dev);
spin_lock_irq(&np->tx_lock);
@@ -1081,6 +1191,8 @@ static struct ethtool_ops network_ethtoo
{
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = ethtool_op_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = xennet_set_sg,
};
#ifdef CONFIG_SYSFS
@@ -1297,6 +1409,7 @@ static struct net_device * __devinit cre
netdev->poll = netif_poll;
netdev->set_multicast_list = network_set_multicast_list;
netdev->uninit = netif_uninit;
+ netdev->change_mtu = xennet_change_mtu;
netdev->weight = 64;
netdev->features = NETIF_F_IP_CSUM;
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Jun 06 13:25:31 2006 -0500
@@ -61,11 +61,11 @@ static int privcmd_ioctl(struct inode *i
__asm__ __volatile__ (
"pushl %%ebx; pushl %%ecx; pushl %%edx; "
"pushl %%esi; pushl %%edi; "
- "movl 4(%%eax),%%ebx ;"
- "movl 8(%%eax),%%ecx ;"
- "movl 12(%%eax),%%edx ;"
- "movl 16(%%eax),%%esi ;"
- "movl 20(%%eax),%%edi ;"
+ "movl 8(%%eax),%%ebx ;"
+ "movl 16(%%eax),%%ecx ;"
+ "movl 24(%%eax),%%edx ;"
+ "movl 32(%%eax),%%esi ;"
+ "movl 40(%%eax),%%edi ;"
"movl (%%eax),%%eax ;"
"shll $5,%%eax ;"
"addl $hypercall_page,%%eax ;"
@@ -161,7 +161,7 @@ static int privcmd_ioctl(struct inode *i
case IOCTL_PRIVCMD_MMAPBATCH: {
privcmd_mmapbatch_t m;
struct vm_area_struct *vma = NULL;
- unsigned long __user *p;
+ xen_pfn_t __user *p;
unsigned long addr, mfn;
int i;
@@ -210,7 +210,7 @@ static int privcmd_ioctl(struct inode *i
batch_err:
printk("batch_err ret=%d vma=%p addr=%lx "
"num=%d arr=%p %lx-%lx\n",
- ret, vma, m.addr, m.num, m.arr,
+ ret, vma, (unsigned long)m.addr, m.num, m.arr,
vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
break;
}
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue Jun 06 13:25:31 2006 -0500
@@ -61,13 +61,6 @@ static void __init machine_specific_arch
.address = { __KERNEL_CS, (unsigned long)nmi },
};
- if (xen_feature(XENFEAT_auto_translated_physmap) &&
- xen_start_info->shared_info < xen_start_info->nr_pages) {
- HYPERVISOR_shared_info =
- (shared_info_t *)__va(xen_start_info->shared_info);
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
- }
-
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
if (ret == 0)
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/include/xen/public/privcmd.h
--- a/linux-2.6-xen-sparse/include/xen/public/privcmd.h Fri Jun 02 12:54:22 2006 -0500
+++ b/linux-2.6-xen-sparse/include/xen/public/privcmd.h Tue Jun 06 13:25:31 2006 -0500
@@ -33,20 +33,22 @@
#ifndef __LINUX_PUBLIC_PRIVCMD_H__
#define __LINUX_PUBLIC_PRIVCMD_H__
+#include <linux/types.h>
+
#ifndef __user
#define __user
#endif
typedef struct privcmd_hypercall
{
- unsigned long op;
- unsigned long arg[5];
+ __u64 op;
+ __u64 arg[5];
} privcmd_hypercall_t;
typedef struct privcmd_mmap_entry {
- unsigned long va;
- unsigned long mfn;
- unsigned long npages;
+ __u64 va;
+ __u64 mfn;
+ __u64 npages;
} privcmd_mmap_entry_t;
typedef struct privcmd_mmap {
@@ -58,8 +60,8 @@ typedef struct privcmd_mmapbatch {
typedef struct privcmd_mmapbatch {
int num; /* number of pages to populate */
domid_t dom; /* target domain */
- unsigned long addr; /* virtual address */
- unsigned long __user *arr; /* array of mfns - top nibble set on err */
+ __u64 addr; /* virtual address */
+ xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
} privcmd_mmapbatch_t;
/*
diff -r d3e181fa238b -r 156a0963a1ae tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/debugger/libxendebug/xendebug.c Tue Jun 06 13:25:31 2006 -0500
@@ -57,7 +57,7 @@ typedef struct domain_context
vcpu_guest_context_t context[MAX_VIRT_CPUS];
long total_pages;
- unsigned long *page_array;
+ xen_pfn_t *page_array;
unsigned long cr3_phys[MAX_VIRT_CPUS];
unsigned long *cr3_virt[MAX_VIRT_CPUS];
diff -r d3e181fa238b -r 156a0963a1ae tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/firmware/vmxassist/vm86.c Tue Jun 06 13:25:31 2006 -0500
@@ -36,6 +36,8 @@
static unsigned prev_eip = 0;
enum vm86_mode mode = 0;
+
+static struct regs saved_rm_regs;
#ifdef DEBUG
int traceset = 0;
@@ -795,6 +797,8 @@ protected_mode(struct regs *regs)
oldctx.esp = regs->uesp;
oldctx.eflags = regs->eflags;
+ memset(&saved_rm_regs, 0, sizeof(struct regs));
+
/* reload all segment registers */
if (!load_seg(regs->cs, &oldctx.cs_base,
&oldctx.cs_limit, &oldctx.cs_arbytes))
@@ -808,6 +812,7 @@ protected_mode(struct regs *regs)
load_seg(0, &oldctx.es_base,
&oldctx.es_limit, &oldctx.es_arbytes);
oldctx.es_sel = 0;
+ saved_rm_regs.ves = regs->ves;
}
if (load_seg(regs->uss, &oldctx.ss_base,
@@ -817,6 +822,7 @@ protected_mode(struct regs *regs)
load_seg(0, &oldctx.ss_base,
&oldctx.ss_limit, &oldctx.ss_arbytes);
oldctx.ss_sel = 0;
+ saved_rm_regs.uss = regs->uss;
}
if (load_seg(regs->vds, &oldctx.ds_base,
@@ -826,6 +832,7 @@ protected_mode(struct regs *regs)
load_seg(0, &oldctx.ds_base,
&oldctx.ds_limit, &oldctx.ds_arbytes);
oldctx.ds_sel = 0;
+ saved_rm_regs.vds = regs->vds;
}
if (load_seg(regs->vfs, &oldctx.fs_base,
@@ -835,6 +842,7 @@ protected_mode(struct regs *regs)
load_seg(0, &oldctx.fs_base,
&oldctx.fs_limit, &oldctx.fs_arbytes);
oldctx.fs_sel = 0;
+ saved_rm_regs.vfs = regs->vfs;
}
if (load_seg(regs->vgs, &oldctx.gs_base,
@@ -844,6 +852,7 @@ protected_mode(struct regs *regs)
load_seg(0, &oldctx.gs_base,
&oldctx.gs_limit, &oldctx.gs_arbytes);
oldctx.gs_sel = 0;
+ saved_rm_regs.vgs = regs->vgs;
}
/* initialize jump environment to warp back to protected mode */
@@ -880,16 +889,22 @@ real_mode(struct regs *regs)
if (regs->uss >= HIGHMEM)
panic("%%ss 0x%lx higher than 1MB", regs->uss);
regs->uss = address(regs, regs->uss, 0) >> 4;
+ } else {
+ regs->uss = saved_rm_regs.uss;
}
if (regs->vds != 0) {
if (regs->vds >= HIGHMEM)
panic("%%ds 0x%lx higher than 1MB", regs->vds);
regs->vds = address(regs, regs->vds, 0) >> 4;
+ } else {
+ regs->vds = saved_rm_regs.vds;
}
if (regs->ves != 0) {
if (regs->ves >= HIGHMEM)
panic("%%es 0x%lx higher than 1MB", regs->ves);
regs->ves = address(regs, regs->ves, 0) >> 4;
+ } else {
+ regs->ves = saved_rm_regs.ves;
}
/* this should get us into 16-bit mode */
@@ -971,6 +986,39 @@ jmpl(struct regs *regs, int prefix)
} else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
cs = fetch16(regs);
+
+ TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+
+ regs->cs = cs;
+ regs->eip = eip;
+ set_mode(regs, VM86_REAL);
+ } else
+ panic("jmpl");
+}
+
+static void
+jmpl_indirect(struct regs *regs, int prefix, unsigned modrm)
+{
+ unsigned n = regs->eip;
+ unsigned cs, eip;
+ unsigned addr;
+
+ addr = operand(prefix, regs, modrm);
+
+ if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
+ eip = (prefix & DATA32) ? read32(addr) : read16(addr);
+ addr += (prefix & DATA32) ? 4 : 2;
+ cs = read16(addr);
+
+ TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+
+ regs->cs = cs;
+ regs->eip = eip;
+ set_mode(regs, VM86_PROTECTED);
+ } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
+ eip = (prefix & DATA32) ? read32(addr) : read16(addr);
+ addr += (prefix & DATA32) ? 4 : 2;
+ cs = read16(addr);
TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
@@ -1306,6 +1354,23 @@ opcode(struct regs *regs)
}
goto invalid;
+ case 0xFF: /* jmpl (indirect) */
+ if ((mode == VM86_REAL_TO_PROTECTED) ||
+ (mode == VM86_PROTECTED_TO_REAL)) {
+ unsigned modrm = fetch8(regs);
+
+ switch((modrm >> 3) & 7) {
+ case 5:
+ jmpl_indirect(regs, prefix, modrm);
+ return OPC_INVALID;
+
+ default:
+ break;
+ }
+
+ }
+ goto invalid;
+
case 0xEB: /* short jump */
if ((mode == VM86_REAL_TO_PROTECTED) ||
(mode == VM86_PROTECTED_TO_REAL)) {
diff -r d3e181fa238b -r 156a0963a1ae tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/ioemu/hw/cirrus_vga.c Tue Jun 06 13:25:31 2006 -0500
@@ -2462,7 +2462,7 @@ extern FILE *logfile;
extern FILE *logfile;
static void * set_vram_mapping(unsigned long begin, unsigned long end)
{
- unsigned long * extent_start = NULL;
+ xen_pfn_t *extent_start = NULL;
unsigned long nr_extents;
void *vram_pointer = NULL;
int i;
@@ -2473,14 +2473,14 @@ static void * set_vram_mapping(unsigned
end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
nr_extents = (end - begin) >> TARGET_PAGE_BITS;
- extent_start = malloc(sizeof(unsigned long) * nr_extents );
+ extent_start = malloc(sizeof(xen_pfn_t) * nr_extents );
if (extent_start == NULL)
{
fprintf(stderr, "Failed malloc on set_vram_mapping\n");
return NULL;
}
- memset(extent_start, 0, sizeof(unsigned long) * nr_extents);
+ memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents);
for (i = 0; i < nr_extents; i++)
{
@@ -2508,7 +2508,7 @@ static void * set_vram_mapping(unsigned
static int unset_vram_mapping(unsigned long begin, unsigned long end)
{
- unsigned long * extent_start = NULL;
+ xen_pfn_t *extent_start = NULL;
unsigned long nr_extents;
int i;
@@ -2519,7 +2519,7 @@ static int unset_vram_mapping(unsigned l
end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
nr_extents = (end - begin) >> TARGET_PAGE_BITS;
- extent_start = malloc(sizeof(unsigned long) * nr_extents );
+ extent_start = malloc(sizeof(xen_pfn_t) * nr_extents );
if (extent_start == NULL)
{
@@ -2527,7 +2527,7 @@ static int unset_vram_mapping(unsigned l
return -1;
}
- memset(extent_start, 0, sizeof(unsigned long) * nr_extents);
+ memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents);
for (i = 0; i < nr_extents; i++)
extent_start[i] = (begin + (i * TARGET_PAGE_SIZE)) >> TARGET_PAGE_BITS;
diff -r d3e181fa238b -r 156a0963a1ae tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/ioemu/vl.c Tue Jun 06 13:25:31 2006 -0500
@@ -2458,7 +2458,7 @@ int unset_mm_mapping(int xc_handle,
uint32_t domid,
unsigned long nr_pages,
unsigned int address_bits,
- unsigned long *extent_start)
+ xen_pfn_t *extent_start)
{
int err = 0;
xc_dominfo_t info;
@@ -2491,7 +2491,7 @@ int set_mm_mapping(int xc_handle,
uint32_t domid,
unsigned long nr_pages,
unsigned int address_bits,
- unsigned long *extent_start)
+ xen_pfn_t *extent_start)
{
xc_dominfo_t info;
int err = 0;
@@ -2557,7 +2557,8 @@ int main(int argc, char **argv)
int serial_device_index;
char qemu_dm_logfilename[64];
const char *loadvm = NULL;
- unsigned long nr_pages, *page_array;
+ unsigned long nr_pages;
+ xen_pfn_t *page_array;
extern void *shared_page;
#if !defined(CONFIG_SOFTMMU)
@@ -3023,8 +3024,8 @@ int main(int argc, char **argv)
xc_handle = xc_interface_open();
- if ( (page_array = (unsigned long *)
- malloc(nr_pages * sizeof(unsigned long))) == NULL)
+ if ( (page_array = (xen_pfn_t *)
+ malloc(nr_pages * sizeof(xen_pfn_t))) == NULL)
{
fprintf(logfile, "malloc returned error %d\n", errno);
exit(-1);
@@ -3079,8 +3080,8 @@ int main(int argc, char **argv)
page_array[0]);
#endif
- fprintf(logfile, "shared page at pfn:%lx, mfn: %lx\n", (nr_pages-1),
- (page_array[nr_pages - 1]));
+ fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", (nr_pages-1),
+ (uint64_t)(page_array[nr_pages - 1]));
/* we always create the cdrom drive, even if no disk is there */
bdrv_init();
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_core.c Tue Jun 06 13:25:31 2006 -0500
@@ -28,7 +28,7 @@ xc_domain_dumpcore_via_callback(int xc_h
dumpcore_rtn_t dump_rtn)
{
unsigned long nr_pages;
- unsigned long *page_array = NULL;
+ xen_pfn_t *page_array = NULL;
xc_dominfo_t info;
int i, nr_vcpus = 0;
char *dump_mem, *dump_mem_start = NULL;
@@ -70,7 +70,7 @@ xc_domain_dumpcore_via_callback(int xc_h
sizeof(vcpu_guest_context_t)*nr_vcpus;
dummy_len = (sizeof(struct xc_core_header) +
(sizeof(vcpu_guest_context_t) * nr_vcpus) +
- (nr_pages * sizeof(unsigned long)));
+ (nr_pages * sizeof(xen_pfn_t)));
header.xch_pages_offset = round_pgup(dummy_len);
sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header));
@@ -81,7 +81,7 @@ xc_domain_dumpcore_via_callback(int xc_h
if ( sts != 0 )
goto error_out;
- if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
{
printf("Could not allocate memory\n");
goto error_out;
@@ -91,7 +91,7 @@ xc_domain_dumpcore_via_callback(int xc_h
printf("Could not get the page frame list\n");
goto error_out;
}
- sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(unsigned long));
+ sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(xen_pfn_t));
if ( sts != 0 )
goto error_out;
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_domain.c Tue Jun 06 13:25:31 2006 -0500
@@ -291,7 +291,7 @@ int xc_domain_memory_increase_reservatio
unsigned long nr_extents,
unsigned int extent_order,
unsigned int address_bits,
- unsigned long *extent_start)
+ xen_pfn_t *extent_start)
{
int err;
struct xen_memory_reservation reservation = {
@@ -324,7 +324,7 @@ int xc_domain_memory_decrease_reservatio
uint32_t domid,
unsigned long nr_extents,
unsigned int extent_order,
- unsigned long *extent_start)
+ xen_pfn_t *extent_start)
{
int err;
struct xen_memory_reservation reservation = {
@@ -363,7 +363,7 @@ int xc_domain_memory_populate_physmap(in
unsigned long nr_extents,
unsigned int extent_order,
unsigned int address_bits,
- unsigned long *extent_start)
+ xen_pfn_t *extent_start)
{
int err;
struct xen_memory_reservation reservation = {
@@ -392,8 +392,8 @@ int xc_domain_translate_gpfn_list(int xc
int xc_domain_translate_gpfn_list(int xc_handle,
uint32_t domid,
unsigned long nr_gpfns,
- unsigned long *gpfn_list,
- unsigned long *mfn_list)
+ xen_pfn_t *gpfn_list,
+ xen_pfn_t *mfn_list)
{
struct xen_translate_gpfn_list op = {
.domid = domid,
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_hvm_build.c Tue Jun 06 13:25:31 2006 -0500
@@ -135,7 +135,7 @@ static void set_hvm_info_checksum(struct
* hvmloader will use this info to set BIOS accordingly
*/
static int set_hvm_info(int xc_handle, uint32_t dom,
- unsigned long *pfn_list, unsigned int vcpus,
+ xen_pfn_t *pfn_list, unsigned int vcpus,
unsigned int pae, unsigned int acpi, unsigned int apic)
{
char *va_map;
@@ -178,7 +178,7 @@ static int setup_guest(int xc_handle,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
- unsigned long *page_array = NULL;
+ xen_pfn_t *page_array = NULL;
unsigned long count, i;
unsigned long long ptr;
xc_mmu_t *mmu = NULL;
@@ -223,7 +223,7 @@ static int setup_guest(int xc_handle,
goto error_out;
}
- if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
{
PERROR("Could not allocate memory.\n");
goto error_out;
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_ia64_stubs.c Tue Jun 06 13:25:31 2006 -0500
@@ -57,7 +57,7 @@ xc_plan9_build(int xc_handle,
int xc_ia64_get_pfn_list(int xc_handle,
uint32_t domid,
- unsigned long *pfn_buf,
+ xen_pfn_t *pfn_buf,
unsigned int start_page,
unsigned int nr_pages)
{
@@ -65,7 +65,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
int num_pfns,ret;
unsigned int __start_page, __nr_pages;
unsigned long max_pfns;
- unsigned long *__pfn_buf;
+ xen_pfn_t *__pfn_buf;
__start_page = start_page;
__nr_pages = nr_pages;
@@ -80,7 +80,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf);
if ( (max_pfns != -1UL)
- && mlock(__pfn_buf, __nr_pages * sizeof(unsigned long)) != 0 )
+ && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0 )
{
PERROR("Could not lock pfn list buffer");
return -1;
@@ -89,7 +89,7 @@ int xc_ia64_get_pfn_list(int xc_handle,
ret = do_dom0_op(xc_handle, &op);
if (max_pfns != -1UL)
- (void)munlock(__pfn_buf, __nr_pages * sizeof(unsigned long));
+ (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t));
if (max_pfns == -1UL)
return 0;
@@ -122,10 +122,10 @@ int xc_ia64_copy_to_domain_pages(int xc_
{
// N.B. gva should be page aligned
- unsigned long *page_array = NULL;
+ xen_pfn_t *page_array = NULL;
int i;
- if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ){
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ){
PERROR("Could not allocate memory");
goto error_out;
}
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_linux.c Tue Jun 06 13:25:31 2006 -0500
@@ -28,7 +28,7 @@ int xc_interface_close(int xc_handle)
}
void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
- unsigned long *arr, int num)
+ xen_pfn_t *arr, int num)
{
privcmd_mmapbatch_t ioctlx;
void *addr;
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_linux_build.c Tue Jun 06 13:25:31 2006 -0500
@@ -10,6 +10,7 @@
#include "xc_aout9.h"
#include <stdlib.h>
#include <unistd.h>
+#include <inttypes.h>
#include <zlib.h>
#if defined(__i386__)
@@ -136,7 +137,7 @@ int load_initrd(int xc_handle, domid_t d
int load_initrd(int xc_handle, domid_t dom,
struct initrd_info *initrd,
unsigned long physbase,
- unsigned long *phys_to_mach)
+ xen_pfn_t *phys_to_mach)
{
char page[PAGE_SIZE];
unsigned long pfn_start, pfn, nr_pages;
@@ -189,7 +190,7 @@ static int setup_pg_tables(int xc_handle
vcpu_guest_context_t *ctxt,
unsigned long dsi_v_start,
unsigned long v_end,
- unsigned long *page_array,
+ xen_pfn_t *page_array,
unsigned long vpt_start,
unsigned long vpt_end,
unsigned shadow_mode_enabled)
@@ -251,19 +252,35 @@ static int setup_pg_tables_pae(int xc_ha
vcpu_guest_context_t *ctxt,
unsigned long dsi_v_start,
unsigned long v_end,
- unsigned long *page_array,
+ xen_pfn_t *page_array,
unsigned long vpt_start,
unsigned long vpt_end,
- unsigned shadow_mode_enabled)
+ unsigned shadow_mode_enabled,
+ unsigned pae_mode)
{
l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
- unsigned long ppt_alloc, count;
+ unsigned long ppt_alloc, count, nmfn;
/* First allocate page for page dir. */
ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
+
+ if ( pae_mode == PAEKERN_extended_cr3 )
+ {
+ ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);
+ }
+ else if ( page_array[ppt_alloc] > 0xfffff )
+ {
+ nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
+ if ( nmfn == 0 )
+ {
+ fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+ goto error_out;
+ }
+ page_array[ppt_alloc] = nmfn;
+ }
alloc_pt(l3tab, vl3tab, pl3tab);
vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
@@ -340,7 +357,7 @@ static int setup_pg_tables_64(int xc_han
vcpu_guest_context_t *ctxt,
unsigned long dsi_v_start,
unsigned long v_end,
- unsigned long *page_array,
+ xen_pfn_t *page_array,
unsigned long vpt_start,
unsigned long vpt_end,
int shadow_mode_enabled)
@@ -451,7 +468,7 @@ static int setup_guest(int xc_handle,
unsigned int console_evtchn, unsigned long *console_mfn,
uint32_t required_features[XENFEAT_NR_SUBMAPS])
{
- unsigned long *page_array = NULL;
+ xen_pfn_t *page_array = NULL;
struct load_funcs load_funcs;
struct domain_setup_info dsi;
unsigned long vinitrd_start;
@@ -478,7 +495,7 @@ static int setup_guest(int xc_handle,
start_page = dsi.v_start >> PAGE_SHIFT;
pgnr = (v_end - dsi.v_start) >> PAGE_SHIFT;
- if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL )
+ if ( (page_array = malloc(pgnr * sizeof(xen_pfn_t))) == NULL )
{
PERROR("Could not allocate memory");
goto error_out;
@@ -579,11 +596,11 @@ static int compat_check(int xc_handle, s
}
if (strstr(xen_caps, "xen-3.0-x86_32p")) {
- if (!dsi->pae_kernel) {
+ if (dsi->pae_kernel == PAEKERN_no) {
ERROR("Non PAE-kernel on PAE host.");
return 0;
}
- } else if (dsi->pae_kernel) {
+ } else if (dsi->pae_kernel != PAEKERN_no) {
ERROR("PAE-kernel on non-PAE host.");
return 0;
}
@@ -606,7 +623,7 @@ static int setup_guest(int xc_handle,
unsigned int console_evtchn, unsigned long *console_mfn,
uint32_t required_features[XENFEAT_NR_SUBMAPS])
{
- unsigned long *page_array = NULL;
+ xen_pfn_t *page_array = NULL;
unsigned long count, i, hypercall_pfn;
start_info_t *start_info;
shared_info_t *shared_info;
@@ -617,7 +634,7 @@ static int setup_guest(int xc_handle,
unsigned long nr_pt_pages;
unsigned long physmap_pfn;
- unsigned long *physmap, *physmap_e;
+ xen_pfn_t *physmap, *physmap_e;
struct load_funcs load_funcs;
struct domain_setup_info dsi;
@@ -673,7 +690,8 @@ static int setup_guest(int xc_handle,
for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ )
{
- if ( (supported_features[i]&required_features[i]) != required_features[i] )
+ if ( (supported_features[i] & required_features[i]) !=
+ required_features[i] )
{
ERROR("Guest kernel does not support a required feature.");
goto error_out;
@@ -719,7 +737,7 @@ static int setup_guest(int xc_handle,
(((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
((_l) & ~((1UL<<(_s))-1))) >> (_s))
#if defined(__i386__)
- if ( dsi.pae_kernel )
+ if ( dsi.pae_kernel != PAEKERN_no )
{
if ( (1 + /* # L3 */
NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */
@@ -797,11 +815,11 @@ static int setup_guest(int xc_handle,
/* setup page tables */
#if defined(__i386__)
- if (dsi.pae_kernel)
+ if (dsi.pae_kernel != PAEKERN_no)
rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
dsi.v_start, v_end,
page_array, vpt_start, vpt_end,
- shadow_mode_enabled);
+ shadow_mode_enabled, dsi.pae_kernel);
else
rc = setup_pg_tables(xc_handle, dom, ctxt,
dsi.v_start, v_end,
@@ -824,7 +842,7 @@ static int setup_guest(int xc_handle,
*/
if ( !shadow_mode_enabled )
{
- if ( dsi.pae_kernel )
+ if ( dsi.pae_kernel != PAEKERN_no )
{
if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) )
@@ -865,8 +883,8 @@ static int setup_guest(int xc_handle,
((uint64_t)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
count) )
{
- fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
- count, page_array[count]);
+ fprintf(stderr,"m2p update failure p=%lx m=%"PRIx64"\n",
+ count, (uint64_t)page_array[count]);
munmap(physmap, PAGE_SIZE);
goto error_out;
}
@@ -958,7 +976,7 @@ static int setup_guest(int xc_handle,
rc = xc_version(xc_handle, XENVER_version, NULL);
sprintf(start_info->magic, "xen-%i.%i-x86_%d%s",
rc >> 16, rc & (0xFFFF), (unsigned int)sizeof(long)*8,
- dsi.pae_kernel ? "p" : "");
+ (dsi.pae_kernel != PAEKERN_no) ? "p" : "");
start_info->nr_pages = nr_pages;
start_info->shared_info = guest_shared_info_mfn << PAGE_SHIFT;
start_info->flags = flags;
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_linux_restore.c Tue Jun 06 13:25:31 2006 -0500
@@ -25,10 +25,10 @@ static unsigned long max_pfn;
static unsigned long max_pfn;
/* Live mapping of the table mapping each PFN to its current MFN. */
-static unsigned long *live_p2m = NULL;
+static xen_pfn_t *live_p2m = NULL;
/* A table mapping each PFN to its new MFN. */
-static unsigned long *p2m = NULL;
+static xen_pfn_t *p2m = NULL;
static ssize_t
@@ -108,7 +108,7 @@ int xc_linux_restore(int xc_handle, int
unsigned int console_evtchn, unsigned long *console_mfn)
{
DECLARE_DOM0_OP;
- int rc = 1, i, n;
+ int rc = 1, i, n, pae_extended_cr3 = 0;
unsigned long mfn, pfn;
unsigned int prev_pc, this_pc;
int verify = 0;
@@ -126,7 +126,7 @@ int xc_linux_restore(int xc_handle, int
unsigned long *pfn_type = NULL;
/* A table of MFNs to map in the current region */
- unsigned long *region_mfn = NULL;
+ xen_pfn_t *region_mfn = NULL;
/* Types of the pfns in the current region */
unsigned long region_pfn_type[MAX_BATCH_SIZE];
@@ -135,7 +135,7 @@ int xc_linux_restore(int xc_handle, int
unsigned long *page = NULL;
/* A copy of the pfn-to-mfn table frame list. */
- unsigned long *p2m_frame_list = NULL;
+ xen_pfn_t *p2m_frame_list = NULL;
/* A temporary mapping of the guest's start_info page. */
start_info_t *start_info;
@@ -162,30 +162,88 @@ int xc_linux_restore(int xc_handle, int
return 1;
}
-
if (mlock(&ctxt, sizeof(ctxt))) {
/* needed for build dom0 op, but might as well do early */
ERR("Unable to mlock ctxt");
return 1;
}
-
- /* Read the saved P2M frame list */
- if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
+ if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
ERR("Couldn't allocate p2m_frame_list array");
goto out;
}
- if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
+ /* Read first entry of P2M list, or extended-info signature (~0UL). */
+ if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+ ERR("read extended-info signature failed");
+ goto out;
+ }
+
+ if (p2m_frame_list[0] == ~0UL) {
+ uint32_t tot_bytes;
+
+ /* Next 4 bytes: total size of following extended info. */
+ if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
+ ERR("read extended-info size failed");
+ goto out;
+ }
+
+ while (tot_bytes) {
+ uint32_t chunk_bytes;
+ char chunk_sig[4];
+
+ /* 4-character chunk signature + 4-byte remaining chunk size. */
+ if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
+ !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
+ ERR("read extended-info chunk signature failed");
+ goto out;
+ }
+ tot_bytes -= 8;
+
+ /* VCPU context structure? */
+ if (!strncmp(chunk_sig, "vcpu", 4)) {
+ if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+ ERR("read extended-info vcpu context failed");
+ goto out;
+ }
+ tot_bytes -= sizeof(struct vcpu_guest_context);
+ chunk_bytes -= sizeof(struct vcpu_guest_context);
+
+ if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
+ pae_extended_cr3 = 1;
+ }
+
+ /* Any remaining bytes of this chunk: read and discard. */
+ while (chunk_bytes) {
+ unsigned long sz = chunk_bytes;
+ if ( sz > P2M_FL_SIZE )
+ sz = P2M_FL_SIZE;
+ if (!read_exact(io_fd, p2m_frame_list, sz)) {
+ ERR("read-and-discard extended-info chunk bytes failed");
+ goto out;
+ }
+ chunk_bytes -= sz;
+ tot_bytes -= sz;
+ }
+ }
+
+ /* Now read the real first entry of P2M list. */
+ if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+ ERR("read first entry of p2m_frame_list failed");
+ goto out;
+ }
+ }
+
+ /* First entry is already read into the p2m array. */
+ if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
ERR("read p2m_frame_list failed");
goto out;
}
-
/* We want zeroed memory so use calloc rather than malloc. */
- p2m = calloc(max_pfn, sizeof(unsigned long));
+ p2m = calloc(max_pfn, sizeof(xen_pfn_t));
pfn_type = calloc(max_pfn, sizeof(unsigned long));
- region_mfn = calloc(MAX_BATCH_SIZE, sizeof(unsigned long));
+ region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) {
ERR("memory alloc failed");
@@ -193,7 +251,7 @@ int xc_linux_restore(int xc_handle, int
goto out;
}
- if (mlock(region_mfn, sizeof(unsigned long) * MAX_BATCH_SIZE)) {
+ if (mlock(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) {
ERR("Could not mlock region_mfn");
goto out;
}
@@ -331,17 +389,27 @@ int xc_linux_restore(int xc_handle, int
** A page table page - need to 'uncanonicalize' it, i.e.
** replace all the references to pfns with the corresponding
** mfns for the new domain.
+ **
+ ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
+ ** so we may need to update the p2m after the main loop.
+ ** Hence we defer canonicalization of L1s until then.
*/
- if(!uncanonicalize_pagetable(pagetype, page)) {
- /*
- ** Failing to uncanonicalize a page table can be ok
- ** under live migration since the pages type may have
- ** changed by now (and we'll get an update later).
- */
- DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
- pagetype >> 28, pfn, mfn);
- nraces++;
- continue;
+ if ((pt_levels != 3) ||
+ pae_extended_cr3 ||
+ (pagetype != L1TAB)) {
+
+ if (!uncanonicalize_pagetable(pagetype, page)) {
+ /*
+ ** Failing to uncanonicalize a page table can be ok
+ ** under live migration since the pages type may have
+ ** changed by now (and we'll get an update later).
+ */
+ DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+ pagetype >> 28, pfn, mfn);
+ nraces++;
+ continue;
+ }
+
}
} else if(pagetype != NOTAB) {
@@ -389,6 +457,100 @@ int xc_linux_restore(int xc_handle, int
}
DPRINTF("Received all pages (%d races)\n", nraces);
+
+ if ((pt_levels == 3) && !pae_extended_cr3) {
+
+ /*
+ ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
+ ** is a little awkward and involves (a) finding all such PGDs and
+ ** replacing them with 'lowmem' versions; (b) upating the p2m[]
+ ** with the new info; and (c) canonicalizing all the L1s using the
+ ** (potentially updated) p2m[].
+ **
+ ** This is relatively slow (and currently involves two passes through
+ ** the pfn_type[] array), but at least seems to be correct. May wish
+ ** to consider more complex approaches to optimize this later.
+ */
+
+ int j, k;
+
+ /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
+ for (i = 0; i < max_pfn; i++) {
+
+ if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
+
+ unsigned long new_mfn;
+ uint64_t l3ptes[4];
+ uint64_t *l3tab;
+
+ l3tab = (uint64_t *)
+ xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ, p2m[i]);
+
+ for(j = 0; j < 4; j++)
+ l3ptes[j] = l3tab[j];
+
+ munmap(l3tab, PAGE_SIZE);
+
+ if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
+ ERR("Couldn't get a page below 4GB :-(");
+ goto out;
+ }
+
+ p2m[i] = new_mfn;
+ if (xc_add_mmu_update(xc_handle, mmu,
+ (((unsigned long long)new_mfn)
+ << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, i)) {
+ ERR("Couldn't m2p on PAE root pgdir");
+ goto out;
+ }
+
+ l3tab = (uint64_t *)
+ xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ | PROT_WRITE, p2m[i]);
+
+ for(j = 0; j < 4; j++)
+ l3tab[j] = l3ptes[j];
+
+ munmap(l3tab, PAGE_SIZE);
+
+ }
+ }
+
+ /* Second pass: find all L1TABs and uncanonicalize them */
+ j = 0;
+
+ for(i = 0; i < max_pfn; i++) {
+
+ if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
+ region_mfn[j] = p2m[i];
+ j++;
+ }
+
+ if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
+
+ if (!(region_base = xc_map_foreign_batch(
+ xc_handle, dom, PROT_READ | PROT_WRITE,
+ region_mfn, j))) {
+ ERR("map batch failed");
+ goto out;
+ }
+
+ for(k = 0; k < j; k++) {
+ if(!uncanonicalize_pagetable(L1TAB,
+ region_base + k*PAGE_SIZE)) {
+ ERR("failed uncanonicalize pt!");
+ goto out;
+ }
+ }
+
+ munmap(region_base, j*PAGE_SIZE);
+ j = 0;
+ }
+ }
+
+ }
if (xc_finish_mmu_updates(xc_handle, mmu)) {
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_linux_save.c Tue Jun 06 13:25:31 2006 -0500
@@ -40,10 +40,10 @@ static unsigned long max_pfn;
static unsigned long max_pfn;
/* Live mapping of the table mapping each PFN to its current MFN. */
-static unsigned long *live_p2m = NULL;
+static xen_pfn_t *live_p2m = NULL;
/* Live mapping of system MFN to PFN table. */
-static unsigned long *live_m2p = NULL;
+static xen_pfn_t *live_m2p = NULL;
/* grep fodder: machine_to_phys */
@@ -501,22 +501,22 @@ void canonicalize_pagetable(unsigned lon
-static unsigned long *xc_map_m2p(int xc_handle,
+static xen_pfn_t *xc_map_m2p(int xc_handle,
unsigned long max_mfn,
int prot)
{
struct xen_machphys_mfn_list xmml;
privcmd_mmap_entry_t *entries;
unsigned long m2p_chunks, m2p_size;
- unsigned long *m2p;
- unsigned long *extent_start;
+ xen_pfn_t *m2p;
+ xen_pfn_t *extent_start;
int i, rc;
m2p_size = M2P_SIZE(max_mfn);
m2p_chunks = M2P_CHUNKS(max_mfn);
xmml.max_extents = m2p_chunks;
- if (!(extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) {
+ if (!(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t)))) {
ERR("failed to allocate space for m2p mfns");
return NULL;
}
@@ -583,11 +583,11 @@ int xc_linux_save(int xc_handle, int io_
char page[PAGE_SIZE];
/* Double and single indirect references to the live P2M table */
- unsigned long *live_p2m_frame_list_list = NULL;
- unsigned long *live_p2m_frame_list = NULL;
+ xen_pfn_t *live_p2m_frame_list_list = NULL;
+ xen_pfn_t *live_p2m_frame_list = NULL;
/* A copy of the pfn-to-mfn table frame list. */
- unsigned long *p2m_frame_list = NULL;
+ xen_pfn_t *p2m_frame_list = NULL;
/* Live mapping of shared info structure */
shared_info_t *live_shinfo = NULL;
@@ -712,11 +712,11 @@ int xc_linux_save(int xc_handle, int io_
memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
/* Canonicalise the pfn-to-mfn table frame-number list. */
- for (i = 0; i < max_pfn; i += ulpp) {
- if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) {
+ for (i = 0; i < max_pfn; i += fpp) {
+ if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) {
ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
- ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp,
- p2m_frame_list[i/ulpp]);
+ ERR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
+ (uint64_t)p2m_frame_list[i/fpp]);
goto out;
}
}
@@ -818,12 +818,33 @@ int xc_linux_save(int xc_handle, int io_
/* Start writing out the saved-domain record. */
- if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
+ if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
ERR("write: max_pfn");
goto out;
}
- if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
+ /*
+ * Write an extended-info structure to inform the restore code that
+ * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
+ * slow paths in the restore code.
+ */
+ if ((pt_levels == 3) &&
+ (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))) {
+ unsigned long signature = ~0UL;
+ uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8;
+ uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
+ char chunk_sig[] = "vcpu";
+ if (!write_exact(io_fd, &signature, sizeof(signature)) ||
+ !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) ||
+ !write_exact(io_fd, &chunk_sig, 4) ||
+ !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) ||
+ !write_exact(io_fd, &ctxt, sizeof(ctxt))) {
+ ERR("write: extended info");
+ goto out;
+ }
+ }
+
+ if (!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) {
ERR("write: p2m_frame_list");
goto out;
}
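
The extended-info record written above is self-describing: a ~0UL signature (which can never be a valid frame-list MFN), a total size, then tagged chunks of the form { 4-byte tag, 4-byte size, payload }, of which only a "vcpu" chunk carrying the vcpu_guest_context is emitted here. A minimal consumer sketch for that layout follows; the helper names are illustrative, not the actual xc_linux_restore() code, and the word size of the signature must match the saver, as elsewhere in this format.

    /* Sketch of a reader for the optional extended-info block written above. */
    #include <stdint.h>
    #include <unistd.h>

    static int read_chunk(int fd, void *buf, size_t len)
    {
        while (len) {
            ssize_t n = read(fd, buf, len);
            if (n <= 0)
                return -1;
            buf = (char *)buf + n;
            len -= (size_t)n;
        }
        return 0;
    }

    /* Returns 1 if extended info was consumed, 0 if *first_word is really the
     * first word of the p2m frame list, -1 on error. */
    static int maybe_read_extended_info(int io_fd, unsigned long *first_word)
    {
        uint32_t tot_sz;

        if (read_chunk(io_fd, first_word, sizeof(*first_word)))
            return -1;
        if (*first_word != ~0UL)
            return 0;                         /* no extended info present */

        if (read_chunk(io_fd, &tot_sz, sizeof(tot_sz)))
            return -1;

        while (tot_sz >= 8) {
            char sig[4];                      /* e.g. "vcpu" */
            uint32_t chunk_sz, left;
            if (read_chunk(io_fd, sig, 4) ||
                read_chunk(io_fd, &chunk_sz, sizeof(chunk_sz)) ||
                tot_sz < 8 + chunk_sz)
                return -1;
            tot_sz -= 8 + chunk_sz;
            /* A real implementation would capture a "vcpu" payload here;
             * this sketch just skips it. */
            for (left = chunk_sz; left; ) {
                char scratch[256];
                uint32_t n = left < sizeof(scratch) ? left : (uint32_t)sizeof(scratch);
                if (read_chunk(io_fd, scratch, n))
                    return -1;
                left -= n;
            }
        }
        return 1;
    }
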
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_load_aout9.c
--- a/tools/libxc/xc_load_aout9.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_load_aout9.c Tue Jun 06 13:25:31 2006 -0500
@@ -17,7 +17,7 @@
#define KOFFSET(_p) ((_p)&~KZERO)
static int parseaout9image(const char *, unsigned long, struct domain_setup_info *);
-static int loadaout9image(const char *, unsigned long, int, uint32_t, unsigned long *, struct domain_setup_info *);
+static int loadaout9image(const char *, unsigned long, int, uint32_t, xen_pfn_t *, struct domain_setup_info *);
static void copyout(int, uint32_t, unsigned long *, unsigned long, const char *, int);
struct Exec *get_header(const char *, unsigned long, struct Exec *);
@@ -79,7 +79,7 @@ loadaout9image(
const char *image,
unsigned long image_size,
int xch, uint32_t dom,
- unsigned long *parray,
+ xen_pfn_t *parray,
struct domain_setup_info *dsi)
{
struct Exec ehdr;
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_load_bin.c
--- a/tools/libxc/xc_load_bin.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_load_bin.c Tue Jun 06 13:25:31 2006 -0500
@@ -107,7 +107,7 @@ static int
static int
loadbinimage(
const char *image, unsigned long image_size, int xch, uint32_t dom,
- unsigned long *parray, struct domain_setup_info *dsi);
+ xen_pfn_t *parray, struct domain_setup_info *dsi);
int probe_bin(const char *image,
unsigned long image_size,
@@ -235,7 +235,7 @@ static int
static int
loadbinimage(
const char *image, unsigned long image_size, int xch, uint32_t dom,
- unsigned long *parray, struct domain_setup_info *dsi)
+ xen_pfn_t *parray, struct domain_setup_info *dsi)
{
unsigned long size;
char *va;
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_load_elf.c Tue Jun 06 13:25:31 2006 -0500
@@ -17,10 +17,10 @@ static int
static int
loadelfimage(
const char *image, unsigned long image_size, int xch, uint32_t dom,
- unsigned long *parray, struct domain_setup_info *dsi);
+ xen_pfn_t *parray, struct domain_setup_info *dsi);
static int
loadelfsymtab(
- const char *image, int xch, uint32_t dom, unsigned long *parray,
+ const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
struct domain_setup_info *dsi);
int probe_elf(const char *image,
@@ -138,8 +138,15 @@ static int parseelfimage(const char *ima
ERROR("Actually saw: '%s'", guestinfo);
return -EINVAL;
}
- if ( (strstr(guestinfo, "PAE=yes") != NULL) )
- dsi->pae_kernel = 1;
+
+ dsi->pae_kernel = PAEKERN_no;
+ p = strstr(guestinfo, "PAE=yes");
+ if ( p != NULL )
+ {
+ dsi->pae_kernel = PAEKERN_yes;
+ if ( !strncmp(p+7, "[extended-cr3]", 14) )
+ dsi->pae_kernel = PAEKERN_extended_cr3;
+ }
break;
}
@@ -220,7 +227,7 @@ static int
static int
loadelfimage(
const char *image, unsigned long elfsize, int xch, uint32_t dom,
- unsigned long *parray, struct domain_setup_info *dsi)
+ xen_pfn_t *parray, struct domain_setup_info *dsi)
{
Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
Elf_Phdr *phdr;
@@ -274,7 +281,7 @@ loadelfimage(
static int
loadelfsymtab(
- const char *image, int xch, uint32_t dom, unsigned long *parray,
+ const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
struct domain_setup_info *dsi)
{
Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr;
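
The __xen_guest option parsed above is a plain substring match: "PAE=yes" selects PAEKERN_yes, and an immediately following "[extended-cr3]" tag upgrades it to PAEKERN_extended_cr3. A small standalone check of that parsing logic, using the PAEKERN_* values defined in xg_private.h later in this patch:

    /* Standalone illustration of the PAE option parsing added above. */
    #include <stdio.h>
    #include <string.h>

    #define PAEKERN_no           0
    #define PAEKERN_yes          1
    #define PAEKERN_extended_cr3 2

    static int parse_pae(const char *guestinfo)
    {
        const char *p = strstr(guestinfo, "PAE=yes");
        if (p == NULL)
            return PAEKERN_no;
        if (!strncmp(p + 7, "[extended-cr3]", 14))
            return PAEKERN_extended_cr3;
        return PAEKERN_yes;
    }

    int main(void)
    {
        printf("%d\n", parse_pae("GUEST_OS=linux,PAE=no"));                /* 0 */
        printf("%d\n", parse_pae("GUEST_OS=linux,PAE=yes"));               /* 1 */
        printf("%d\n", parse_pae("GUEST_OS=linux,PAE=yes[extended-cr3]")); /* 2 */
        return 0;
    }
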
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xc_private.c Tue Jun 06 13:25:31 2006 -0500
@@ -4,6 +4,7 @@
* Helper functions for the rest of the library.
*/
+#include <inttypes.h>
#include "xc_private.h"
/* NB: arr must be mlock'ed */
@@ -134,9 +135,9 @@ int xc_memory_op(int xc_handle,
struct xen_memory_reservation *reservation = arg;
struct xen_machphys_mfn_list *xmml = arg;
struct xen_translate_gpfn_list *trans = arg;
- unsigned long *extent_start;
- unsigned long *gpfn_list;
- unsigned long *mfn_list;
+ xen_pfn_t *extent_start;
+ xen_pfn_t *gpfn_list;
+ xen_pfn_t *mfn_list;
long ret = -EINVAL;
hypercall.op = __HYPERVISOR_memory_op;
@@ -156,7 +157,7 @@ int xc_memory_op(int xc_handle,
get_xen_guest_handle(extent_start, reservation->extent_start);
if ( (extent_start != NULL) &&
(mlock(extent_start,
- reservation->nr_extents * sizeof(unsigned long)) != 0) )
+ reservation->nr_extents * sizeof(xen_pfn_t)) != 0) )
{
PERROR("Could not mlock");
safe_munlock(reservation, sizeof(*reservation));
@@ -171,7 +172,7 @@ int xc_memory_op(int xc_handle,
}
get_xen_guest_handle(extent_start, xmml->extent_start);
if ( mlock(extent_start,
- xmml->max_extents * sizeof(unsigned long)) != 0 )
+ xmml->max_extents * sizeof(xen_pfn_t)) != 0 )
{
PERROR("Could not mlock");
safe_munlock(xmml, sizeof(*xmml));
@@ -192,17 +193,17 @@ int xc_memory_op(int xc_handle,
goto out1;
}
get_xen_guest_handle(gpfn_list, trans->gpfn_list);
- if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(long)) != 0 )
+ if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 )
{
PERROR("Could not mlock");
safe_munlock(trans, sizeof(*trans));
goto out1;
}
get_xen_guest_handle(mfn_list, trans->mfn_list);
- if ( mlock(mfn_list, trans->nr_gpfns * sizeof(long)) != 0 )
- {
- PERROR("Could not mlock");
- safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long));
+ if ( mlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 )
+ {
+ PERROR("Could not mlock");
+ safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
safe_munlock(trans, sizeof(*trans));
goto out1;
}
@@ -220,22 +221,22 @@ int xc_memory_op(int xc_handle,
get_xen_guest_handle(extent_start, reservation->extent_start);
if ( extent_start != NULL )
safe_munlock(extent_start,
- reservation->nr_extents * sizeof(unsigned long));
+ reservation->nr_extents * sizeof(xen_pfn_t));
break;
case XENMEM_machphys_mfn_list:
safe_munlock(xmml, sizeof(*xmml));
get_xen_guest_handle(extent_start, xmml->extent_start);
safe_munlock(extent_start,
- xmml->max_extents * sizeof(unsigned long));
+ xmml->max_extents * sizeof(xen_pfn_t));
break;
case XENMEM_add_to_physmap:
safe_munlock(arg, sizeof(struct xen_add_to_physmap));
break;
case XENMEM_translate_gpfn_list:
get_xen_guest_handle(mfn_list, trans->mfn_list);
- safe_munlock(mfn_list, trans->nr_gpfns * sizeof(long));
+ safe_munlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
get_xen_guest_handle(gpfn_list, trans->gpfn_list);
- safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long));
+ safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t));
safe_munlock(trans, sizeof(*trans));
break;
}
@@ -263,7 +264,7 @@ long long xc_domain_get_cpu_usage( int x
int xc_get_pfn_list(int xc_handle,
uint32_t domid,
- unsigned long *pfn_buf,
+ xen_pfn_t *pfn_buf,
unsigned long max_pfns)
{
DECLARE_DOM0_OP;
@@ -274,10 +275,10 @@ int xc_get_pfn_list(int xc_handle,
set_xen_guest_handle(op.u.getmemlist.buffer, pfn_buf);
#ifdef VALGRIND
- memset(pfn_buf, 0, max_pfns * sizeof(unsigned long));
+ memset(pfn_buf, 0, max_pfns * sizeof(xen_pfn_t));
#endif
- if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
+ if ( mlock(pfn_buf, max_pfns * sizeof(xen_pfn_t)) != 0 )
{
PERROR("xc_get_pfn_list: pfn_buf mlock failed");
return -1;
@@ -285,7 +286,7 @@ int xc_get_pfn_list(int xc_handle,
ret = do_dom0_op(xc_handle, &op);
- safe_munlock(pfn_buf, max_pfns * sizeof(unsigned long));
+ safe_munlock(pfn_buf, max_pfns * sizeof(xen_pfn_t));
#if 0
#ifdef DEBUG
@@ -364,7 +365,7 @@ unsigned long xc_get_filesz(int fd)
}
void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size,
- int xch, uint32_t dom, unsigned long *parray,
+ int xch, uint32_t dom, xen_pfn_t *parray,
unsigned long vstart)
{
char *va;
@@ -428,6 +429,29 @@ int xc_version(int xc_handle, int cmd, v
safe_munlock(arg, argsize);
return rc;
+}
+
+unsigned long xc_make_page_below_4G(
+ int xc_handle, uint32_t domid, unsigned long mfn)
+{
+ xen_pfn_t old_mfn = mfn;
+ xen_pfn_t new_mfn;
+
+ if ( xc_domain_memory_decrease_reservation(
+ xc_handle, domid, 1, 0, &old_mfn) != 0 )
+ {
+ fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
+ return 0;
+ }
+
+ if ( xc_domain_memory_increase_reservation(
+ xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
+ {
+ fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
+ return 0;
+ }
+
+ return new_mfn;
}
/*
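
xc_make_page_below_4G() only swaps the frame itself: it releases the old MFN and allocates a replacement constrained to 32 address bits. Keeping the p2m and the machine-to-phys table coherent is the caller's job, as the restore path above shows. A caller sketch, as a fragment against the libxc calls in this patch rather than a standalone program; the mmu handle type (xc_mmu_t) and header name are assumptions taken from the surrounding libxc code of this era:

    /* Fragment: relocate one frame below 4GB and queue the m2p fix-up. */
    #include "xg_private.h"   /* assumed to pull in PAGE_SHIFT, MMU_MACHPHYS_UPDATE */

    static int relocate_below_4g(int xc_handle, uint32_t dom, xc_mmu_t *mmu,
                                 xen_pfn_t *p2m, unsigned long pfn)
    {
        /* NB: the old frame's contents are lost by the swap; the restore code
         * saves the four L3 entries before this call and rewrites them after. */
        unsigned long new_mfn = xc_make_page_below_4G(xc_handle, dom, p2m[pfn]);

        if (new_mfn == 0)
            return -1;                      /* no memory below 4GB available */

        p2m[pfn] = new_mfn;                 /* pseudo-physical -> machine */

        /* Queue the machine -> pseudo-physical update; it is flushed later
         * via xc_finish_mmu_updates(), as in the main restore path. */
        return xc_add_mmu_update(xc_handle, mmu,
                                 (((unsigned long long)new_mfn) << PAGE_SHIFT) |
                                 MMU_MACHPHYS_UPDATE, pfn);
    }
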
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xenctrl.h Tue Jun 06 13:25:31 2006 -0500
@@ -420,26 +420,26 @@ int xc_domain_memory_increase_reservatio
unsigned long nr_extents,
unsigned int extent_order,
unsigned int address_bits,
- unsigned long *extent_start);
+ xen_pfn_t *extent_start);
int xc_domain_memory_decrease_reservation(int xc_handle,
uint32_t domid,
unsigned long nr_extents,
unsigned int extent_order,
- unsigned long *extent_start);
+ xen_pfn_t *extent_start);
int xc_domain_memory_populate_physmap(int xc_handle,
uint32_t domid,
unsigned long nr_extents,
unsigned int extent_order,
unsigned int address_bits,
- unsigned long *extent_start);
+ xen_pfn_t *extent_start);
int xc_domain_translate_gpfn_list(int xc_handle,
uint32_t domid,
unsigned long nr_gpfns,
- unsigned long *gpfn_list,
- unsigned long *mfn_list);
+ xen_pfn_t *gpfn_list,
+ xen_pfn_t *mfn_list);
int xc_domain_ioport_permission(int xc_handle,
uint32_t domid,
@@ -458,6 +458,9 @@ int xc_domain_iomem_permission(int xc_ha
unsigned long nr_mfns,
uint8_t allow_access);
+unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
+ unsigned long mfn);
+
typedef dom0_perfc_desc_t xc_perfc_desc_t;
/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
int xc_perfc_control(int xc_handle,
@@ -489,7 +492,7 @@ void *xc_map_foreign_range(int xc_handle
unsigned long mfn );
void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot,
- unsigned long *arr, int num );
+ xen_pfn_t *arr, int num );
/**
* Translates a virtual address in the context of a given domain and
@@ -504,11 +507,11 @@ unsigned long xc_translate_foreign_addre
unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
int vcpu, unsigned long long virt);
-int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf,
+int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf,
unsigned long max_pfns);
int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid,
- unsigned long *pfn_buf,
+ xen_pfn_t *pfn_buf,
unsigned int start_page, unsigned int nr_pages);
int xc_copy_to_domain_page(int xc_handle, uint32_t domid,
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xg_private.h Tue Jun 06 13:25:31 2006 -0500
@@ -156,6 +156,9 @@ struct domain_setup_info
unsigned long elf_paddr_offset;
+#define PAEKERN_no 0
+#define PAEKERN_yes 1
+#define PAEKERN_extended_cr3 2
unsigned int pae_kernel;
unsigned int load_symtab;
@@ -170,7 +173,7 @@ typedef int (*parseimagefunc)(const char
struct domain_setup_info *dsi);
typedef int (*loadimagefunc)(const char *image, unsigned long image_size,
int xch,
- uint32_t dom, unsigned long *parray,
+ uint32_t dom, xen_pfn_t *parray,
struct domain_setup_info *dsi);
struct load_funcs
@@ -198,7 +201,7 @@ unsigned long xc_get_filesz(int fd);
unsigned long xc_get_filesz(int fd);
void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size,
- int xch, uint32_t dom, unsigned long *parray,
+ int xch, uint32_t dom, xen_pfn_t *parray,
unsigned long vstart);
int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/libxc/xg_save_restore.h Tue Jun 06 13:25:31 2006 -0500
@@ -105,23 +105,23 @@ static int get_platform_info(int xc_hand
*/
#define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE
#define M2P_CHUNK_SIZE (1 << M2P_SHIFT)
-#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT)
+#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(xen_pfn_t)), M2P_SHIFT)
#define M2P_CHUNKS(_m) (M2P_SIZE((_m)) >> M2P_SHIFT)
/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
-#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT)
+#define P2M_SIZE ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT)
-/* Number of unsigned longs in a page */
-#define ulpp (PAGE_SIZE/sizeof(unsigned long))
+/* Number of xen_pfn_t in a page */
+#define fpp (PAGE_SIZE/sizeof(xen_pfn_t))
/* Number of entries in the pfn_to_mfn_frame_list */
-#define P2M_FL_ENTRIES (((max_pfn)+ulpp-1)/ulpp)
+#define P2M_FL_ENTRIES (((max_pfn)+fpp-1)/fpp)
/* Size in bytes of the pfn_to_mfn_frame_list */
#define P2M_FL_SIZE ((P2M_FL_ENTRIES)*sizeof(unsigned long))
/* Number of entries in the pfn_to_mfn_frame_list_list */
-#define P2M_FLL_ENTRIES (((max_pfn)+(ulpp*ulpp)-1)/(ulpp*ulpp))
+#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp))
/* Current guests allow 8MB 'slack' in their P2M */
#define NR_SLACK_ENTRIES ((8 * 1024 * 1024) / PAGE_SIZE)
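
The rename from ulpp to fpp matters because sizeof(xen_pfn_t) now follows the tools' word size. A worked example with 4 KB pages and a 1 GB guest (max_pfn = 262144): with 4-byte frame numbers, fpp = 4096/4 = 1024, so P2M_FL_ENTRIES = 256 and one frame-list-list entry suffices; with 8-byte frame numbers, fpp = 512, giving 512 frame-list entries and still one frame-list-list entry. A tiny check program, with fpp hard-coded for both cases rather than taken from the real headers:

    /* Worked example for the fpp / P2M_FL_ENTRIES arithmetic above. */
    #include <stdio.h>

    static void show(unsigned long max_pfn, unsigned long fpp)
    {
        unsigned long fl  = (max_pfn + fpp - 1) / fpp;             /* P2M_FL_ENTRIES  */
        unsigned long fll = (max_pfn + fpp*fpp - 1) / (fpp*fpp);   /* P2M_FLL_ENTRIES */
        printf("fpp=%4lu: frame-list entries=%lu, frame-list-list entries=%lu\n",
               fpp, fl, fll);
    }

    int main(void)
    {
        unsigned long max_pfn = 262144;   /* 1GB guest, 4KB pages */
        show(max_pfn, 4096 / 4);          /* 32-bit xen_pfn_t */
        show(max_pfn, 4096 / 8);          /* 64-bit xen_pfn_t */
        return 0;
    }
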
diff -r d3e181fa238b -r 156a0963a1ae tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c Fri Jun 02 12:54:22 2006 -0500
+++ b/tools/tests/test_x86_emulator.c Tue Jun 06 13:25:31 2006 -0500
@@ -13,6 +13,7 @@ typedef int64_t s64;
typedef int64_t s64;
#include <public/xen.h>
#include <asm-x86/x86_emulate.h>
+#include <sys/mman.h>
static int read_any(
unsigned long addr,
@@ -85,23 +86,30 @@ int main(int argc, char **argv)
struct x86_emulate_ctxt ctxt;
struct cpu_user_regs regs;
char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
- unsigned int res = 0x7FFFFFFF;
- u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
+ unsigned int *res;
int rc;
ctxt.regs = ®s;
ctxt.mode = X86EMUL_MODE_PROT32;
+ res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE,
+ MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if ( res == MAP_FAILED )
+ {
+ fprintf(stderr, "mmap to low address failed\n");
+ exit(1);
+ }
+
printf("%-40s", "Testing addl %%ecx,(%%eax)...");
instr[0] = 0x01; instr[1] = 0x08;
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- ctxt.cr2 = (unsigned long)&res;
- res = 0x7FFFFFFF;
- rc = x86_emulate_memop(&ctxt, &emulops);
- if ( (rc != 0) ||
- (res != 0x92345677) ||
+ ctxt.cr2 = (unsigned long)res;
+ *res = 0x7FFFFFFF;
+ rc = x86_emulate_memop(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (*res != 0x92345677) ||
(regs.eflags != 0xa94) ||
(regs.eip != (unsigned long)&instr[2]) )
goto fail;
@@ -116,11 +124,25 @@ int main(int argc, char **argv)
#else
regs.ecx = 0x12345678UL;
#endif
- ctxt.cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&ctxt, &emulops);
- if ( (rc != 0) ||
- (res != 0x92345677) ||
+ ctxt.cr2 = (unsigned long)res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (*res != 0x92345677) ||
(regs.ecx != 0x8000000FUL) ||
+ (regs.eip != (unsigned long)&instr[2]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing movl (%%eax),%%ecx...");
+ instr[0] = 0x8b; instr[1] = 0x08;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = ~0UL;
+ ctxt.cr2 = (unsigned long)res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (*res != 0x92345677) ||
+ (regs.ecx != 0x92345677UL) ||
(regs.eip != (unsigned long)&instr[2]) )
goto fail;
printf("okay\n");
@@ -131,10 +153,10 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = 0x92345677UL;
regs.ecx = 0xAA;
- ctxt.cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&ctxt, &emulops);
- if ( (rc != 0) ||
- (res != 0x923456AA) ||
+ ctxt.cr2 = (unsigned long)res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (*res != 0x923456AA) ||
(regs.eflags != 0x244) ||
(regs.eax != 0x92345677UL) ||
(regs.eip != (unsigned long)&instr[4]) )
@@ -147,10 +169,10 @@ int main(int argc, char **argv)
regs.eip = (unsigned long)&instr[0];
regs.eax = 0xAABBCC77UL;
regs.ecx = 0xFF;
- ctxt.cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&ctxt, &emulops);
- if ( (rc != 0) ||
- (res != 0x923456AA) ||
+ ctxt.cr2 = (unsigned long)res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (*res != 0x923456AA) ||
((regs.eflags&0x240) != 0x200) ||
(regs.eax != 0xAABBCCAA) ||
(regs.ecx != 0xFF) ||
@@ -163,10 +185,10 @@ int main(int argc, char **argv)
regs.eflags = 0x200;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- ctxt.cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&ctxt, &emulops);
- if ( (rc != 0) ||
- (res != 0x12345678) ||
+ ctxt.cr2 = (unsigned long)res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (*res != 0x12345678) ||
(regs.eflags != 0x200) ||
(regs.ecx != 0x923456AA) ||
(regs.eip != (unsigned long)&instr[2]) )
@@ -176,14 +198,14 @@ int main(int argc, char **argv)
printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)...");
instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08;
regs.eflags = 0x200;
- res = 0x923456AA;
+ *res = 0x923456AA;
regs.eip = (unsigned long)&instr[0];
regs.eax = 0x923456AAUL;
regs.ecx = 0xDDEEFF00L;
- ctxt.cr2 = (unsigned long)&res;
- rc = x86_emulate_memop(&ctxt, &emulops);
- if ( (rc != 0) ||
- (res != 0xDDEEFF00) ||
+ ctxt.cr2 = (unsigned long)res;
+ rc = x86_emulate_memop(&ctxt, &emulops);
+ if ( (rc != 0) ||
+ (*res != 0xDDEEFF00) ||
(regs.eflags != 0x244) ||
(regs.eax != 0x923456AAUL) ||
(regs.eip != (unsigned long)&instr[4]) )
@@ -192,54 +214,57 @@ int main(int argc, char **argv)
printf("%-40s", "Testing rep movsw...");
instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5;
- res = 0x22334455;
+ *res = 0x22334455;
regs.eflags = 0x200;
regs.ecx = 23;
regs.eip = (unsigned long)&instr[0];
- regs.esi = (unsigned long)&res + 0;
- regs.edi = (unsigned long)&res + 2;
+ regs.esi = (unsigned long)res + 0;
+ regs.edi = (unsigned long)res + 2;
regs.error_code = 0; /* read fault */
ctxt.cr2 = regs.esi;
rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
- (res != 0x44554455) ||
+ (*res != 0x44554455) ||
(regs.eflags != 0x200) ||
(regs.ecx != 22) ||
- (regs.esi != ((unsigned long)&res + 2)) ||
- (regs.edi != ((unsigned long)&res + 4)) ||
+ (regs.esi != ((unsigned long)res + 2)) ||
+ (regs.edi != ((unsigned long)res + 4)) ||
(regs.eip != (unsigned long)&instr[0]) )
goto fail;
printf("okay\n");
printf("%-40s", "Testing btrl $0x1,(%edi)...");
instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01;
- res = 0x2233445F;
- regs.eflags = 0x200;
- regs.eip = (unsigned long)&instr[0];
- regs.edi = (unsigned long)&res;
+ *res = 0x2233445F;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.edi = (unsigned long)res;
ctxt.cr2 = regs.edi;
rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
- (res != 0x2233445D) ||
+ (*res != 0x2233445D) ||
((regs.eflags&0x201) != 0x201) ||
(regs.eip != (unsigned long)&instr[4]) )
goto fail;
printf("okay\n");
+
+ res[0] = 0x12345678;
+ res[1] = 0x87654321;
printf("%-40s", "Testing cmpxchg8b (%edi) [succeeding]...");
instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
regs.eflags = 0x200;
- regs.eax = cmpxchg8b_res[0];
- regs.edx = cmpxchg8b_res[1];
+ regs.eax = res[0];
+ regs.edx = res[1];
regs.ebx = 0x9999AAAA;
regs.ecx = 0xCCCCFFFF;
regs.eip = (unsigned long)&instr[0];
- regs.edi = (unsigned long)cmpxchg8b_res;
+ regs.edi = (unsigned long)res;
ctxt.cr2 = regs.edi;
rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
- (cmpxchg8b_res[0] != 0x9999AAAA) ||
- (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
+ (res[0] != 0x9999AAAA) ||
+ (res[1] != 0xCCCCFFFF) ||
((regs.eflags&0x240) != 0x240) ||
(regs.eip != (unsigned long)&instr[3]) )
goto fail;
@@ -248,12 +273,12 @@ int main(int argc, char **argv)
printf("%-40s", "Testing cmpxchg8b (%edi) [failing]...");
instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
regs.eip = (unsigned long)&instr[0];
- regs.edi = (unsigned long)cmpxchg8b_res;
+ regs.edi = (unsigned long)res;
ctxt.cr2 = regs.edi;
rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
- (cmpxchg8b_res[0] != 0x9999AAAA) ||
- (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
+ (res[0] != 0x9999AAAA) ||
+ (res[1] != 0xCCCCFFFF) ||
(regs.eax != 0x9999AAAA) ||
(regs.edx != 0xCCCCFFFF) ||
((regs.eflags&0x240) != 0x200) ||
@@ -265,11 +290,11 @@ int main(int argc, char **argv)
instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- ctxt.cr2 = (unsigned long)&res;
- res = 0x82;
+ ctxt.cr2 = (unsigned long)res;
+ *res = 0x82;
rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
- (res != 0x82) ||
+ (*res != 0x82) ||
(regs.ecx != 0xFFFFFF82) ||
((regs.eflags&0x240) != 0x200) ||
(regs.eip != (unsigned long)&instr[3]) )
@@ -280,11 +305,11 @@ int main(int argc, char **argv)
instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
regs.eip = (unsigned long)&instr[0];
regs.ecx = 0x12345678;
- ctxt.cr2 = (unsigned long)&res;
- res = 0x1234aa82;
+ ctxt.cr2 = (unsigned long)res;
+ *res = 0x1234aa82;
rc = x86_emulate_memop(&ctxt, &emulops);
if ( (rc != 0) ||
- (res != 0x1234aa82) ||
+ (*res != 0x1234aa82) ||
(regs.ecx != 0xaa82) ||
((regs.eflags&0x240) != 0x200) ||
(regs.eip != (unsigned long)&instr[3]) )
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/domain.c Tue Jun 06 13:25:31 2006 -0500
@@ -259,7 +259,7 @@ int arch_set_info_guest(
struct vcpu *v, struct vcpu_guest_context *c)
{
struct domain *d = v->domain;
- unsigned long cr3_pfn;
+ unsigned long cr3_pfn = INVALID_MFN;
int i, rc;
if ( !(c->flags & VGCF_HVM_GUEST) )
@@ -524,20 +524,29 @@ static void load_segments(struct vcpu *n
if ( unlikely(!all_segs_okay) )
{
struct cpu_user_regs *regs = guest_cpu_user_regs();
- unsigned long *rsp =
+ unsigned long *rsp =
(n->arch.flags & TF_kernel_mode) ?
(unsigned long *)regs->rsp :
(unsigned long *)nctxt->kernel_sp;
+ unsigned long cs_and_mask, rflags;
if ( !(n->arch.flags & TF_kernel_mode) )
toggle_guest_mode(n);
else
regs->cs &= ~3;
+ /* CS longword also contains full evtchn_upcall_mask. */
+ cs_and_mask = (unsigned long)regs->cs |
+ ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32);
+
+ /* Fold upcall mask into RFLAGS.IF. */
+ rflags = regs->rflags & ~X86_EFLAGS_IF;
+ rflags |= !n->vcpu_info->evtchn_upcall_mask << 9;
+
if ( put_user(regs->ss, rsp- 1) |
put_user(regs->rsp, rsp- 2) |
- put_user(regs->rflags, rsp- 3) |
- put_user(regs->cs, rsp- 4) |
+ put_user(rflags, rsp- 3) |
+ put_user(cs_and_mask, rsp- 4) |
put_user(regs->rip, rsp- 5) |
put_user(nctxt->user_regs.gs, rsp- 6) |
put_user(nctxt->user_regs.fs, rsp- 7) |
@@ -549,6 +558,10 @@ static void load_segments(struct vcpu *n
DPRINTK("Error while creating failsafe callback frame.\n");
domain_crash(n->domain);
}
+
+ if ( test_bit(_VGCF_failsafe_disables_events,
+ &n->arch.guest_context.flags) )
+ n->vcpu_info->evtchn_upcall_mask = 1;
regs->entry_vector = TRAP_syscall;
regs->rflags &= 0xFFFCBEFFUL;
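
The failsafe frame change above packs two facts into the saved state: the low 32 bits of the CS slot keep the selector, the high 32 bits carry the full evtchn_upcall_mask, and the saved RFLAGS.IF bit is recomputed so it reflects whether event delivery was enabled at the time. A small illustration of that packing (64-bit longs assumed, as in the x86_64 path this code belongs to; the selector value is just an example):

    /* Illustration of the cs_and_mask / rflags packing in load_segments(). */
    #include <stdint.h>
    #include <stdio.h>

    #define X86_EFLAGS_IF (1UL << 9)

    int main(void)
    {
        unsigned long cs = 0xe033;          /* example guest CS selector */
        uint8_t upcall_mask = 1;            /* events currently masked */
        unsigned long rflags_in = 0x246;    /* hardware IF set on entry */

        unsigned long cs_and_mask = cs | ((unsigned long)upcall_mask << 32);

        unsigned long rflags = rflags_in & ~X86_EFLAGS_IF;
        rflags |= (unsigned long)!upcall_mask << 9;   /* IF := events enabled */

        printf("cs_and_mask=%#lx rflags=%#lx\n", cs_and_mask, rflags);
        /* -> cs_and_mask=0x10000e033, rflags=0x46 (IF clear: events masked) */
        return 0;
    }
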
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/domain_build.c Tue Jun 06 13:25:31 2006 -0500
@@ -301,6 +301,9 @@ int construct_dom0(struct domain *d,
xen_pae ? "yes" : "no", dom0_pae ? "yes" : "no");
return -EINVAL;
}
+
+ if ( xen_pae && !!strstr(dsi.xen_section_string, "PAE=yes[extended-cr3]") )
+ set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
if ( (p = strstr(dsi.xen_section_string, "FEATURES=")) != NULL )
{
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Jun 06 13:25:31 2006 -0500
@@ -1970,7 +1970,6 @@ static inline void vmx_vmexit_do_extint(
__hvm_bug(regs);
vector &= INTR_INFO_VECTOR_MASK;
- local_irq_disable();
TRACE_VMEXIT(1,vector);
switch(vector) {
@@ -2065,30 +2064,33 @@ asmlinkage void vmx_vmexit_handler(struc
struct vcpu *v = current;
int error;
- if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
- __hvm_bug(®s);
+ error = __vmread(VM_EXIT_REASON, &exit_reason);
+ BUG_ON(error);
perfc_incra(vmexits, exit_reason);
- /* don't bother H/W interrutps */
- if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
- exit_reason != EXIT_REASON_VMCALL &&
- exit_reason != EXIT_REASON_IO_INSTRUCTION)
+ if ( (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT) &&
+ (exit_reason != EXIT_REASON_VMCALL) &&
+ (exit_reason != EXIT_REASON_IO_INSTRUCTION) )
HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
- if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+ if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
+ local_irq_enable();
+
+ if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
+ {
printk("Failed vm entry (reason 0x%x)\n", exit_reason);
printk("*********** VMCS Area **************\n");
vmcs_dump_vcpu();
printk("**************************************\n");
domain_crash_synchronous();
- return;
}
__vmread(GUEST_RIP, &eip);
TRACE_VMEXIT(0,exit_reason);
- switch (exit_reason) {
+ switch ( exit_reason )
+ {
case EXIT_REASON_EXCEPTION_NMI:
{
/*
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/hvm/vmx/x86_32/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Tue Jun 06 13:25:31 2006 -0500
@@ -55,29 +55,26 @@
* domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
*/
-#define HVM_MONITOR_EFLAGS 0x202 /* IF on */
#define NR_SKIPPED_REGS 6 /* See the above explanation */
-#define HVM_SAVE_ALL_NOSEGREGS \
- pushl $HVM_MONITOR_EFLAGS; \
- popf; \
- subl $(NR_SKIPPED_REGS*4), %esp; \
+#define HVM_SAVE_ALL_NOSEGREGS \
+ subl $(NR_SKIPPED_REGS*4), %esp; \
movl $0, 0xc(%esp); /* XXX why do we need to force eflags==0 ?? */ \
- pushl %eax; \
- pushl %ebp; \
- pushl %edi; \
- pushl %esi; \
- pushl %edx; \
- pushl %ecx; \
+ pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
pushl %ebx;
-#define HVM_RESTORE_ALL_NOSEGREGS \
- popl %ebx; \
- popl %ecx; \
- popl %edx; \
- popl %esi; \
- popl %edi; \
- popl %ebp; \
- popl %eax; \
+#define HVM_RESTORE_ALL_NOSEGREGS \
+ popl %ebx; \
+ popl %ecx; \
+ popl %edx; \
+ popl %esi; \
+ popl %edi; \
+ popl %ebp; \
+ popl %eax; \
addl $(NR_SKIPPED_REGS*4), %esp
ALIGN
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/hvm/vmx/x86_64/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Tue Jun 06 13:25:31 2006 -0500
@@ -51,45 +51,42 @@
* (2/1) u32 entry_vector;
* (1/1) u32 error_code;
*/
-#define HVM_MONITOR_RFLAGS 0x202 /* IF on */
#define NR_SKIPPED_REGS 6 /* See the above explanation */
-#define HVM_SAVE_ALL_NOSEGREGS \
- pushq $HVM_MONITOR_RFLAGS; \
- popfq; \
- subq $(NR_SKIPPED_REGS*8), %rsp; \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rdx; \
- pushq %rcx; \
- pushq %rax; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11; \
- pushq %rbx; \
- pushq %rbp; \
- pushq %r12; \
- pushq %r13; \
- pushq %r14; \
- pushq %r15; \
+#define HVM_SAVE_ALL_NOSEGREGS \
+ subq $(NR_SKIPPED_REGS*8), %rsp; \
+ pushq %rdi; \
+ pushq %rsi; \
+ pushq %rdx; \
+ pushq %rcx; \
+ pushq %rax; \
+ pushq %r8; \
+ pushq %r9; \
+ pushq %r10; \
+ pushq %r11; \
+ pushq %rbx; \
+ pushq %rbp; \
+ pushq %r12; \
+ pushq %r13; \
+ pushq %r14; \
+ pushq %r15;
-#define HVM_RESTORE_ALL_NOSEGREGS \
- popq %r15; \
- popq %r14; \
- popq %r13; \
- popq %r12; \
- popq %rbp; \
- popq %rbx; \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rax; \
- popq %rcx; \
- popq %rdx; \
- popq %rsi; \
- popq %rdi; \
- addq $(NR_SKIPPED_REGS*8), %rsp; \
+#define HVM_RESTORE_ALL_NOSEGREGS \
+ popq %r15; \
+ popq %r14; \
+ popq %r13; \
+ popq %r12; \
+ popq %rbp; \
+ popq %rbx; \
+ popq %r11; \
+ popq %r10; \
+ popq %r9; \
+ popq %r8; \
+ popq %rax; \
+ popq %rcx; \
+ popq %rdx; \
+ popq %rsi; \
+ popq %rdi; \
+ addq $(NR_SKIPPED_REGS*8), %rsp;
ENTRY(vmx_asm_vmexit_handler)
/* selectors are restored/saved by VMX */
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/mm.c Tue Jun 06 13:25:31 2006 -0500
@@ -996,6 +996,21 @@ static int alloc_l3_table(struct page_in
int i;
ASSERT(!shadow_mode_refcounts(d));
+
+#ifdef CONFIG_X86_PAE
+ /*
+ * PAE pgdirs above 4GB are unacceptable if the guest does not understand
+ * the weird 'extended cr3' format for dealing with high-order address
+ * bits. We cut some slack for control tools (before vcpu0 is initialised).
+ */
+ if ( (pfn >= 0x100000) &&
+ unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
+ d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
+ {
+ MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
+ return 0;
+ }
+#endif
pl3e = map_domain_page(pfn);
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/x86_32/asm-offsets.c Tue Jun 06 13:25:31 2006 -0500
@@ -64,11 +64,13 @@ void __dummy__(void)
arch.guest_context.kernel_ss);
OFFSET(VCPU_kernel_sp, struct vcpu,
arch.guest_context.kernel_sp);
+ OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
+ DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
BLANK();
OFFSET(TSS_ss0, struct tss_struct, ss0);
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/x86_32/entry.S Tue Jun 06 13:25:31 2006 -0500
@@ -130,7 +130,10 @@ failsafe_callback:
movl VCPU_failsafe_sel(%ebx),%eax
movw %ax,TRAPBOUNCE_cs(%edx)
movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx)
- call create_bounce_frame
+ bt $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%ebx)
+ jnc 1f
+ orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
+1: call create_bounce_frame
xorl %eax,%eax
movl %eax,UREGS_ds(%esp)
movl %eax,UREGS_es(%esp)
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/x86_32/traps.c Tue Jun 06 13:25:31 2006 -0500
@@ -346,6 +346,12 @@ static long register_guest_callback(stru
case CALLBACKTYPE_failsafe:
v->arch.guest_context.failsafe_callback_cs = reg->address.cs;
v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
+ if ( reg->flags & CALLBACKF_mask_events )
+ set_bit(_VGCF_failsafe_disables_events,
+ &v->arch.guest_context.flags);
+ else
+ clear_bit(_VGCF_failsafe_disables_events,
+ &v->arch.guest_context.flags);
break;
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/x86_64/asm-offsets.c Tue Jun 06 13:25:31 2006 -0500
@@ -64,11 +64,14 @@ void __dummy__(void)
arch.guest_context.syscall_callback_eip);
OFFSET(VCPU_kernel_sp, struct vcpu,
arch.guest_context.kernel_sp);
+ OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
+ DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
+ DEFINE(_VGCF_syscall_disables_events, _VGCF_syscall_disables_events);
BLANK();
OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/x86_64/entry.S Tue Jun 06 13:25:31 2006 -0500
@@ -30,7 +30,10 @@ switch_to_kernel:
movq VCPU_syscall_addr(%rbx),%rax
movq %rax,TRAPBOUNCE_eip(%rdx)
movw $0,TRAPBOUNCE_flags(%rdx)
- call create_bounce_frame
+ bt $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
+ jnc 1f
+ orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
+1: call create_bounce_frame
jmp test_all_events
/* %rbx: struct vcpu, interrupts disabled */
@@ -77,7 +80,10 @@ failsafe_callback:
movq VCPU_failsafe_addr(%rbx),%rax
movq %rax,TRAPBOUNCE_eip(%rdx)
movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
- call create_bounce_frame
+ bt $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx)
+ jnc 1f
+ orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
+1: call create_bounce_frame
jmp test_all_events
.previous
.section __pre_ex_table,"a"
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/x86_64/traps.c Tue Jun 06 13:25:31 2006 -0500
@@ -334,10 +334,22 @@ static long register_guest_callback(stru
case CALLBACKTYPE_failsafe:
v->arch.guest_context.failsafe_callback_eip = reg->address;
+ if ( reg->flags & CALLBACKF_mask_events )
+ set_bit(_VGCF_failsafe_disables_events,
+ &v->arch.guest_context.flags);
+ else
+ clear_bit(_VGCF_failsafe_disables_events,
+ &v->arch.guest_context.flags);
break;
case CALLBACKTYPE_syscall:
v->arch.guest_context.syscall_callback_eip = reg->address;
+ if ( reg->flags & CALLBACKF_mask_events )
+ set_bit(_VGCF_syscall_disables_events,
+ &v->arch.guest_context.flags);
+ else
+ clear_bit(_VGCF_syscall_disables_events,
+ &v->arch.guest_context.flags);
break;
case CALLBACKTYPE_nmi:
diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/arch/x86/x86_emulate.c Tue Jun 06 13:25:31 2006 -0500
@@ -100,8 +100,8 @@ static uint8_t opcode_table[256] = {
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
/* 0x88 - 0x8F */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
- ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
+ ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
0, 0, 0, DstMem|SrcNone|ModRM|Mov,
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
diff -r d3e181fa238b -r 156a0963a1ae xen/common/kernel.c
--- a/xen/common/kernel.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/common/kernel.c Tue Jun 06 13:25:31 2006 -0500
@@ -184,6 +184,7 @@ long do_xen_version(int cmd, XEN_GUEST_H
case XENVER_get_features:
{
xen_feature_info_t fi;
+ struct domain *d = current->domain;
if ( copy_from_guest(&fi, arg, 1) )
return -EFAULT;
@@ -191,7 +192,9 @@ long do_xen_version(int cmd, XEN_GUEST_H
switch ( fi.submap_idx )
{
case 0:
- fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
+ fi.submap = 0;
+ if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) )
+ fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
if ( shadow_mode_translate(current->domain) )
fi.submap |=
(1U << XENFEAT_writable_page_tables) |
diff -r d3e181fa238b -r 156a0963a1ae xen/common/keyhandler.c
--- a/xen/common/keyhandler.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/common/keyhandler.c Tue Jun 06 13:25:31 2006 -0500
@@ -128,11 +128,12 @@ static void dump_domains(unsigned char k
d->domain_flags, atomic_read(&d->refcnt),
d->tot_pages, d->xenheap_pages, cpuset);
printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
- "%02x%02x-%02x%02x%02x%02x%02x%02x\n",
+ "%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
- d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
+ d->handle[12], d->handle[13], d->handle[14], d->handle[15],
+ d->vm_assist);
arch_dump_domain_info(d);
diff -r d3e181fa238b -r 156a0963a1ae xen/common/memory.c
--- a/xen/common/memory.c Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/common/memory.c Tue Jun 06 13:25:31 2006 -0500
@@ -31,14 +31,15 @@ static long
static long
increase_reservation(
struct domain *d,
- XEN_GUEST_HANDLE(ulong) extent_list,
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
unsigned int nr_extents,
unsigned int extent_order,
unsigned int flags,
int *preempted)
{
struct page_info *page;
- unsigned long i, mfn;
+ unsigned long i;
+ xen_pfn_t mfn;
if ( !guest_handle_is_null(extent_list) &&
!guest_handle_okay(extent_list, nr_extents) )
@@ -80,14 +81,16 @@ static long
static long
populate_physmap(
struct domain *d,
- XEN_GUEST_HANDLE(ulong) extent_list,
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
unsigned int nr_extents,
unsigned int extent_order,
unsigned int flags,
int *preempted)
{
struct page_info *page;
- unsigned long i, j, gpfn, mfn;
+ unsigned long i, j;
+ xen_pfn_t gpfn;
+ xen_pfn_t mfn;
if ( !guest_handle_okay(extent_list, nr_extents) )
return 0;
@@ -177,13 +180,14 @@ static long
static long
decrease_reservation(
struct domain *d,
- XEN_GUEST_HANDLE(ulong) extent_list,
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
unsigned int nr_extents,
unsigned int extent_order,
unsigned int flags,
int *preempted)
{
- unsigned long i, j, gmfn;
+ unsigned long i, j;
+ xen_pfn_t gmfn;
if ( !guest_handle_okay(extent_list, nr_extents) )
return 0;
@@ -214,7 +218,9 @@ translate_gpfn_list(
XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
struct xen_translate_gpfn_list op;
- unsigned long i, gpfn, mfn;
+ unsigned long i;
+ xen_pfn_t gpfn;
+ xen_pfn_t mfn;
struct domain *d;
if ( copy_from_guest(&op, uop, 1) )
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/arch-ia64.h Tue Jun 06 13:25:31 2006 -0500
@@ -26,6 +26,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
DEFINE_XEN_GUEST_HANDLE(int);
DEFINE_XEN_GUEST_HANDLE(long);
DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#endif
/* Arch specific VIRQs definition */
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/arch-x86_32.h Tue Jun 06 13:25:31 2006 -0500
@@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
DEFINE_XEN_GUEST_HANDLE(int);
DEFINE_XEN_GUEST_HANDLE(long);
DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#endif
/*
@@ -138,9 +141,17 @@ struct vcpu_guest_context {
struct vcpu_guest_context {
/* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
-#define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST (1<<1)
-#define VGCF_IN_KERNEL (1<<2)
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_HVM_GUEST (1<<1)
+#define VGCF_IN_KERNEL (1<<2)
+#define _VGCF_i387_valid 0
+#define VGCF_i387_valid (1<<_VGCF_i387_valid)
+#define _VGCF_hvm_guest 1
+#define VGCF_hvm_guest (1<<_VGCF_hvm_guest)
+#define _VGCF_in_kernel 2
+#define VGCF_in_kernel (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events)
unsigned long flags; /* VGCF_* flags */
struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
@@ -169,7 +180,7 @@ struct arch_shared_info {
struct arch_shared_info {
unsigned long max_pfn; /* max pfn that appears in table */
/* Frame containing list of mfns containing list of mfns containing p2m. */
- unsigned long pfn_to_mfn_frame_list_list;
+ xen_pfn_t pfn_to_mfn_frame_list_list;
unsigned long nmi_reason;
};
typedef struct arch_shared_info arch_shared_info_t;
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/arch-x86_64.h Tue Jun 06 13:25:31 2006 -0500
@@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int);
DEFINE_XEN_GUEST_HANDLE(int);
DEFINE_XEN_GUEST_HANDLE(long);
DEFINE_XEN_GUEST_HANDLE(void);
+
+typedef unsigned long xen_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#endif
/*
@@ -211,9 +214,19 @@ struct vcpu_guest_context {
struct vcpu_guest_context {
/* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
-#define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST (1<<1)
-#define VGCF_IN_KERNEL (1<<2)
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_HVM_GUEST (1<<1)
+#define VGCF_IN_KERNEL (1<<2)
+#define _VGCF_i387_valid 0
+#define VGCF_i387_valid (1<<_VGCF_i387_valid)
+#define _VGCF_hvm_guest 1
+#define VGCF_hvm_guest (1<<_VGCF_hvm_guest)
+#define _VGCF_in_kernel 2
+#define VGCF_in_kernel (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events 4
+#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events)
unsigned long flags; /* VGCF_* flags */
struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
@@ -240,7 +253,7 @@ struct arch_shared_info {
struct arch_shared_info {
unsigned long max_pfn; /* max pfn that appears in table */
/* Frame containing list of mfns containing list of mfns containing p2m. */
- unsigned long pfn_to_mfn_frame_list_list;
+ xen_pfn_t pfn_to_mfn_frame_list_list;
unsigned long nmi_reason;
};
typedef struct arch_shared_info arch_shared_info_t;
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/callback.h
--- a/xen/include/public/callback.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/callback.h Tue Jun 06 13:25:31 2006 -0500
@@ -29,12 +29,20 @@
#define CALLBACKTYPE_nmi 4
/*
+ * Disable event delivery during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events 0
+#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events)
+
+/*
* Register a callback.
*/
#define CALLBACKOP_register 0
struct callback_register {
- int type;
- xen_callback_t address;
+ uint16_t type;
+ uint16_t flags;
+ xen_callback_t address;
};
typedef struct callback_register callback_register_t;
DEFINE_XEN_GUEST_HANDLE(callback_register_t);
@@ -47,7 +55,8 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe
*/
#define CALLBACKOP_unregister 1
struct callback_unregister {
- int type;
+ uint16_t type;
+ uint16_t _unused;
};
typedef struct callback_unregister callback_unregister_t;
DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
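
With the new flags field, a guest that wants its failsafe callback to run with events masked registers it roughly as below. This is a fragment against the x86_64 layout, where xen_callback_t is a single address; the HYPERVISOR_callback_op() wrapper and the entry-point symbol are conventional guest-side names assumed here, not part of this patch.

    /* Sketch: registering a failsafe callback that also masks event delivery.
     * Types and constants come from public/callback.h as patched above. */
    extern void failsafe_callback_entry(void);   /* guest asm entry point (assumed) */

    static int register_failsafe(void)
    {
        struct callback_register reg = {
            .type    = CALLBACKTYPE_failsafe,
            .flags   = CALLBACKF_mask_events,
            .address = (unsigned long)failsafe_callback_entry,
        };
        return HYPERVISOR_callback_op(CALLBACKOP_register, &reg);
    }
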
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/dom0_ops.h Tue Jun 06 13:25:31 2006 -0500
@@ -19,7 +19,7 @@
* This makes sure that old versions of dom0 tools will stop working in a
* well-defined way (rather than crashing the machine, for instance).
*/
-#define DOM0_INTERFACE_VERSION 0x03000000
+#define DOM0_INTERFACE_VERSION 0x03000001
/************************************************************************/
@@ -27,10 +27,10 @@ struct dom0_getmemlist {
struct dom0_getmemlist {
/* IN variables. */
domid_t domain;
- unsigned long max_pfns;
- XEN_GUEST_HANDLE(ulong) buffer;
- /* OUT variables. */
- unsigned long num_pfns;
+ uint64_t max_pfns;
+ XEN_GUEST_HANDLE(xen_pfn_t) buffer;
+ /* OUT variables. */
+ uint64_t num_pfns;
};
typedef struct dom0_getmemlist dom0_getmemlist_t;
DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t);
@@ -96,9 +96,9 @@ struct dom0_getdomaininfo {
#define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code. */
#define DOMFLAGS_SHUTDOWNSHIFT 16
uint32_t flags;
- unsigned long tot_pages;
- unsigned long max_pages;
- unsigned long shared_info_frame; /* MFN of shared_info struct */
+ uint64_t tot_pages;
+ uint64_t max_pages;
+ xen_pfn_t shared_info_frame; /* MFN of shared_info struct */
uint64_t cpu_time;
uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */
uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
@@ -162,7 +162,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t);
struct dom0_getpageframeinfo {
/* IN variables. */
- unsigned long mfn; /* Machine page frame number to query. */
+ xen_pfn_t mfn; /* Machine page frame number to query. */
domid_t domain; /* To which domain does the frame belong? */
/* OUT variables. */
/* Is the page PINNED to a type? */
@@ -213,7 +213,7 @@ struct dom0_tbufcontrol {
cpumap_t cpu_mask;
uint32_t evt_mask;
/* OUT variables */
- unsigned long buffer_mfn;
+ xen_pfn_t buffer_mfn;
uint32_t size;
};
typedef struct dom0_tbufcontrol dom0_tbufcontrol_t;
@@ -229,8 +229,8 @@ struct dom0_physinfo {
uint32_t sockets_per_node;
uint32_t nr_nodes;
uint32_t cpu_khz;
- unsigned long total_pages;
- unsigned long free_pages;
+ uint64_t total_pages;
+ uint64_t free_pages;
uint32_t hw_cap[8];
};
typedef struct dom0_physinfo dom0_physinfo_t;
@@ -276,7 +276,7 @@ struct dom0_shadow_control {
uint32_t op;
XEN_GUEST_HANDLE(ulong) dirty_bitmap;
/* IN/OUT variables. */
- unsigned long pages; /* size of buffer, updated with actual size */
+ uint64_t pages; /* size of buffer, updated with actual size */
/* OUT variables. */
struct dom0_shadow_control_stats stats;
};
@@ -286,8 +286,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_shadow_cont
#define DOM0_SETDOMAINMAXMEM 28
struct dom0_setdomainmaxmem {
/* IN variables. */
- domid_t domain;
- unsigned long max_memkb;
+ domid_t domain;
+ uint64_t max_memkb;
};
typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t;
DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t);
@@ -295,8 +295,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_setdomainma
#define DOM0_GETPAGEFRAMEINFO2 29 /* batched interface */
struct dom0_getpageframeinfo2 {
/* IN variables. */
- domid_t domain;
- unsigned long num;
+ domid_t domain;
+ uint64_t num;
/* IN/OUT variables. */
XEN_GUEST_HANDLE(ulong) array;
};
@@ -313,12 +313,12 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram
#define DOM0_ADD_MEMTYPE 31
struct dom0_add_memtype {
/* IN variables. */
- unsigned long mfn;
- unsigned long nr_mfns;
- uint32_t type;
- /* OUT variables. */
- uint32_t handle;
- uint32_t reg;
+ xen_pfn_t mfn;
+ uint64_t nr_mfns;
+ uint32_t type;
+ /* OUT variables. */
+ uint32_t handle;
+ uint32_t reg;
};
typedef struct dom0_add_memtype dom0_add_memtype_t;
DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t);
@@ -345,8 +345,8 @@ struct dom0_read_memtype {
/* IN variables. */
uint32_t reg;
/* OUT variables. */
- unsigned long mfn;
- unsigned long nr_mfns;
+ xen_pfn_t mfn;
+ uint64_t nr_mfns;
uint32_t type;
};
typedef struct dom0_read_memtype dom0_read_memtype_t;
@@ -499,8 +499,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_irq_permiss
#define DOM0_IOMEM_PERMISSION 47
struct dom0_iomem_permission {
domid_t domain; /* domain to be affected */
- unsigned long first_mfn; /* first page (physical page number) in range */
- unsigned long nr_mfns; /* number of pages in range (>0) */
+ xen_pfn_t first_mfn; /* first page (physical page number) in range */
+ uint64_t nr_mfns; /* number of pages in range (>0) */
uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
};
typedef struct dom0_iomem_permission dom0_iomem_permission_t;
@@ -509,7 +509,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permi
#define DOM0_HYPERCALL_INIT 48
struct dom0_hypercall_init {
domid_t domain; /* domain to be affected */
- unsigned long mfn; /* machine frame to be initialised */
+ xen_pfn_t mfn; /* machine frame to be initialised */
};
typedef struct dom0_hypercall_init dom0_hypercall_init_t;
DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/grant_table.h Tue Jun 06 13:25:31 2006 -0500
@@ -244,7 +244,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl
#define GNTTABOP_transfer 4
struct gnttab_transfer {
/* IN parameters. */
- unsigned long mfn;
+ xen_pfn_t mfn;
domid_t domid;
grant_ref_t ref;
/* OUT parameters. */
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/io/netif.h Tue Jun 06 13:25:31 2006 -0500
@@ -26,6 +26,10 @@
/* Packet data has been validated against protocol checksum. */
#define _NETTXF_data_validated (1)
#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request. */
+#define _NETTXF_more_data (2)
+#define NETTXF_more_data (1U<<_NETTXF_more_data)
struct netif_tx_request {
grant_ref_t gref; /* Reference to buffer page */
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/io/ring.h Tue Jun 06 13:25:31 2006 -0500
@@ -151,19 +151,27 @@ typedef struct __name##_back_ring __name
#define RING_SIZE(_r) \
((_r)->nr_ents)
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r) \
+ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
/* Test if there is an empty slot available on the front ring.
* (This is only meaningful from the front. )
*/
#define RING_FULL(_r) \
- (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r))
+ (RING_FREE_REQUESTS(_r) == 0)
/* Test if there are outstanding messages to be processed on a ring. */
#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
- ((_r)->rsp_cons != (_r)->sring->rsp_prod)
+ ((_r)->sring->rsp_prod - (_r)->rsp_cons)
#define RING_HAS_UNCONSUMED_REQUESTS(_r) \
- (((_r)->req_cons != (_r)->sring->req_prod) && \
- (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r)))
+ ({ \
+ unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \
+ unsigned int rsp = RING_SIZE(_r) - \
+ ((_r)->req_cons - (_r)->rsp_prod_pvt); \
+ req < rsp ? req : rsp; \
+ })
/* Direct access to individual ring elements, by index. */
#define RING_GET_REQUEST(_r, _idx) \
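
The rewritten RING_HAS_UNCONSUMED_REQUESTS caps the raw request count by the number of slots for which the backend has not yet produced a response, which keeps a backend from consuming more requests than it has room to answer. Worked example with RING_SIZE = 8: req_prod = 10, req_cons = 6, rsp_prod_pvt = 4 gives req = 4 and rsp = 8 - (6 - 4) = 6, so the macro yields 4; if the backend had fallen behind with rsp_prod_pvt = 0, the limit would instead be rsp = 2. A standalone check of the same arithmetic:

    /* Standalone check of the new RING_HAS_UNCONSUMED_REQUESTS arithmetic. */
    #include <stdio.h>

    static unsigned int unconsumed_requests(unsigned int ring_size,
                                            unsigned int req_prod,
                                            unsigned int req_cons,
                                            unsigned int rsp_prod_pvt)
    {
        unsigned int req = req_prod - req_cons;
        unsigned int rsp = ring_size - (req_cons - rsp_prod_pvt);
        return req < rsp ? req : rsp;
    }

    int main(void)
    {
        printf("%u\n", unconsumed_requests(8, 10, 6, 4));   /* 4 */
        printf("%u\n", unconsumed_requests(8, 10, 6, 0));   /* 2 */
        return 0;
    }
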
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/memory.h
--- a/xen/include/public/memory.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/memory.h Tue Jun 06 13:25:31 2006 -0500
@@ -29,7 +29,7 @@ struct xen_memory_reservation {
* OUT: GMFN bases of extents that were allocated
* (NB. This command also updates the mach_to_phys translation table)
*/
- XEN_GUEST_HANDLE(ulong) extent_start;
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
/* Number of extents, and size/alignment of each (2^extent_order pages). */
unsigned long nr_extents;
@@ -87,7 +87,7 @@ struct xen_machphys_mfn_list {
* any large discontiguities in the machine address space, 2MB gaps in
* the machphys table will be represented by an MFN base of zero.
*/
- XEN_GUEST_HANDLE(ulong) extent_start;
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
/*
* Number of extents written to the above array. This will be smaller
@@ -117,7 +117,7 @@ struct xen_add_to_physmap {
unsigned long idx;
/* GPFN where the source mapping page should appear. */
- unsigned long gpfn;
+ xen_pfn_t gpfn;
};
typedef struct xen_add_to_physmap xen_add_to_physmap_t;
DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
@@ -135,13 +135,13 @@ struct xen_translate_gpfn_list {
unsigned long nr_gpfns;
/* List of GPFNs to translate. */
- XEN_GUEST_HANDLE(ulong) gpfn_list;
+ XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
/*
* Output list to contain MFN translations. May be the same as the input
* list (in which case each input GPFN is overwritten with the output MFN).
*/
- XEN_GUEST_HANDLE(ulong) mfn_list;
+ XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
};
typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/xen.h
--- a/xen/include/public/xen.h Fri Jun 02 12:54:22 2006 -0500
+++ b/xen/include/public/xen.h Tue Jun 06 13:25:31 2006 -0500
@@ -199,7 +199,7 @@ struct mmuext_op {
unsigned int cmd;
union {
/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
- unsigned long mfn;
+ xen_pfn_t mfn;
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
unsigned long linear_addr;
} arg1;
@@ -236,10 +236,24 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
*/
#define VMASST_CMD_enable 0
#define VMASST_CMD_disable 1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
#define VMASST_TYPE_4gb_segments 0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
#define VMASST_TYPE_4gb_segments_notify 1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
#define VMASST_TYPE_writable_pagetables 2
-#define MAX_VMASST_TYPE 2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
+#define VMASST_TYPE_pae_extended_cr3 3
+
+#define MAX_VMASST_TYPE 3
#ifndef __ASSEMBLY__
@@ -449,9 +463,9 @@ struct start_info {
unsigned long nr_pages; /* Total pages allocated to this domain. */
unsigned long shared_info; /* MACHINE address of shared info struct. */
uint32_t flags; /* SIF_xxx flags. */
- unsigned long store_mfn; /* MACHINE page number of shared page. */
+ xen_pfn_t store_mfn; /* MACHINE page number of shared page. */
uint32_t store_evtchn; /* Event channel for store communication. */
- unsigned long console_mfn; /* MACHINE address of console page. */
+ xen_pfn_t console_mfn; /* MACHINE page number of console page. */
uint32_t console_evtchn; /* Event channel for console messages. */
/* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
unsigned long pt_base; /* VIRTUAL address of page directory. */
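
Tying the pieces together: a PAE guest that can handle page directories above 4GB advertises it statically via the "PAE=yes[extended-cr3]" ELF note (picked up by domain_build.c and xc_load_elf.c above), and can also enable the assist at run time, then confirm it through XENVER_get_features, which per the kernel.c change only reports XENFEAT_pae_pgdir_above_4gb once the assist is active. A hedged guest-side fragment; HYPERVISOR_vm_assist() and HYPERVISOR_xen_version() are the conventional hypercall wrappers and are assumed here rather than shown in this patch.

    /* Sketch: enabling the PAE extended-cr3 assist and checking the feature bit. */
    static int enable_extended_cr3(void)
    {
        xen_feature_info_t fi;

        if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
                                 VMASST_TYPE_pae_extended_cr3) != 0)
            return -1;

        fi.submap_idx = 0;
        if (HYPERVISOR_xen_version(XENVER_get_features, &fi) != 0)
            return -1;

        return (fi.submap & (1U << XENFEAT_pae_pgdir_above_4gb)) ? 0 : -1;
    }
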