# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1182365367 21600
# Node ID 810885428743660169e7382ec9596373ca6ce48f
# Parent c20bc60f9243d08199cb0a9a837cbe11c6b3dcdc
# Parent 005dd6b1cf8e0008aba7984b828274a40e8d7d95
merge with xen-unstable.hg
---
docs/src/user.tex | 1
tools/blktap/drivers/Makefile | 1
tools/blktap/drivers/block-aio.c | 49 ++++-----
tools/blktap/drivers/block-qcow.c | 48 ++++-----
tools/blktap/drivers/tapaio.c | 164 ++++++++++++++++++++++++++++++++
tools/blktap/drivers/tapaio.h | 58 +++++++++++
tools/examples/init.d/xendomains | 33 ++++--
tools/ioemu/block-raw.c | 2
tools/ioemu/target-i386-dm/exec-dm.c | 42 ++++++--
tools/ioemu/vl.c | 12 ++
tools/libxc/xc_core.c | 2
tools/python/xen/xend/XendDomainInfo.py | 2
tools/python/xen/xend/server/blkif.py | 5
xen/arch/ia64/xen/domain.c | 9 -
xen/arch/ia64/xen/xenmem.c | 2
xen/arch/x86/apic.c | 4
xen/arch/x86/boot/cmdline.S | 40 +++++--
xen/arch/x86/boot/trampoline.S | 11 --
xen/arch/x86/boot/video.S | 59 ++++++-----
xen/arch/x86/boot/video.h | 9 -
xen/arch/x86/boot/x86_32.S | 4
xen/arch/x86/boot/x86_64.S | 2
xen/arch/x86/domain.c | 37 +++++--
xen/arch/x86/domain_build.c | 6 -
xen/arch/x86/flushtlb.c | 4
xen/arch/x86/hvm/hvm.c | 21 ++--
xen/arch/x86/hvm/irq.c | 81 ++++++++-------
xen/arch/x86/hvm/svm/asid.c | 72 +++++++-------
xen/arch/x86/hvm/svm/intr.c | 146 ++++++++++++++++------------
xen/arch/x86/hvm/svm/svm.c | 60 +++++------
xen/arch/x86/hvm/svm/vmcb.c | 6 -
xen/arch/x86/hvm/vioapic.c | 34 +++---
xen/arch/x86/hvm/vlapic.c | 9 -
xen/arch/x86/hvm/vmx/intr.c | 106 ++++++++++----------
xen/arch/x86/hvm/vmx/vmcs.c | 2
xen/arch/x86/hvm/vmx/vmx.c | 59 ++++++++---
xen/arch/x86/hvm/vpic.c | 3
xen/arch/x86/hvm/vpt.c | 40 ++++---
xen/arch/x86/mm.c | 10 -
xen/arch/x86/setup.c | 10 +
xen/arch/x86/traps.c | 14 ++
xen/arch/x86/x86_32/traps.c | 1
xen/arch/x86/x86_64/compat_kexec.S | 65 +++++++++++-
xen/arch/x86/x86_64/traps.c | 1
xen/common/compat/memory.c | 7 +
xen/common/domctl.c | 4
xen/common/grant_table.c | 12 +-
xen/common/kernel.c | 10 -
xen/common/kexec.c | 4
xen/common/perfc.c | 2
xen/drivers/char/console.c | 2
xen/drivers/video/vga.c | 3
xen/include/asm-ia64/guest_access.h | 25 ++--
xen/include/asm-x86/event.h | 1
xen/include/asm-x86/guest_access.h | 68 +++++++------
xen/include/asm-x86/hvm/hvm.h | 33 +++++-
xen/include/asm-x86/hvm/irq.h | 12 +-
xen/include/asm-x86/hvm/support.h | 1
xen/include/asm-x86/hvm/svm/asid.h | 1
xen/include/asm-x86/hvm/vcpu.h | 4
xen/include/asm-x86/hvm/vlapic.h | 2
xen/include/asm-x86/hvm/vmx/vmx.h | 13 +-
xen/include/asm-x86/hvm/vpic.h | 2
xen/include/asm-x86/hvm/vpt.h | 3
xen/include/xen/compat.h | 62 +++++++-----
xen/include/xen/xencomm.h | 43 ++++----
66 files changed, 1080 insertions(+), 580 deletions(-)
diff -r c20bc60f9243 -r 810885428743 docs/src/user.tex
--- a/docs/src/user.tex Wed Jun 20 12:47:52 2007 -0600
+++ b/docs/src/user.tex Wed Jun 20 12:49:27 2007 -0600
@@ -3178,6 +3178,7 @@ editing \path{grub.conf}.
\begin{description}
\item[ ask ] Display a vga menu allowing manual selection of video
mode.
+ \item[ current ] Use existing vga mode without modification.
\item[ text-$<$mode$>$ ] Select text-mode resolution, where mode is
one of 80x25, 80x28, 80x30, 80x34, 80x43, 80x50, 80x60.
\item[ gfx-$<$mode$>$ ] Select VESA graphics mode
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/Makefile Wed Jun 20 12:49:27 2007 -0600
@@ -35,6 +35,7 @@ BLK-OBJS += block-ram.o
BLK-OBJS += block-ram.o
BLK-OBJS += block-qcow.o
BLK-OBJS += aes.o
+BLK-OBJS += tapaio.o
all: $(IBIN) qcow-util
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/block-aio.c Wed Jun 20 12:49:27 2007 -0600
@@ -43,14 +43,7 @@
#include <sys/ioctl.h>
#include <linux/fs.h>
#include "tapdisk.h"
-
-
-/**
- * We used a kernel patch to return an fd associated with the AIO context
- * so that we can concurrently poll on synchronous and async descriptors.
- * This is signalled by passing 1 as the io context to io_setup.
- */
-#define REQUEST_ASYNC_FD 1
+#include "tapaio.h"
#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
@@ -65,14 +58,13 @@ struct tdaio_state {
int fd;
/* libaio state */
- io_context_t aio_ctx;
+ tap_aio_context_t aio_ctx;
struct iocb iocb_list [MAX_AIO_REQS];
struct iocb *iocb_free [MAX_AIO_REQS];
struct pending_aio pending_aio[MAX_AIO_REQS];
int iocb_free_count;
struct iocb *iocb_queue[MAX_AIO_REQS];
int iocb_queued;
- int poll_fd; /* NB: we require aio_poll support */
struct io_event aio_events[MAX_AIO_REQS];
};
@@ -148,7 +140,7 @@ static inline void init_fds(struct disk_
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;
- dd->io_fd[0] = prv->poll_fd;
+ dd->io_fd[0] = prv->aio_ctx.pollfd;
}
/* Open the disk file and initialize aio state. */
@@ -162,12 +154,9 @@ int tdaio_open (struct disk_driver *dd,
/* Initialize AIO */
prv->iocb_free_count = MAX_AIO_REQS;
prv->iocb_queued = 0;
-
- prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
- prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
-
- if (prv->poll_fd < 0) {
- ret = prv->poll_fd;
+
+ ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
+ if (ret < 0) {
if (ret == -EAGAIN) {
DPRINTF("Couldn't setup AIO context. If you are "
"trying to concurrently use a large number "
@@ -176,9 +165,7 @@ int tdaio_open (struct disk_driver *dd,
"(e.g. 'echo echo 1048576 > /proc/sys/fs/"
"aio-max-nr')\n");
} else {
- DPRINTF("Couldn't get fd for AIO poll support. This "
- "is probably because your kernel does not "
- "have the aio-poll patch applied.\n");
+ DPRINTF("Couldn't setup AIO context.\n");
}
goto done;
}
@@ -286,7 +273,7 @@ int tdaio_submit(struct disk_driver *dd)
if (!prv->iocb_queued)
return 0;
- ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+ ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
/* XXX: TODO: Handle error conditions here. */
@@ -300,7 +287,7 @@ int tdaio_close(struct disk_driver *dd)
{
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- io_destroy(prv->aio_ctx);
+ io_destroy(prv->aio_ctx.aio_ctx);
close(prv->fd);
return 0;
@@ -308,15 +295,13 @@ int tdaio_close(struct disk_driver *dd)
int tdaio_do_callbacks(struct disk_driver *dd, int sid)
{
- int ret, i, rsp = 0;
+ int i, nr_events, rsp = 0;
struct io_event *ep;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- /* Non-blocking test for completed io. */
- ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
- NULL);
-
- for (ep=prv->aio_events,i=ret; i-->0; ep++) {
+ nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+ for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
@@ -327,6 +312,14 @@ int tdaio_do_callbacks(struct disk_drive
prv->iocb_free[prv->iocb_free_count++] = io;
}
+
+ if (nr_events) {
+ nr_events = tap_aio_more_events(&prv->aio_ctx);
+ goto repeat;
+ }
+
+ tap_aio_continue(&prv->aio_ctx);
+
return rsp;
}
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:49:27 2007 -0600
@@ -38,6 +38,7 @@
#include "bswap.h"
#include "aes.h"
#include "tapdisk.h"
+#include "tapaio.h"
#if 1
#define ASSERT(_p) \
@@ -52,9 +53,6 @@
(uint64_t)( \
(l + (s - 1)) - ((l + (s - 1)) % s)); \
})
-
-/******AIO DEFINES******/
-#define REQUEST_ASYNC_FD 1
struct pending_aio {
td_callback_t cb;
@@ -145,7 +143,7 @@ struct tdqcow_state {
AES_KEY aes_encrypt_key; /*AES key*/
AES_KEY aes_decrypt_key; /*AES key*/
/* libaio state */
- io_context_t aio_ctx;
+ tap_aio_context_t aio_ctx;
int max_aio_reqs;
struct iocb *iocb_list;
struct iocb **iocb_free;
@@ -153,7 +151,6 @@ struct tdqcow_state {
int iocb_free_count;
struct iocb **iocb_queue;
int iocb_queued;
- int poll_fd; /* NB: we require aio_poll support */
struct io_event *aio_events;
};
@@ -179,7 +176,7 @@ static void free_aio_state(struct disk_d
static int init_aio_state(struct disk_driver *dd)
{
- int i;
+ int i, ret;
struct td_state *bs = dd->td_state;
struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
long ioidx;
@@ -216,12 +213,9 @@ static int init_aio_state(struct disk_dr
goto fail;
}
- /*Signal kernel to create Poll FD for Asyc completion events*/
- s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
- s->poll_fd = io_setup(s->max_aio_reqs, &s->aio_ctx);
-
- if (s->poll_fd < 0) {
- if (s->poll_fd == -EAGAIN) {
+ ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
DPRINTF("Couldn't setup AIO context. If you are "
"trying to concurrently use a large number "
"of blktap-based disks, you may need to "
@@ -229,9 +223,7 @@ static int init_aio_state(struct disk_dr
"(e.g. 'echo echo 1048576 > /proc/sys/fs/"
"aio-max-nr')\n");
} else {
- DPRINTF("Couldn't get fd for AIO poll support. This "
- "is probably because your kernel does not "
- "have the aio-poll patch applied.\n");
+ DPRINTF("Couldn't setup AIO context.\n");
}
goto fail;
}
@@ -845,7 +837,7 @@ static inline void init_fds(struct disk_
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;
- dd->io_fd[0] = s->poll_fd;
+ dd->io_fd[0] = s->aio_ctx.pollfd;
}
/* Open the disk file and initialize qcow state. */
@@ -1144,7 +1136,7 @@ int tdqcow_submit(struct disk_driver *dd
if (!prv->iocb_queued)
return 0;
- ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+ ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
/* XXX: TODO: Handle error conditions here. */
@@ -1172,7 +1164,7 @@ int tdqcow_close(struct disk_driver *dd)
close(fd);
}
- io_destroy(s->aio_ctx);
+ io_destroy(s->aio_ctx.aio_ctx);
free(s->name);
free(s->l1_table);
free(s->l2_cache);
@@ -1184,17 +1176,15 @@ int tdqcow_close(struct disk_driver *dd)
int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
{
- int ret, i, rsp = 0,*ptr;
+ int ret, i, nr_events, rsp = 0,*ptr;
struct io_event *ep;
struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
if (sid > MAX_IOFD) return 1;
-
- /* Non-blocking test for completed io. */
- ret = io_getevents(prv->aio_ctx, 0, prv->max_aio_reqs, prv->aio_events,
- NULL);
-
- for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
+
+ nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+ for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
@@ -1215,6 +1205,14 @@ int tdqcow_do_callbacks(struct disk_driv
prv->iocb_free[prv->iocb_free_count++] = io;
}
+
+ if (nr_events) {
+ nr_events = tap_aio_more_events(&prv->aio_ctx);
+ goto repeat;
+ }
+
+ tap_aio_continue(&prv->aio_ctx);
+
return rsp;
}
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.c Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "tapaio.h"
+#include "tapdisk.h"
+#include <unistd.h>
+
+/**
+ * We used a kernel patch to return an fd associated with the AIO context
+ * so that we can concurrently poll on synchronous and async descriptors.
+ * This is signalled by passing 1 as the io context to io_setup.
+ */
+#define REQUEST_ASYNC_FD 1
+
+/*
+ * If we don't have any way to do epoll on aio events in a normal kernel,
+ * wait for aio events in a separate thread and return completion status
+ * via a pipe that can be waited on normally.
+ *
+ * To keep locking problems between the completion thread and the submit
+ * thread to a minimum, there's a handshake which allows only one thread
+ * to be doing work on the completion queue at a time:
+ *
+ * 1) main thread sends completion thread a command via the command pipe;
+ * 2) completion thread waits for aio events and returns the number
+ * received on the completion pipe
+ * 3) main thread processes the received ctx->aio_events events
+ * 4) loop back to 1) to let the completion thread refill the aio_events
+ * buffer.
+ *
+ * This workaround needs to disappear once the kernel provides a single
+ * mechanism for waiting on both aio and normal fd wakeups.
+ */
+static void *
+tap_aio_completion_thread(void *arg)
+{
+ tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+ int command;
+ int nr_events;
+ int rc;
+
+ while (1) {
+ rc = read(ctx->command_fd[0], &command, sizeof(command));
+
+ do {
+ rc = io_getevents(ctx->aio_ctx, 1,
+ ctx->max_aio_events, ctx->aio_events,
+ NULL);
+ if (rc) {
+ nr_events = rc;
+ rc = write(ctx->completion_fd[1], &nr_events,
+ sizeof(nr_events));
+ }
+ } while (!rc);
+ }
+}
+
+void
+tap_aio_continue(tap_aio_context_t *ctx)
+{
+ int cmd = 0;
+
+ if (!ctx->poll_in_thread)
+ return;
+
+ if (write(ctx->command_fd[1], &cmd, sizeof(cmd)) < 0)
+ DPRINTF("Cannot write to command pipe\n");
+}
+
+int
+tap_aio_setup(tap_aio_context_t *ctx,
+ struct io_event *aio_events,
+ int max_aio_events)
+{
+ int ret;
+
+ ctx->aio_events = aio_events;
+ ctx->max_aio_events = max_aio_events;
+ ctx->poll_in_thread = 0;
+
+ ctx->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+ ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+ if (ret < 0 && ret != -EINVAL)
+ return ret;
+ else if (ret > 0) {
+ ctx->pollfd = ret;
+ return ctx->pollfd;
+ }
+
+ ctx->aio_ctx = (io_context_t) 0;
+ ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+ if (ret < 0)
+ return ret;
+
+ if ((ret = pipe(ctx->command_fd)) < 0) {
+ DPRINTF("Unable to create command pipe\n");
+ return -1;
+ }
+ if ((ret = pipe(ctx->completion_fd)) < 0) {
+ DPRINTF("Unable to create completion pipe\n");
+ return -1;
+ }
+
+ if ((ret = pthread_create(&ctx->aio_thread, NULL,
+ tap_aio_completion_thread, ctx)) != 0) {
+ DPRINTF("Unable to create completion thread\n");
+ return -1;
+ }
+
+ ctx->pollfd = ctx->completion_fd[0];
+ ctx->poll_in_thread = 1;
+
+ tap_aio_continue(ctx);
+
+ return 0;
+}
+
+int
+tap_aio_get_events(tap_aio_context_t *ctx)
+{
+ int nr_events = 0;
+
+ if (!ctx->poll_in_thread)
+ nr_events = io_getevents(ctx->aio_ctx, 1,
+ ctx->max_aio_events, ctx->aio_events,
+ NULL);
+ else
+ read(ctx->completion_fd[0], &nr_events, sizeof(nr_events));
+
+ return nr_events;
+}
+
+int tap_aio_more_events(tap_aio_context_t *ctx)
+{
+ return io_getevents(ctx->aio_ctx, 0,
+ ctx->max_aio_events, ctx->aio_events, NULL);
+}
+
+
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.h Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __TAPAIO_H__
+#define __TAPAIO_H__
+
+#include <pthread.h>
+#include <libaio.h>
+
+struct tap_aio_context {
+ io_context_t aio_ctx;
+
+ struct io_event *aio_events;
+ int max_aio_events;
+
+ pthread_t aio_thread;
+ int command_fd[2];
+ int completion_fd[2];
+ int pollfd;
+ unsigned int poll_in_thread : 1;
+};
+
+typedef struct tap_aio_context tap_aio_context_t;
+
+int tap_aio_setup (tap_aio_context_t *ctx,
+ struct io_event *aio_events,
+ int max_aio_events);
+void tap_aio_continue (tap_aio_context_t *ctx);
+int tap_aio_get_events (tap_aio_context_t *ctx);
+int tap_aio_more_events(tap_aio_context_t *ctx);
+
+#endif /* __TAPAIO_H__ */
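For reference, the calling sequence this interface expects, as adopted by the
block-aio.c and block-qcow.c hunks above, is: wait on ctx.pollfd, take one
blocking batch with tap_aio_get_events(), drain further batches with
tap_aio_more_events(), then hand the buffer back with tap_aio_continue().
A minimal consumer sketch (hypothetical handle() callback, error handling
elided):

    #include <sys/select.h>
    #include "tapaio.h"

    #define MAX_EVENTS 64

    static tap_aio_context_t ctx;
    static struct io_event events[MAX_EVENTS];

    /* Hypothetical per-request completion hook. */
    static void handle(struct io_event *ep) { (void)ep; }

    static void event_loop(void)
    {
        fd_set rfds;
        int i, n;

        if (tap_aio_setup(&ctx, events, MAX_EVENTS) < 0)
            return;

        for (;;) {
            /* ctx.pollfd is select()able whether it is the kernel's AIO
             * fd (patched kernel) or the completion pipe fed by the
             * helper thread (stock kernel). */
            FD_ZERO(&rfds);
            FD_SET(ctx.pollfd, &rfds);
            if (select(ctx.pollfd + 1, &rfds, NULL, NULL, NULL) <= 0)
                continue;

            /* Blocking first batch, then a non-blocking drain. */
            for (n = tap_aio_get_events(&ctx); n > 0;
                 n = tap_aio_more_events(&ctx))
                for (i = 0; i < n; i++)
                    handle(&events[i]);

            /* Return the aio_events buffer to the completion thread. */
            tap_aio_continue(&ctx);
        }
    }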
diff -r c20bc60f9243 -r 810885428743 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/examples/init.d/xendomains Wed Jun 20 12:49:27 2007 -0600
@@ -182,25 +182,31 @@ rdnames()
parseln()
{
- name=`echo "$1" | cut -c0-17`
- name=${name%% *}
- rest=`echo "$1" | cut -c18- `
- read id mem cpu vcpu state tm < <(echo "$rest")
+ if [[ "$1" =~ "\(domain" ]]; then
+ name=;id=
+ else if [[ "$1" =~ "\(name" ]]; then
+ name=$(echo $1 | sed -e 's/^.*(name \(.*\))$/\1/')
+ else if [[ "$1" =~ "\(domid" ]]; then
+ id=$(echo $1 | sed -e 's/^.*(domid \(.*\))$/\1/')
+ fi; fi; fi
+
+ [ -n "$name" -a -n "$id" ] && return 0 || return 1
}
is_running()
{
rdname $1
RC=1
+ name=;id=
while read LN; do
- parseln "$LN"
+ parseln "$LN" || continue
if test $id = 0; then continue; fi
case $name in
($NM)
RC=0
;;
esac
- done < <(xm list | grep -v '^Name')
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
return $RC
}
@@ -267,13 +273,14 @@ start()
all_zombies()
{
+ name=;id=
while read LN; do
- parseln "$LN"
+ parseln "$LN" || continue
if test $id = 0; then continue; fi
if test "$state" != "-b---d" -a "$state" != "-----d"; then
return 1;
fi
- done < <(xm list | grep -v '^Name')
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
return 0
}
@@ -309,8 +316,9 @@ stop()
rdnames
fi
echo -n "Shutting down Xen domains:"
+ name=;id=
while read LN; do
- parseln "$LN"
+ parseln "$LN" || continue
if test $id = 0; then continue; fi
echo -n " $name"
if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
@@ -384,7 +392,7 @@ stop()
fi
kill $WDOG_PID >/dev/null 2>&1
fi
- done < <(xm list | grep -v '^Name')
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
# NB. this shuts down ALL Xen domains (politely), not just the ones in
# AUTODIR/*
@@ -409,15 +417,16 @@ stop()
check_domain_up()
{
+ name=;id=
while read LN; do
- parseln "$LN"
+ parseln "$LN" || continue
if test $id = 0; then continue; fi
case $name in
($1)
return 0
;;
esac
- done < <(xm list | grep -v "^Name")
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
return 1
}
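For context, parseln now consumes the s-expression output of 'xm list -l'
rather than the fixed-width table of plain 'xm list'; the grep keeps only the
three line shapes it cares about. Abbreviated, illustrative output (exact
spacing and field order may vary):

    (domain
        (domid 3)
        (name demo-guest)
        ...
    )

A '(domain' line resets name and id, and parseln returns success only once
both a '(name ...)' and a '(domid ...)' line have been seen for the current
record, which is why each caller clears name/id first and tests the exit
status.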
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/block-raw.c
--- a/tools/ioemu/block-raw.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/block-raw.c Wed Jun 20 12:49:27 2007 -0600
@@ -166,7 +166,7 @@ typedef struct RawAIOCB {
struct RawAIOCB *next;
} RawAIOCB;
-static int aio_sig_num = SIGUSR2;
+const int aio_sig_num = SIGUSR2;
static RawAIOCB *first_aio; /* AIO issued */
static int aio_initialized = 0;
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/target-i386-dm/exec-dm.c Wed Jun 20 12:49:27 2007 -0600
@@ -443,19 +443,40 @@ extern unsigned long logdirty_bitmap_siz
* Forcing a word-sized read/write prevents the guest from seeing a partially
* written word-sized atom.
*/
-void memcpy_words(void *dst, void *src, size_t n)
-{
- while (n >= sizeof(long)) {
- *((long *)dst) = *((long *)src);
- dst = ((long *)dst) + 1;
- src = ((long *)src) + 1;
- n -= sizeof(long);
- }
-
- if (n & 4) {
+#if defined(__x86_64__) || defined(__i386__)
+static void memcpy_words(void *dst, void *src, size_t n)
+{
+ asm (
+ " movl %%edx,%%ecx \n"
+#ifdef __x86_64
+ " shrl $3,%%ecx \n"
+ " andl $7,%%edx \n"
+ " rep movsq \n"
+ " test $4,%%edx \n"
+ " jz 1f \n"
+ " movsl \n"
+#else /* __i386__ */
+ " shrl $2,%%ecx \n"
+ " andl $3,%%edx \n"
+ " rep movsl \n"
+#endif
+ "1: test $2,%%edx \n"
+ " jz 1f \n"
+ " movsw \n"
+ "1: test $1,%%edx \n"
+ " jz 1f \n"
+ " movsb \n"
+ "1: \n"
+ : : "S" (src), "D" (dst), "d" (n) : "ecx" );
+}
+#else
+static void memcpy_words(void *dst, void *src, size_t n)
+{
+ while (n >= sizeof(uint32_t)) {
*((uint32_t *)dst) = *((uint32_t *)src);
dst = ((uint32_t *)dst) + 1;
src = ((uint32_t *)src) + 1;
+ n -= sizeof(uint32_t);
}
if (n & 2) {
@@ -470,6 +491,7 @@ void memcpy_words(void *dst, void *src,
src = ((uint8_t *)src) + 1;
}
}
+#endif
void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
int len, int is_write)
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/vl.c Wed Jun 20 12:49:27 2007 -0600
@@ -7059,6 +7059,18 @@ int main(int argc, char **argv)
#endif
char qemu_dm_logfilename[128];
+
+ /* Ensure that SIGUSR2 is blocked by default when a new thread is created,
+ then only the threads that use the signal unblock it -- this fixes a
+ race condition in Qcow support where the AIO signal is misdelivered. */
+ {
+ extern const int aio_sig_num;
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, aio_sig_num);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+ }
LIST_INIT (&vm_change_state_head);
#ifndef _WIN32
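The signal mask set here is inherited by every thread created afterwards; a
thread that actually wants to service the AIO signal must unblock it for
itself. A sketch of the complementary worker-side unblock (illustrative, not
code from this patch):

    #include <pthread.h>
    #include <signal.h>

    extern const int aio_sig_num;      /* SIGUSR2, from block-raw.c above */

    /* Hypothetical worker that owns AIO signal delivery. */
    static void *aio_worker(void *arg)
    {
        sigset_t set;

        /* Undo, for this thread only, the process-wide block that
         * main() established before any threads existed. */
        sigemptyset(&set);
        sigaddset(&set, aio_sig_num);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);

        /* ... issue aio requests and handle completion signals ... */
        return arg;
    }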
diff -r c20bc60f9243 -r 810885428743 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/libxc/xc_core.c Wed Jun 20 12:49:27 2007 -0600
@@ -156,7 +156,7 @@ struct xc_core_section_headers {
Elf64_Shdr *shdrs;
};
#define SHDR_INIT 16
-#define SHDR_INC 4
+#define SHDR_INC 4U
static struct xc_core_section_headers*
xc_core_shdr_init(void)
diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jun 20 12:49:27 2007 -0600
@@ -983,7 +983,7 @@ class XendDomainInfo:
self.info['VCPUs_live'] = vcpus
self._writeDom(self._vcpuDomDetails())
else:
- self.info['VCPUs_live'] = vcpus
+ self.info['VCPUs_max'] = vcpus
xen.xend.XendDomain.instance().managed_config_save(self)
log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
vcpus)
diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/python/xen/xend/server/blkif.py Wed Jun 20 12:49:27 2007 -0600
@@ -98,6 +98,11 @@ class BlkifController(DevController):
if (dev_type == 'cdrom' and new_front['device-type'] == 'cdrom' and
dev == new_back['dev'] and mode == 'r'):
+ # dummy device
+ self.writeBackend(devid,
+ 'type', new_back['type'],
+ 'params', '')
+ # new backend-device
self.writeBackend(devid,
'type', new_back['type'],
'params', new_back['params'])
diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/ia64/xen/domain.c Wed Jun 20 12:49:27 2007 -0600
@@ -1146,9 +1146,8 @@ static void __init loaddomainelfimage(st
dom_imva = __va_ul(page_to_maddr(p));
if (filesz > 0) {
if (filesz >= PAGE_SIZE)
- memcpy((void *) dom_imva,
- (void *) elfaddr,
- PAGE_SIZE);
+ copy_page((void *) dom_imva,
+ (void *) elfaddr);
else {
// copy partial page
memcpy((void *) dom_imva,
@@ -1166,7 +1165,7 @@ static void __init loaddomainelfimage(st
}
else if (memsz > 0) {
/* always zero out entire page */
- memset((void *) dom_imva, 0, PAGE_SIZE);
+ clear_page((void *) dom_imva);
}
memsz -= PAGE_SIZE;
filesz -= PAGE_SIZE;
@@ -1367,7 +1366,7 @@ int __init construct_dom0(struct domain
if (start_info_page == NULL)
panic("can't allocate start info page");
si = page_to_virt(start_info_page);
- memset(si, 0, PAGE_SIZE);
+ clear_page(si);
snprintf(si->magic, sizeof(si->magic), "xen-%i.%i-ia64",
xen_major_version(), xen_minor_version());
si->nr_pages = max_pages;
diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/xenmem.c
--- a/xen/arch/ia64/xen/xenmem.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/ia64/xen/xenmem.c Wed Jun 20 12:49:27 2007 -0600
@@ -90,7 +90,7 @@ alloc_dir_page(void)
panic("Not enough memory for virtual frame table!\n");
++table_size;
dir = mfn << PAGE_SHIFT;
- memset(__va(dir), 0, PAGE_SIZE);
+ clear_page(__va(dir));
return dir;
}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/apic.c Wed Jun 20 12:49:27 2007 -0600
@@ -817,7 +817,7 @@ void __init init_apic_mappings(void)
*/
if (!smp_found_config && detect_init_APIC()) {
apic_phys = __pa(alloc_xenheap_page());
- memset(__va(apic_phys), 0, PAGE_SIZE);
+ clear_page(__va(apic_phys));
} else
apic_phys = mp_lapic_addr;
@@ -852,7 +852,7 @@ void __init init_apic_mappings(void)
} else {
fake_ioapic_page:
ioapic_phys = __pa(alloc_xenheap_page());
- memset(__va(ioapic_phys), 0, PAGE_SIZE);
+ clear_page(__va(ioapic_phys));
}
set_fixmap_nocache(idx, ioapic_phys);
apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/cmdline.S
--- a/xen/arch/x86/boot/cmdline.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/cmdline.S Wed Jun 20 12:49:27 2007 -0600
@@ -119,30 +119,31 @@ 3: pop %edi
ret
.Lfind_option:
- push %ebx
- push 4+8(%esp)
- push 4+8(%esp)
+ mov 4(%esp),%eax
+ dec %eax
+ push %ebx
+1: pushl 4+8(%esp)
+ inc %eax
+ push %eax
call .Lstrstr
add $8,%esp
test %eax,%eax
jz 3f
cmp %eax,4+4(%esp)
- je 1f
+ je 2f
cmpb $' ',-1(%eax)
- jne 2f
-1: mov %eax,%ebx
- push 4+8(%esp)
+ jne 1b
+2: mov %eax,%ebx
+ pushl 4+8(%esp)
call .Lstrlen
add $4,%esp
- xchg %eax,%ebx
- add %eax,%ebx
+ xadd %eax,%ebx
cmpb $'\0',(%ebx)
je 3f
cmpb $' ',(%ebx)
je 3f
cmpb $'=',(%ebx)
- je 3f
-2: xor %eax,%eax
+ jne 1b
3: pop %ebx
ret
@@ -297,7 +298,7 @@ 1: lodsw
call .Lstr_prefix
add $8,%esp
test %eax,%eax
- jnz .Lcmdline_exit
+ jnz .Lparse_vga_current
/* We have 'vga=mode-<mode>'. */
add $5,%ebx
@@ -305,6 +306,19 @@ 1: lodsw
call .Latoi
add $4,%esp
mov %ax,bootsym_phys(boot_vid_mode)
+ jmp .Lcmdline_exit
+
+.Lparse_vga_current:
+ /* Check for 'vga=current'. */
+ push %ebx
+ pushl $sym_phys(.Lvga_current)
+ call .Lstr_prefix
+ add $8,%esp
+ test %eax,%eax
+ jnz .Lcmdline_exit
+
+ /* We have 'vga=current'. */
+ movw $VIDEO_CURRENT_MODE,bootsym_phys(boot_vid_mode)
.Lcmdline_exit:
popa
@@ -328,6 +342,8 @@ 1: lodsw
.asciz "gfx-"
.Lvga_mode:
.asciz "mode-"
+.Lvga_current:
+ .asciz "current"
.Lno_rm_opt:
.asciz "no-real-mode"
.Ledid_opt:
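The rewritten .Lfind_option loop no longer gives up after the first substring
hit; it keeps scanning until a whole-word match is found. A C rendering of
the new control flow (sketch; the asm returns the match pointer in %eax, or
zero on failure):

    #include <string.h>

    static const char *find_option(const char *cmdline, const char *opt)
    {
        const char *p = cmdline;
        size_t len = strlen(opt);

        while ((p = strstr(p, opt)) != NULL) {
            /* Accept only at start of line or after a space, and only
             * when terminated by NUL, space or '='. */
            if ((p == cmdline || p[-1] == ' ') &&
                (p[len] == '\0' || p[len] == ' ' || p[len] == '='))
                return p;
            p++;        /* partial match: resume scanning past it */
        }
        return NULL;
    }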
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/trampoline.S
--- a/xen/arch/x86/boot/trampoline.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/trampoline.S Wed Jun 20 12:49:27 2007 -0600
@@ -13,12 +13,11 @@ trampoline_realmode_entry:
cli
lidt bootsym(idt_48)
lgdt bootsym(gdt_48)
+ mov $1,%bl # EBX != 0 indicates we are an AP
xor %ax, %ax
inc %ax
lmsw %ax # CR0.PE = 1 (enter protected mode)
- mov $1,%bl # EBX != 0 indicates we are an AP
- jmp 1f
-1: ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
+ ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
idt_48: .word 0, 0, 0 # base = limit = 0
gdt_48: .word 6*8-1
@@ -135,10 +134,9 @@ trampoline_boot_cpu_entry:
ljmp $BOOT_PSEUDORM_CS,$bootsym(1f)
.code16
1: mov %eax,%cr0 # CR0.PE = 0 (leave protected mode)
- jmp 1f
/* Load proper real-mode values into %cs, %ds, %es and %ss. */
-1: ljmp $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
+ ljmp $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
1: mov $(BOOT_TRAMPOLINE>>4),%ax
mov %ax,%ds
mov %ax,%es
@@ -166,10 +164,9 @@ 1: mov $(BOOT_TRAMPOLINE>>4),%a
xor %ax,%ax
inc %ax
lmsw %ax # CR0.PE = 1 (enter protected mode)
- jmp 1f
/* Load proper protected-mode values into all segment registers. */
-1: ljmpl $BOOT_CS32,$bootsym_phys(1f)
+ ljmpl $BOOT_CS32,$bootsym_phys(1f)
.code32
1: mov $BOOT_DS,%eax
mov %eax,%ds
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.S
--- a/xen/arch/x86/boot/video.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/video.S Wed Jun 20 12:49:27 2007 -0600
@@ -15,7 +15,10 @@
#include "video.h"
-#define modelist (0x3000)
+/* Scratch space layout. */
+#define modelist (0x3000)
+#define vesa_glob_info (modelist + 1024)
+#define vesa_mode_info (vesa_glob_info + 1024)
/* Retrieve Extended Display Identification Data. */
#define CONFIG_FIRMWARE_EDID
@@ -109,7 +112,7 @@ mopar2: movb %al, _param(PARAM_VIDEO_
# Fetching of VESA frame buffer parameters
mopar_gr:
- leaw modelist+1024, %di
+ leaw vesa_mode_info, %di
movb $0x23, _param(PARAM_HAVE_VGA)
movw 16(%di), %ax
movw %ax, _param(PARAM_LFB_LINELENGTH)
@@ -128,9 +131,7 @@ mopar_gr:
movl %eax, _param(PARAM_LFB_COLORS+4)
# get video mem size
- leaw modelist+1024, %di
- movw $0x4f00, %ax
- int $0x10
+ leaw vesa_glob_info, %di
xorl %eax, %eax
movw 18(%di), %ax
movl %eax, _param(PARAM_LFB_SIZE)
@@ -183,7 +184,10 @@ dac_done:
movw %es, _param(PARAM_VESAPM_SEG)
movw %di, _param(PARAM_VESAPM_OFF)
-no_pm: ret
+
+no_pm: pushw %ds
+ popw %es
+ ret
# The video mode menu
mode_menu:
@@ -428,17 +432,13 @@ setmenu:
jmp mode_set
check_vesa:
-#ifdef CONFIG_FIRMWARE_EDID
- leaw modelist+1024, %di
+ leaw vesa_glob_info, %di
movw $0x4f00, %ax
int $0x10
cmpw $0x004f, %ax
jnz setbad
- movw 4(%di), %ax
- movw %ax, bootsym(vbe_version)
-#endif
- leaw modelist+1024, %di
+ leaw vesa_mode_info, %di
subb $VIDEO_FIRST_VESA>>8, %bh
movw %bx, %cx # Get mode information structure
movw $0x4f01, %ax
@@ -447,7 +447,7 @@ check_vesa:
cmpw $0x004f, %ax
jnz setbad
- movb (%di), %al # Check capabilities.
+ movb (%di), %al # Check mode attributes.
andb $0x99, %al
cmpb $0x99, %al
jnz _setbad # Doh! No linear frame buffer.
@@ -530,6 +530,7 @@ spec_inits:
.word bootsym(set_8pixel)
.word bootsym(set_80x43)
.word bootsym(set_80x28)
+ .word bootsym(set_current)
.word bootsym(set_80x30)
.word bootsym(set_80x34)
.word bootsym(set_80x60)
@@ -575,6 +576,7 @@ set14: movw $0x1111, %ax
movb $0x01, %ah # Define cursor scan lines 11-12
movw $0x0b0c, %cx
int $0x10
+set_current:
stc
ret
@@ -695,33 +697,34 @@ vga_modes_end:
# Detect VESA modes.
vesa_modes:
movw %di, %bp # BP=original mode table end
- addw $0x200, %di # Buffer space
+ leaw vesa_glob_info, %di
movw $0x4f00, %ax # VESA Get card info call
int $0x10
+ movw %di, %si
movw %bp, %di
cmpw $0x004f, %ax # Successful?
jnz ret0
- cmpw $0x4556, 0x200(%di) # 'VE'
+ cmpw $0x4556, (%si) # 'VE'
jnz ret0
- cmpw $0x4153, 0x202(%di) # 'SA'
+ cmpw $0x4153, 2(%si) # 'SA'
jnz ret0
movw $bootsym(vesa_name), bootsym(card_name) # Set name to "VESA VGA"
pushw %gs
- lgsw 0x20e(%di), %si # GS:SI=mode list
+ lgsw 0xe(%si), %si # GS:SI=mode list
movw $128, %cx # Iteration limit
vesa1:
gs; lodsw
- cmpw $0xffff, %ax # End of the table?
+ cmpw $0xffff, %ax # End of the table?
jz vesar
- cmpw $0x0080, %ax # Check validity of mode ID
+ cmpw $0x0080, %ax # Check validity of mode ID
jc vesa2
- orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
- jz vesan # Certain BIOSes report 0x80-0xff!
+ orb %ah, %ah # Valid IDs 0x0000-0x007f/0x0100-0x07ff
+ jz vesan # Certain BIOSes report 0x80-0xff!
cmpw $0x0800, %ax
jnc vesae
@@ -891,8 +894,13 @@ store_edid:
cmpb $1, bootsym(opt_edid) # EDID disabled on cmdline (edid=no)?
je .Lno_edid
- cmpw $0x0200, bootsym(vbe_version) # only do EDID on >= VBE2.0
- jl .Lno_edid
+ leaw vesa_glob_info, %di
+ movw $0x4f00, %ax
+ int $0x10
+ cmpw $0x004f, %ax
+ jne .Lno_edid
+ cmpw $0x0200, 4(%di) # only do EDID on >= VBE2.0
+ jb .Lno_edid
xorw %di, %di # Report Capability
pushw %di
@@ -901,6 +909,8 @@ store_edid:
xorw %bx, %bx
xorw %cx, %cx
int $0x10
+ pushw %ds
+ popw %es
cmpw $0x004f, %ax # Call failed?
jne .Lno_edid
@@ -920,8 +930,6 @@ store_edid:
movw $0x01, %bx
movw $0x00, %cx
movw $0x00, %dx
- pushw %ds
- popw %es
movw $bootsym(boot_edid_info), %di
int $0x10
@@ -940,7 +948,6 @@ card_name: .word 0 # Pointe
card_name: .word 0 # Pointer to adapter name
graphic_mode: .byte 0 # Graphic mode with a linear frame buffer
dac_size: .byte 6 # DAC bit depth
-vbe_version: .word 0 # VBE bios version
# Status messages
keymsg: .ascii "Press <RETURN> to see video modes available,"
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.h
--- a/xen/arch/x86/boot/video.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/video.h Wed Jun 20 12:49:27 2007 -0600
@@ -16,10 +16,11 @@
#define VIDEO_80x50 0x0f01
#define VIDEO_80x43 0x0f02
#define VIDEO_80x28 0x0f03
-#define VIDEO_80x30 0x0f04
-#define VIDEO_80x34 0x0f05
-#define VIDEO_80x60 0x0f06
-#define VIDEO_LAST_SPECIAL 0x0f07
+#define VIDEO_CURRENT_MODE 0x0f04
+#define VIDEO_80x30 0x0f05
+#define VIDEO_80x34 0x0f06
+#define VIDEO_80x60 0x0f07
+#define VIDEO_LAST_SPECIAL 0x0f08
#define ASK_VGA 0xfffd
#define VIDEO_VESA_BY_SIZE 0xffff
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/x86_32.S Wed Jun 20 12:49:27 2007 -0600
@@ -30,9 +30,7 @@ 1: mov %eax,(%edi)
loop 1b
/* Pass off the Multiboot info structure to C land. */
- mov multiboot_ptr,%eax
- add $__PAGE_OFFSET,%eax
- push %eax
+ pushl multiboot_ptr
call __start_xen
ud2 /* Force a panic (invalid opcode). */
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/x86_64.S Wed Jun 20 12:49:27 2007 -0600
@@ -51,8 +51,6 @@ 1: movq %rax,(%rdi)
/* Pass off the Multiboot info structure to C land. */
mov multiboot_ptr(%rip),%edi
- lea start-0x100000(%rip),%rax
- add %rax,%rdi
call __start_xen
ud2 /* Force a panic (invalid opcode). */
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/domain.c Wed Jun 20 12:49:27 2007 -0600
@@ -232,26 +232,28 @@ static int setup_compat_l4(struct vcpu *
l4_pgentry_t *l4tab;
int rc;
- if ( !pg )
+ if ( pg == NULL )
return -ENOMEM;
/* This page needs to look like a pagetable so that it can be shadowed */
pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;
l4tab = copy_page(page_to_virt(pg), idle_pg_table);
+ l4tab[0] = l4e_empty();
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_page(pg, __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
__PAGE_HYPERVISOR);
+
+ if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
+ {
+ free_domheap_page(pg);
+ return rc;
+ }
+
v->arch.guest_table = pagetable_from_page(pg);
v->arch.guest_table_user = v->arch.guest_table;
-
- if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
- {
- free_domheap_page(pg);
- return rc;
- }
return 0;
}
@@ -318,11 +320,11 @@ int switch_compat(struct domain *d)
gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
{
+ if ( (d->vcpu[vcpuid] != NULL) &&
+ (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
+ goto undo_and_fail;
d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
- if (d->vcpu[vcpuid]
- && setup_compat_l4(d->vcpu[vcpuid]) != 0)
- return -ENOMEM;
}
d->arch.physaddr_bitsize =
@@ -330,6 +332,19 @@ int switch_compat(struct domain *d)
+ (PAGE_SIZE - 2);
return 0;
+
+ undo_and_fail:
+ d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
+ release_arg_xlat_area(d);
+ gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
+ while ( vcpuid-- != 0 )
+ {
+ if ( d->vcpu[vcpuid] != NULL )
+ release_compat_l4(d->vcpu[vcpuid]);
+ d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+ }
+ return -ENOMEM;
}
#else
@@ -461,7 +476,7 @@ int arch_domain_create(struct domain *d)
if ( (d->shared_info = alloc_xenheap_page()) == NULL )
goto fail;
- memset(d->shared_info, 0, PAGE_SIZE);
+ clear_page(d->shared_info);
share_xen_page_with_guest(
virt_to_page(d->shared_info), d, XENSHARE_writable);
}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/domain_build.c Wed Jun 20 12:49:27 2007 -0600
@@ -505,7 +505,7 @@ int __init construct_dom0(
v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
#else
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
- memcpy(l2tab, idle_pg_table, PAGE_SIZE);
+ copy_page(l2tab, idle_pg_table);
l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start);
@@ -645,7 +645,7 @@ int __init construct_dom0(
panic("Not enough RAM for domain 0 PML4.\n");
l4start = l4tab = page_to_virt(page);
}
- memcpy(l4tab, idle_pg_table, PAGE_SIZE);
+ copy_page(l4tab, idle_pg_table);
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
@@ -823,7 +823,7 @@ int __init construct_dom0(
/* Set up start info area. */
si = (start_info_t *)vstartinfo_start;
- memset(si, 0, PAGE_SIZE);
+ clear_page(si);
si->nr_pages = nr_pages;
si->shared_info = virt_to_maddr(d->shared_info);
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/flushtlb.c
--- a/xen/arch/x86/flushtlb.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/flushtlb.c Wed Jun 20 12:49:27 2007 -0600
@@ -80,6 +80,8 @@ void write_cr3(unsigned long cr3)
t = pre_flush();
+ hvm_flush_guest_tlbs();
+
#ifdef USER_MAPPINGS_ARE_GLOBAL
__pge_off();
__asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
@@ -103,6 +105,8 @@ void local_flush_tlb(void)
t = pre_flush();
+ hvm_flush_guest_tlbs();
+
#ifdef USER_MAPPINGS_ARE_GLOBAL
__pge_off();
__pge_on();
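Both call sites rely on a new hvm_flush_guest_tlbs() wrapper; its definition
lands in this patch's xen/include/asm-x86/hvm/hvm.h hunk (not reproduced in
this excerpt), essentially along these lines:

    /* Sketch of the wrapper assumed above: ask the VMM-specific code
     * (e.g. svm_flush_guest_tlbs() below) to invalidate guest TLB
     * entries, but only when an HVM guest may have created any. */
    static inline void hvm_flush_guest_tlbs(void)
    {
        if ( hvm_enabled )
            hvm_funcs.flush_guest_tlbs();
    }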
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c Wed Jun 20 12:49:27 2007 -0600
@@ -831,11 +831,24 @@ void hvm_update_guest_cr3(struct vcpu *v
hvm_funcs.update_guest_cr3(v);
}
+static void hvm_latch_shinfo_size(struct domain *d)
+{
+ /*
+ * Called from operations which are among the very first executed by
+ * PV drivers on initialisation or after save/restore. These are sensible
+ * points at which to sample the execution mode of the guest and latch
+ * 32- or 64-bit format for shared state.
+ */
+ if ( current->domain == d )
+ d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
+}
+
/* Initialise a hypercall transfer page for a VMX domain using
paravirtualised drivers. */
void hvm_hypercall_page_initialise(struct domain *d,
void *hypercall_page)
{
+ hvm_latch_shinfo_size(d);
hvm_funcs.init_hypercall_page(d, hypercall_page);
}
@@ -1065,13 +1078,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
break;
case HVM_PARAM_CALLBACK_IRQ:
hvm_set_callback_via(d, a.value);
- /*
- * Since this operation is one of the very first executed
- * by PV drivers on initialisation or after save/restore, it
- * is a sensible point at which to sample the execution mode of
- * the guest and latch 32- or 64-bit format for shared state.
- */
- d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
+ hvm_latch_shinfo_size(d);
break;
}
d->arch.hvm_domain.params[a.index] = a.value;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/irq.c Wed Jun 20 12:49:27 2007 -0600
@@ -285,43 +285,49 @@ void hvm_set_callback_via(struct domain
}
}
-int cpu_has_pending_irq(struct vcpu *v)
+enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v)
{
struct hvm_domain *plat = &v->domain->arch.hvm_domain;
- /* APIC */
+ if ( unlikely(v->arch.hvm_vcpu.nmi_pending) )
+ return hvm_intack_nmi;
+
if ( vlapic_has_interrupt(v) != -1 )
- return 1;
-
- /* PIC */
+ return hvm_intack_lapic;
+
if ( !vlapic_accept_pic_intr(v) )
- return 0;
-
- return plat->vpic[0].int_output;
-}
-
-int cpu_get_interrupt(struct vcpu *v, int *type)
-{
- int vector;
-
- if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 )
- return vector;
-
- if ( (v->vcpu_id == 0) &&
- ((vector = cpu_get_pic_interrupt(v, type)) != -1) )
- return vector;
-
- return -1;
-}
-
-int get_isa_irq_vector(struct vcpu *v, int isa_irq, int type)
+ return hvm_intack_none;
+
+ return plat->vpic[0].int_output ? hvm_intack_pic : hvm_intack_none;
+}
+
+int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type, int *vector)
+{
+ switch ( type )
+ {
+ case hvm_intack_nmi:
+ return test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending);
+ case hvm_intack_lapic:
+ return ((*vector = cpu_get_apic_interrupt(v)) != -1);
+ case hvm_intack_pic:
+ ASSERT(v->vcpu_id == 0);
+ return ((*vector = cpu_get_pic_interrupt(v)) != -1);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intack src)
{
unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
- if ( type == APIC_DM_EXTINT )
+ if ( src == hvm_intack_pic )
return (v->domain->arch.hvm_domain.vpic[isa_irq >> 3].irq_base
+ (isa_irq & 7));
+ ASSERT(src == hvm_intack_lapic);
return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector;
}
@@ -337,19 +343,20 @@ int is_isa_irq_masked(struct vcpu *v, in
domain_vioapic(v->domain)->redirtbl[gsi].fields.mask);
}
-/*
- * TODO: 1. Should not need special treatment of event-channel events.
- * 2. Should take notice of interrupt shadows (or clear them).
- */
int hvm_local_events_need_delivery(struct vcpu *v)
{
- int pending;
-
- pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v));
- if ( unlikely(pending) )
- pending = hvm_interrupts_enabled(v);
-
- return pending;
+ enum hvm_intack type;
+
+ /* TODO: Get rid of event-channel special case. */
+ if ( vcpu_info(v, evtchn_upcall_pending) )
+ type = hvm_intack_pic;
+ else
+ type = hvm_vcpu_has_pending_irq(v);
+
+ if ( likely(type == hvm_intack_none) )
+ return 0;
+
+ return hvm_interrupts_enabled(v, type);
}
#if 0 /* Keep for debugging */
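Distilled, the query/ack protocol that replaces cpu_has_pending_irq() and
cpu_get_interrupt() looks like this (consumer-side sketch; compare the real
loop in svm_intr_assist() further down):

    /* 'vector' is only written for lapic/pic sources; an NMI ack just
     * clears nmi_pending. Injection steps are elided. */
    static void intr_assist_sketch(struct vcpu *v)
    {
        enum hvm_intack src;
        int vector = -1;

        do {
            src = hvm_vcpu_has_pending_irq(v);
            if ( (src == hvm_intack_none) ||
                 !hvm_interrupts_enabled(v, src) )
                return;             /* nothing deliverable right now */
        } while ( !hvm_vcpu_ack_pending_irq(v, src, &vector) );

        /* src == hvm_intack_nmi  -> inject NMI
         * otherwise              -> inject external interrupt 'vector' */
    }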
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/asid.c
--- a/xen/arch/x86/hvm/svm/asid.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/asid.c Wed Jun 20 12:49:27 2007 -0600
@@ -60,7 +60,7 @@ struct svm_asid_data {
u64 core_asid_generation;
u32 next_asid;
u32 max_asid;
- u32 erratum170;
+ u32 erratum170:1;
};
static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data);
@@ -140,25 +140,21 @@ void svm_asid_init_vcpu(struct vcpu *v)
}
/*
- * Increase the Generation to make free ASIDs. Flush physical TLB and give
- * ASID.
- */
-static void svm_asid_handle_inc_generation(struct vcpu *v)
-{
- struct svm_asid_data *data = svm_asid_core_data();
-
- if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) )
- {
- /* Handle ASID overflow. */
+ * Increase the Generation to make free ASIDs, and indirectly cause a
+ * TLB flush of all ASIDs on the next vmrun.
+ */
+void svm_asid_inc_generation(void)
+{
+ struct svm_asid_data *data = svm_asid_core_data();
+
+ if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) )
+ {
+ /* Move to the next generation. We can't flush the TLB now
+ * because you need to vmrun to do that, and current might not
+ * be a HVM vcpu, but the first HVM vcpu that runs after this
+ * will pick up ASID 1 and flush the TLBs. */
data->core_asid_generation++;
- data->next_asid = SVM_ASID_FIRST_GUEST_ASID + 1;
-
- /* Handle VCPU. */
- v->arch.hvm_svm.vmcb->guest_asid = SVM_ASID_FIRST_GUEST_ASID;
- v->arch.hvm_svm.asid_generation = data->core_asid_generation;
-
- /* Trigger flush of physical TLB. */
- v->arch.hvm_svm.vmcb->tlb_control = 1;
+ data->next_asid = SVM_ASID_FIRST_GUEST_ASID;
return;
}
@@ -168,11 +164,12 @@ static void svm_asid_handle_inc_generati
* this core (flushing TLB always). So correctness is established; it
* only runs a bit slower.
*/
- printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
- data->erratum170 = 1;
- data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
-
- svm_asid_init_vcpu(v);
+ if ( !data->erratum170 )
+ {
+ printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
+ data->erratum170 = 1;
+ data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
+ }
}
/*
@@ -202,18 +199,21 @@ asmlinkage void svm_asid_handle_vmrun(vo
return;
}
- /* Different ASID generations trigger fetching of a fresh ASID. */
- if ( likely(data->next_asid <= data->max_asid) )
- {
- /* There is a free ASID. */
- v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
- v->arch.hvm_svm.asid_generation = data->core_asid_generation;
- v->arch.hvm_svm.vmcb->tlb_control = 0;
- return;
- }
-
- /* Slow path, may cause TLB flush. */
- svm_asid_handle_inc_generation(v);
+ /* If there are no free ASIDs, need to go to a new generation */
+ if ( unlikely(data->next_asid > data->max_asid) )
+ svm_asid_inc_generation();
+
+ /* Now guaranteed to be a free ASID. */
+ v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
+ v->arch.hvm_svm.asid_generation = data->core_asid_generation;
+
+ /* When we assign ASID 1, flush all TLB entries. We need to do it
+ * here because svm_asid_inc_generation() can be called at any time,
+ * but the TLB flush can only happen on vmrun. */
+ if ( v->arch.hvm_svm.vmcb->guest_asid == SVM_ASID_FIRST_GUEST_ASID )
+ v->arch.hvm_svm.vmcb->tlb_control = 1;
+ else
+ v->arch.hvm_svm.vmcb->tlb_control = 0;
}
void svm_asid_inv_asid(struct vcpu *v)
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c Wed Jun 20 12:49:27 2007 -0600
@@ -15,7 +15,6 @@
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
- *
*/
#include <xen/config.h>
@@ -39,100 +38,119 @@
#include <xen/domain_page.h>
#include <asm/hvm/trace.h>
-/*
- * Most of this code is copied from vmx_io.c and modified
- * to be suitable for SVM.
- */
-
-static inline int svm_inject_extint(struct vcpu *v, int trap)
+static void svm_inject_dummy_vintr(struct vcpu *v)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
vintr_t intr = vmcb->vintr;
- /* Update only relevant fields */
intr.fields.irq = 1;
intr.fields.intr_masking = 1;
- intr.fields.vector = trap;
+ intr.fields.vector = 0;
intr.fields.prio = 0xF;
intr.fields.ign_tpr = 1;
vmcb->vintr = intr;
+}
+
+static void svm_inject_nmi(struct vcpu *v)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ eventinj_t event;
- return 0;
+ event.bytes = 0;
+ event.fields.v = 1;
+ event.fields.type = EVENTTYPE_NMI;
+ event.fields.vector = 2;
+
+ ASSERT(vmcb->eventinj.fields.v == 0);
+ vmcb->eventinj = event;
+}
+
+static void svm_inject_extint(struct vcpu *v, int vector)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ eventinj_t event;
+
+ event.bytes = 0;
+ event.fields.v = 1;
+ event.fields.type = EVENTTYPE_INTR;
+ event.fields.vector = vector;
+
+ ASSERT(vmcb->eventinj.fields.v == 0);
+ vmcb->eventinj = event;
}
asmlinkage void svm_intr_assist(void)
{
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- int intr_type = APIC_DM_EXTINT;
- int intr_vector = -1;
+ enum hvm_intack intr_source;
+ int intr_vector;
/*
- * Previous Interrupt delivery caused this intercept?
+ * Previous event delivery caused this intercept?
* This will happen if the injection is latched by the processor (hence
- * clearing vintr.fields.irq) but then subsequently a fault occurs (e.g.,
- * due to lack of shadow mapping of guest IDT or guest-kernel stack).
- *
- * NB. Exceptions that fault during delivery are lost. This needs to be
- * fixed but we'll usually get away with it since faults are usually
- * idempotent. But this isn't the case for e.g. software interrupts!
+ * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault
+ * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel
+ * stack).
*/
- if ( vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0) )
+ if ( vmcb->exitintinfo.fields.v )
{
- intr_vector = vmcb->exitintinfo.fields.vector;
+ vmcb->eventinj = vmcb->exitintinfo;
vmcb->exitintinfo.bytes = 0;
HVMTRACE_1D(REINJ_VIRQ, v, intr_vector);
- svm_inject_extint(v, intr_vector);
return;
}
- /*
- * Previous interrupt still pending? This occurs if we return from VMRUN
- * very early in the entry-to-guest process. Usually this is because an
- * external physical interrupt was pending when we executed VMRUN.
- */
- if ( vmcb->vintr.fields.irq )
- return;
-
- /* Crank the handle on interrupt state and check for new interrrupts. */
+ /* Crank the handle on interrupt state. */
pt_update_irq(v);
hvm_set_callback_irq_level();
- if ( !cpu_has_pending_irq(v) )
- return;
- /*
- * If the guest can't take an interrupt right now, create a 'fake'
- * virtual interrupt on to intercept as soon as the guest _can_ take
- * interrupts. Do not obtain the next interrupt from the vlapic/pic
- * if unable to inject.
- *
- * Also do this if there is an exception pending. This is because
- * the delivery of the exception can arbitrarily delay the injection
- * of the vintr (for example, if the exception is handled via an
- * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
- * - the vTPR could be modified upwards, so we need to wait until the
- * exception is delivered before we can safely decide that an
- * interrupt is deliverable; and
- * - the guest might look at the APIC/PIC state, so we ought not to have
- * cleared the interrupt out of the IRR.
- */
- if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow
- || vmcb->eventinj.fields.v )
+ do {
+ intr_source = hvm_vcpu_has_pending_irq(v);
+ if ( likely(intr_source == hvm_intack_none) )
+ return;
+
+ /*
+ * If the guest can't take an interrupt right now, create a 'fake'
+ * virtual interrupt on to intercept as soon as the guest _can_ take
+ * interrupts. Do not obtain the next interrupt from the vlapic/pic
+ * if unable to inject.
+ *
+ * Also do this if there is an injection already pending. This is
+ * because the event delivery can arbitrarily delay the injection
+ * of the vintr (for example, if the exception is handled via an
+ * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
+ * - the vTPR could be modified upwards, so we need to wait until the
+ * exception is delivered before we can safely decide that an
+ * interrupt is deliverable; and
+ * - the guest might look at the APIC/PIC state, so we ought not to
+ * have cleared the interrupt out of the IRR.
+ *
+ * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
+ * shadow. This is hard to do without hardware support. We should also
+ * track 'NMI blocking' from NMI injection until IRET. This can be done
+ * quite easily in software by intercepting the unblocking IRET.
+ */
+ if ( !hvm_interrupts_enabled(v, intr_source) ||
+ vmcb->eventinj.fields.v )
+ {
+ vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
+ HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
+ svm_inject_dummy_vintr(v);
+ return;
+ }
+ } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
+
+ if ( intr_source == hvm_intack_nmi )
{
- vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
- HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
- svm_inject_extint(v, 0x0); /* actual vector doesn't matter */
- return;
+ svm_inject_nmi(v);
}
-
- /* Okay, we can deliver the interrupt: grab it and update PIC state. */
- intr_vector = cpu_get_interrupt(v, &intr_type);
- BUG_ON(intr_vector < 0);
-
- HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
- svm_inject_extint(v, intr_vector);
-
- pt_intr_post(v, intr_vector, intr_type);
+ else
+ {
+ HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
+ svm_inject_extint(v, intr_vector);
+ pt_intr_post(v, intr_vector, intr_source);
+ }
}
/*
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c Wed Jun 20 12:49:27 2007 -0600
@@ -312,26 +312,8 @@ int svm_vmcb_save(struct vcpu *v, struct
c->sysenter_esp = vmcb->sysenter_esp;
c->sysenter_eip = vmcb->sysenter_eip;
- /* Save any event/interrupt that was being injected when we last
- * exited. Although there are three(!) VMCB fields that can contain
- * active events, we only need to save at most one: because the
- * intr_assist logic never delivers an IRQ when any other event is
- * active, we know that the only possible collision is if we inject
- * a fault while exitintinfo contains a valid event (the delivery of
- * which caused the last exit). In that case replaying just the
- * first event should cause the same behaviour when we restore. */
- if ( vmcb->vintr.fields.irq
- && /* Check it's not a fake interrupt (see svm_intr_assist()) */
- !(vmcb->general1_intercepts & GENERAL1_INTERCEPT_VINTR) )
- {
- c->pending_vector = vmcb->vintr.fields.vector;
- c->pending_type = 0; /* External interrupt */
- c->pending_error_valid = 0;
- c->pending_reserved = 0;
- c->pending_valid = 1;
- c->error_code = 0;
- }
- else if ( vmcb->exitintinfo.fields.v )
+ /* Save any event/interrupt that was being injected when we last exited. */
+ if ( vmcb->exitintinfo.fields.v )
{
c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff;
c->error_code = vmcb->exitintinfo.fields.errorcode;
@@ -569,10 +551,15 @@ static inline void svm_restore_dr(struct
__restore_debug_registers(v);
}
-static int svm_interrupts_enabled(struct vcpu *v)
-{
- unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
- return !irq_masked(eflags);
+static int svm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ if ( type == hvm_intack_nmi )
+ return !vmcb->interrupt_shadow;
+
+ ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
+ return !irq_masked(vmcb->rflags) && !vmcb->interrupt_shadow;
}
static int svm_guest_x86_mode(struct vcpu *v)
@@ -596,6 +583,14 @@ static void svm_update_guest_cr3(struct
static void svm_update_guest_cr3(struct vcpu *v)
{
v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
+}
+
+static void svm_flush_guest_tlbs(void)
+{
+ /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
+ * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
+ * VMRUN anyway). */
+ svm_asid_inc_generation();
}
static void svm_update_vtpr(struct vcpu *v, unsigned long value)
@@ -770,8 +765,6 @@ static void svm_init_hypercall_page(stru
{
char *p;
int i;
-
- memset(hypercall_page, 0, PAGE_SIZE);
for ( i = 0; i < (PAGE_SIZE / 32); i++ )
{
@@ -948,6 +941,7 @@ static struct hvm_function_table svm_fun
.get_segment_register = svm_get_segment_register,
.update_host_cr3 = svm_update_host_cr3,
.update_guest_cr3 = svm_update_guest_cr3,
+ .flush_guest_tlbs = svm_flush_guest_tlbs,
.update_vtpr = svm_update_vtpr,
.stts = svm_stts,
.set_tsc_offset = svm_set_tsc_offset,
@@ -957,7 +951,7 @@ static struct hvm_function_table svm_fun
.event_injection_faulted = svm_event_injection_faulted
};
-void svm_npt_detect(void)
+static void svm_npt_detect(void)
{
u32 eax, ebx, ecx, edx;
@@ -1017,6 +1011,9 @@ int start_svm(struct cpuinfo_x86 *c)
hvm_enable(&svm_function_table);
+ if ( opt_hap_enabled )
+ printk("SVM: Nested paging enabled.\n");
+
return 1;
}
@@ -1477,7 +1474,7 @@ static void svm_io_instruction(struct vc
/* Copy current guest state into io instruction state structure. */
memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
- hvm_store_cpu_guest_regs(v, regs, NULL);
+ svm_store_cpu_guest_regs(v, regs, NULL);
info.bytes = vmcb->exitinfo1;
@@ -2148,11 +2145,14 @@ static inline void svm_do_msr_access(
static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
{
+ enum hvm_intack type = hvm_vcpu_has_pending_irq(current);
+
__update_guest_eip(vmcb, 1);
/* Check for interrupt not handled or new interrupt. */
- if ( (vmcb->rflags & X86_EFLAGS_IF) &&
- (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) ) {
+ if ( vmcb->eventinj.fields.v ||
+ ((type != hvm_intack_none) && svm_interrupts_enabled(current, type)) )
+ {
HVMTRACE_1D(HLT, current, /*int pending=*/ 1);
return;
}
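
For reference, a minimal sketch of the generation-counter ASID scheme that
svm_flush_guest_tlbs() relies on; the structure and function names here are
illustrative, not the actual xen/arch/x86/hvm/svm/asid.c code:

struct asid_state {
    unsigned long generation;   /* bumped by svm_asid_inc_generation() */
    unsigned int  next_asid;    /* next free ASID in this generation   */
    unsigned int  max_asid;     /* hardware limit                      */
};

static unsigned int asid_for_vcpu(struct asid_state *s,
                                  unsigned long *vgen, unsigned int *vasid)
{
    if ( *vgen == s->generation )
        return *vasid;                /* ASID still valid: reuse it */

    if ( s->next_asid > s->max_asid )
    {
        s->generation++;              /* invalidates every outstanding ASID */
        s->next_asid = 1;             /* ASID 0 stays reserved for the host */
        /* a real implementation also flushes this CPU's whole TLB here */
    }

    *vgen  = s->generation;
    *vasid = s->next_asid++;
    return *vasid;                    /* loaded into the VMCB before VMRUN */
}

Bumping the generation is therefore a constant-time way to make every vCPU
pick up a fresh ASID, and hence a clean tagged TLB, on its next VMRUN.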
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c Wed Jun 20 12:49:27 2007 -0600
@@ -56,7 +56,7 @@ struct vmcb_struct *alloc_vmcb(void)
return NULL;
}
- memset(vmcb, 0, PAGE_SIZE);
+ clear_page(vmcb);
return vmcb;
}
@@ -72,11 +72,11 @@ struct host_save_area *alloc_host_save_a
hsa = alloc_xenheap_page();
if ( hsa == NULL )
{
- printk(XENLOG_WARNING "Warning: failed to allocate vmcb.\n");
+ printk(XENLOG_WARNING "Warning: failed to allocate hsa.\n");
return NULL;
}
- memset(hsa, 0, PAGE_SIZE);
+ clear_page(hsa);
return hsa;
}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vioapic.c Wed Jun 20 12:49:27 2007 -0600
@@ -254,17 +254,11 @@ static void ioapic_inj_irq(
HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
vector, trig_mode, delivery_mode);
- switch ( delivery_mode )
- {
- case dest_Fixed:
- case dest_LowestPrio:
- if ( vlapic_set_irq(target, vector, trig_mode) )
- vcpu_kick(vlapic_vcpu(target));
- break;
- default:
- gdprintk(XENLOG_WARNING, "error delivery mode %d\n", delivery_mode);
- break;
- }
+ ASSERT((delivery_mode == dest_Fixed) ||
+ (delivery_mode == dest_LowestPrio));
+
+ if ( vlapic_set_irq(target, vector, trig_mode) )
+ vcpu_kick(vlapic_vcpu(target));
}
static uint32_t ioapic_get_delivery_bitmask(
@@ -368,7 +362,6 @@ static void vioapic_deliver(struct hvm_h
}
case dest_Fixed:
- case dest_ExtINT:
{
uint8_t bit;
for ( bit = 0; deliver_bitmask != 0; bit++ )
@@ -393,10 +386,21 @@ static void vioapic_deliver(struct hvm_h
break;
}
- case dest_SMI:
case dest_NMI:
- case dest_INIT:
- case dest__reserved_2:
+ {
+ uint8_t bit;
+ for ( bit = 0; deliver_bitmask != 0; bit++ )
+ {
+ if ( !(deliver_bitmask & (1 << bit)) )
+ continue;
+ deliver_bitmask &= ~(1 << bit);
+ if ( ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) &&
+ !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
+ vcpu_kick(v);
+ }
+ break;
+ }
+
default:
gdprintk(XENLOG_WARNING, "Unsupported delivery mode %d\n",
delivery_mode);
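
The nmi_pending flag used above acts as a one-bit latch. A sketch of the
intended producer/consumer pairing, where test_and_clear_bool() on the
consumer side is assumed to be the atomic counterpart of test_and_set_bool()
and inject_guest_nmi() is a hypothetical injection helper:

/* Producer: latch at most one NMI per vCPU, kick only on the 0->1 edge. */
if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
    vcpu_kick(v);

/* Consumer (interrupt-assist path, illustrative): claim the latched NMI. */
if ( test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending) )
    inject_guest_nmi(v);

Concurrent senders thus collapse into a single pending NMI, matching real
edge-triggered NMI semantics.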
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:49:27 2007 -0600
@@ -294,7 +294,8 @@ static int vlapic_accept_irq(struct vcpu
break;
case APIC_DM_NMI:
- gdprintk(XENLOG_WARNING, "Ignoring guest NMI\n");
+ if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
+ vcpu_kick(v);
break;
case APIC_DM_INIT:
@@ -747,7 +748,7 @@ int vlapic_has_interrupt(struct vcpu *v)
return highest_irr;
}
-int cpu_get_apic_interrupt(struct vcpu *v, int *mode)
+int cpu_get_apic_interrupt(struct vcpu *v)
{
int vector = vlapic_has_interrupt(v);
struct vlapic *vlapic = vcpu_vlapic(v);
@@ -757,8 +758,6 @@ int cpu_get_apic_interrupt(struct vcpu *
vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
vlapic_clear_irr(vector, vlapic);
-
- *mode = APIC_DM_FIXED;
return vector;
}
@@ -935,7 +934,7 @@ int vlapic_init(struct vcpu *v)
return -ENOMEM;
}
- memset(vlapic->regs, 0, PAGE_SIZE);
+ clear_page(vlapic->regs);
vlapic_reset(vlapic);
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c Wed Jun 20 12:49:27 2007 -0600
@@ -102,8 +102,8 @@ static void update_tpr_threshold(struct
asmlinkage void vmx_intr_assist(void)
{
- int has_ext_irq, intr_vector, intr_type = 0;
- unsigned long eflags, intr_shadow;
+ int intr_vector;
+ enum hvm_intack intr_source;
struct vcpu *v = current;
unsigned int idtv_info_field;
unsigned long inst_len;
@@ -114,65 +114,67 @@ asmlinkage void vmx_intr_assist(void)
update_tpr_threshold(vcpu_vlapic(v));
- has_ext_irq = cpu_has_pending_irq(v);
+ do {
+ intr_source = hvm_vcpu_has_pending_irq(v);
- if ( unlikely(v->arch.hvm_vmx.vector_injected) )
- {
- v->arch.hvm_vmx.vector_injected = 0;
- if ( unlikely(has_ext_irq) )
- enable_irq_window(v);
- return;
- }
+ if ( unlikely(v->arch.hvm_vmx.vector_injected) )
+ {
+ v->arch.hvm_vmx.vector_injected = 0;
+ if ( unlikely(intr_source != hvm_intack_none) )
+ enable_irq_window(v);
+ return;
+ }
- /* This could be moved earlier in the VMX resume sequence. */
- idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
- if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
- {
- __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+ /* This could be moved earlier in the VMX resume sequence. */
+ idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
+ if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
+ {
+ __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+
+ /*
+ * Safe: the length will only be interpreted for software
+ * exceptions and interrupts. If we get here then delivery of some
+ * event caused a fault, and this always results in defined
+ * VM_EXIT_INSTRUCTION_LEN.
+ */
+ inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
+ __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+
+ if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
+ __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ __vmread(IDT_VECTORING_ERROR_CODE));
+ if ( unlikely(intr_source != hvm_intack_none) )
+ enable_irq_window(v);
+
+ HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
+ return;
+ }
+
+ if ( likely(intr_source == hvm_intack_none) )
+ return;
/*
- * Safe: the length will only be interpreted for software exceptions
- * and interrupts. If we get here then delivery of some event caused a
- * fault, and this always results in defined VM_EXIT_INSTRUCTION_LEN.
+ * TODO: Better NMI handling. Shouldn't wait for EFLAGS.IF==1, but
+ * should wait for exit from 'NMI blocking' window (NMI injection to
+ * next IRET). This requires us to use the new 'virtual NMI' support.
*/
- inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
- __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+ if ( !hvm_interrupts_enabled(v, intr_source) )
+ {
+ enable_irq_window(v);
+ return;
+ }
+ } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
- if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
- __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
- __vmread(IDT_VECTORING_ERROR_CODE));
- if ( unlikely(has_ext_irq) )
- enable_irq_window(v);
-
- HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
- return;
+ if ( intr_source == hvm_intack_nmi )
+ {
+ vmx_inject_nmi(v);
}
-
- if ( likely(!has_ext_irq) )
- return;
-
- intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
- if ( unlikely(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)) )
+ else
{
- enable_irq_window(v);
- HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility");
- return;
+ HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
+ vmx_inject_extint(v, intr_vector);
+ pt_intr_post(v, intr_vector, intr_source);
}
-
- eflags = __vmread(GUEST_RFLAGS);
- if ( irq_masked(eflags) )
- {
- enable_irq_window(v);
- return;
- }
-
- intr_vector = cpu_get_interrupt(v, &intr_type);
- BUG_ON(intr_vector < 0);
-
- HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
- vmx_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE);
-
- pt_intr_post(v, intr_vector, intr_type);
}
/*
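
The rewritten vmx_intr_assist() above follows a two-phase peek/ack protocol.
A sketch of its shape using the new API, mirroring the do/while loop in the
hunk:

enum hvm_intack src;
int vector;

for ( ; ; )
{
    src = hvm_vcpu_has_pending_irq(v);     /* peek: highest-priority source */
    if ( src == hvm_intack_none )
        return;                            /* nothing deliverable */

    if ( !hvm_interrupts_enabled(v, src) ) /* source-specific blocking */
    {
        enable_irq_window(v);              /* retry when the window opens */
        return;
    }

    if ( hvm_vcpu_ack_pending_irq(v, src, &vector) )
        break;                             /* vector claimed; inject it */
}

The ack can fail if the source retracted its interrupt between the peek and
the claim, hence the loop.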
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Jun 20 12:49:27 2007 -0600
@@ -158,7 +158,7 @@ static struct vmcs_struct *vmx_alloc_vmc
return NULL;
}
- memset(vmcs, 0, PAGE_SIZE);
+ clear_page(vmcs);
vmcs->vmcs_revision_id = vmcs_revision_id;
return vmcs;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Jun 20 12:49:27 2007 -0600
@@ -1070,8 +1070,6 @@ static void vmx_init_hypercall_page(stru
char *p;
int i;
- memset(hypercall_page, 0, PAGE_SIZE);
-
for ( i = 0; i < (PAGE_SIZE / 32); i++ )
{
p = (char *)(hypercall_page + (i * 32));
@@ -1115,16 +1113,26 @@ static int vmx_nx_enabled(struct vcpu *v
return v->arch.hvm_vmx.efer & EFER_NX;
}
-static int vmx_interrupts_enabled(struct vcpu *v)
-{
- unsigned long eflags = __vmread(GUEST_RFLAGS);
- return !irq_masked(eflags);
-}
-
+static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+ unsigned long intr_shadow, eflags;
+
+ ASSERT(v == current);
+
+ intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+ intr_shadow &= VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS;
+
+ if ( type == hvm_intack_nmi )
+ return !intr_shadow;
+
+ ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
+ eflags = __vmread(GUEST_RFLAGS);
+ return !irq_masked(eflags) && !intr_shadow;
+}
static void vmx_update_host_cr3(struct vcpu *v)
{
- ASSERT( (v == current) || !vcpu_runnable(v) );
+ ASSERT((v == current) || !vcpu_runnable(v));
vmx_vmcs_enter(v);
__vmwrite(HOST_CR3, v->arch.cr3);
vmx_vmcs_exit(v);
@@ -1132,12 +1140,18 @@ static void vmx_update_host_cr3(struct v
static void vmx_update_guest_cr3(struct vcpu *v)
{
- ASSERT( (v == current) || !vcpu_runnable(v) );
+ ASSERT((v == current) || !vcpu_runnable(v));
vmx_vmcs_enter(v);
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
vmx_vmcs_exit(v);
}
+static void vmx_flush_guest_tlbs(void)
+{
+ /* No tagged TLB support on VMX yet. The fact that we're in Xen
+ * at all means any guest will have a clean TLB when it's next run,
+ * because VMRESUME will flush it for us. */
+}
static void vmx_inject_exception(
unsigned int trapnr, int errcode, unsigned long cr2)
@@ -1205,6 +1219,7 @@ static struct hvm_function_table vmx_fun
.get_segment_register = vmx_get_segment_register,
.update_host_cr3 = vmx_update_host_cr3,
.update_guest_cr3 = vmx_update_guest_cr3,
+ .flush_guest_tlbs = vmx_flush_guest_tlbs,
.update_vtpr = vmx_update_vtpr,
.stts = vmx_stts,
.set_tsc_offset = vmx_set_tsc_offset,
@@ -1837,7 +1852,7 @@ static void vmx_io_instruction(unsigned
/* Copy current guest state into io instruction state structure. */
memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
- hvm_store_cpu_guest_regs(current, regs, NULL);
+ vmx_store_cpu_guest_regs(current, regs, NULL);
HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
"exit_qualification = %lx",
@@ -2549,7 +2564,8 @@ static inline int vmx_do_msr_read(struct
HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
- switch (ecx) {
+ switch ( ecx )
+ {
case MSR_IA32_TIME_STAMP_COUNTER:
msr_content = hvm_get_guest_time(v);
break;
@@ -2565,6 +2581,8 @@ static inline int vmx_do_msr_read(struct
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
break;
+ case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+ goto gp_fault;
default:
if ( long_mode_do_msr_read(regs) )
goto done;
@@ -2576,8 +2594,8 @@ static inline int vmx_do_msr_read(struct
regs->edx = edx;
goto done;
}
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
- return 0;
+
+ goto gp_fault;
}
regs->eax = msr_content & 0xFFFFFFFF;
@@ -2589,6 +2607,10 @@ done:
ecx, (unsigned long)regs->eax,
(unsigned long)regs->edx);
return 1;
+
+gp_fault:
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ return 0;
}
static int vmx_alloc_vlapic_mapping(struct domain *d)
@@ -2667,7 +2689,8 @@ static inline int vmx_do_msr_write(struc
msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content);
- switch (ecx) {
+ switch ( ecx )
+ {
case MSR_IA32_TIME_STAMP_COUNTER:
hvm_set_guest_time(v, msr_content);
pt_reset(v);
@@ -2684,6 +2707,8 @@ static inline int vmx_do_msr_write(struc
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
+ case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+ goto gp_fault;
default:
if ( !long_mode_do_msr_write(regs) )
wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
@@ -2691,6 +2716,10 @@ static inline int vmx_do_msr_write(struc
}
return 1;
+
+gp_fault:
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ return 0;
}
static void vmx_do_hlt(void)
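
The "case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1" arms above use the
GCC case-range extension so that guest reads and writes of the entire VMX
capability MSR block fault. The equivalent in standard C, as a sketch:

if ( (ecx >= MSR_IA32_VMX_BASIC) && (ecx <= MSR_IA32_VMX_CR4_FIXED1) )
{
    vmx_inject_hw_exception(v, TRAP_gp_fault, 0);  /* hide VMX MSRs */
    return 0;
}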
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpic.c
--- a/xen/arch/x86/hvm/vpic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vpic.c Wed Jun 20 12:49:27 2007 -0600
@@ -499,7 +499,7 @@ void vpic_irq_negative_edge(struct domai
vpic_update_int_output(vpic);
}
-int cpu_get_pic_interrupt(struct vcpu *v, int *type)
+int cpu_get_pic_interrupt(struct vcpu *v)
{
int irq, vector;
struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
@@ -512,6 +512,5 @@ int cpu_get_pic_interrupt(struct vcpu *v
return -1;
vector = vpic[irq >> 3].irq_base + (irq & 7);
- *type = APIC_DM_EXTINT;
return vector;
}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vpt.c Wed Jun 20 12:49:27 2007 -0600
@@ -155,7 +155,8 @@ void pt_update_irq(struct vcpu *v)
}
}
-static struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type)
+static struct periodic_time *is_pt_irq(
+ struct vcpu *v, int vector, enum hvm_intack src)
{
struct list_head *head = &v->arch.hvm_vcpu.tm_list;
struct periodic_time *pt;
@@ -174,7 +175,7 @@ static struct periodic_time *is_pt_irq(s
return pt;
}
- vec = get_isa_irq_vector(v, pt->irq, type);
+ vec = get_isa_irq_vector(v, pt->irq, src);
/* RTC irq need special care */
if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) )
@@ -186,7 +187,7 @@ static struct periodic_time *is_pt_irq(s
return NULL;
}
-void pt_intr_post(struct vcpu *v, int vector, int type)
+void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src)
{
struct periodic_time *pt;
time_cb *cb;
@@ -194,7 +195,7 @@ void pt_intr_post(struct vcpu *v, int ve
spin_lock(&v->arch.hvm_vcpu.tm_lock);
- pt = is_pt_irq(v, vector, type);
+ pt = is_pt_irq(v, vector, src);
if ( pt == NULL )
{
spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -227,13 +228,10 @@ void pt_reset(struct vcpu *v)
list_for_each_entry ( pt, head, list )
{
- if ( pt->enabled )
- {
- pt->pending_intr_nr = 0;
- pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
- pt->scheduled = NOW() + pt->period;
- set_timer(&pt->timer, pt->scheduled);
- }
+ pt->pending_intr_nr = 0;
+ pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
+ pt->scheduled = NOW() + pt->period;
+ set_timer(&pt->timer, pt->scheduled);
}
spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -247,10 +245,7 @@ void pt_migrate(struct vcpu *v)
spin_lock(&v->arch.hvm_vcpu.tm_lock);
list_for_each_entry ( pt, head, list )
- {
- if ( pt->enabled )
- migrate_timer(&pt->timer, v->processor);
- }
+ migrate_timer(&pt->timer, v->processor);
spin_unlock(&v->arch.hvm_vcpu.tm_lock);
}
@@ -263,8 +258,9 @@ void create_periodic_time(
spin_lock(&v->arch.hvm_vcpu.tm_lock);
- init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
pt->enabled = 1;
+ pt->pending_intr_nr = 0;
+
if ( period < 900000 ) /* < 0.9 ms */
{
gdprintk(XENLOG_WARNING,
@@ -283,6 +279,8 @@ void create_periodic_time(
pt->priv = data;
list_add(&pt->list, &v->arch.hvm_vcpu.tm_list);
+
+ init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
set_timer(&pt->timer, pt->scheduled);
spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -295,8 +293,12 @@ void destroy_periodic_time(struct period
pt_lock(pt);
pt->enabled = 0;
- pt->pending_intr_nr = 0;
list_del(&pt->list);
+ pt_unlock(pt);
+
+ /*
+ * pt_timer_fn() can run until this kill_timer() returns. We must do this
+ * outside pt_lock(); otherwise we can deadlock with pt_timer_fn().
+ */
kill_timer(&pt->timer);
- pt_unlock(pt);
-}
+}
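
The comment added to destroy_periodic_time() describes a classic
lock-ordering hazard; a sketch of the broken ordering the patch removes:

/* BROKEN ordering (illustrative): */
pt_lock(pt);               /* takes the vCPU's tm_lock                */
list_del(&pt->list);
kill_timer(&pt->timer);    /* waits for an in-flight pt_timer_fn(),   */
                           /* but pt_timer_fn() may be spinning on    */
                           /* the tm_lock we are still holding        */
pt_unlock(pt);

Unlinking under the lock and calling kill_timer() only after pt_unlock()
breaks the cycle.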
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/mm.c Wed Jun 20 12:49:27 2007 -0600
@@ -2942,7 +2942,7 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong)
if ( entries > FIRST_RESERVED_GDT_ENTRY )
return -EINVAL;
- if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
+ if ( copy_from_guest(frames, frame_list, nr_pages) )
return -EFAULT;
LOCK_BIGLOCK(current->domain);
@@ -3123,7 +3123,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
return -ESRCH;
- rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
+ rc = copy_from_guest(d->arch.e820, fmap.map.buffer,
fmap.map.nr_entries) ? -EFAULT : 0;
d->arch.nr_e820 = fmap.map.nr_entries;
@@ -3144,7 +3144,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
return -EFAULT;
map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
- if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
+ if ( copy_to_guest(map.buffer, d->arch.e820, map.nr_entries) ||
copy_to_guest(arg, &map, 1) )
return -EFAULT;
@@ -3168,7 +3168,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
buffer = guest_handle_cast(memmap.buffer, e820entry_t);
count = min((unsigned int)e820.nr_map, memmap.nr_entries);
- if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
+ if ( copy_to_guest(buffer, e820.map, count) < 0 )
return -EFAULT;
memmap.nr_entries = count;
@@ -3181,7 +3181,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
case XENMEM_machphys_mapping:
{
- struct xen_machphys_mapping mapping = {
+ static const struct xen_machphys_mapping mapping = {
.v_start = MACH2PHYS_VIRT_START,
.v_end = MACH2PHYS_VIRT_END,
.max_mfn = MACH2PHYS_NR_ENTRIES - 1
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/setup.c Wed Jun 20 12:49:27 2007 -0600
@@ -295,14 +295,14 @@ static struct e820map __initdata boot_e8
/* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
static void __init reserve_in_boot_e820(unsigned long s, unsigned long e)
{
- unsigned long rs, re;
+ uint64_t rs, re;
int i;
for ( i = 0; i < boot_e820.nr_map; i++ )
{
/* Have we found the e820 region that includes the specified range? */
rs = boot_e820.map[i].addr;
- re = boot_e820.map[i].addr + boot_e820.map[i].size;
+ re = rs + boot_e820.map[i].size;
if ( (s < rs) || (e > re) )
continue;
@@ -402,7 +402,7 @@ void init_done(void)
startup_cpu_idle_loop();
}
-void __init __start_xen(multiboot_info_t *mbi)
+void __init __start_xen(unsigned long mbi_p)
{
char *memmap_type = NULL;
char __cmdline[] = "", *cmdline = __cmdline;
@@ -410,6 +410,7 @@ void __init __start_xen(multiboot_info_t
unsigned int initrdidx = 1;
char *_policy_start = NULL;
unsigned long _policy_len = 0;
+ multiboot_info_t *mbi = __va(mbi_p);
module_t *mod = (module_t *)__va(mbi->mods_addr);
unsigned long nr_pages, modules_length;
int i, e820_warn = 0, bytes = 0;
@@ -678,6 +679,9 @@ void __init __start_xen(multiboot_info_t
barrier();
move_memory(e, 0, __pa(&_end) - xen_phys_start);
+ /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
+ memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
+
/* Walk initial pagetables, relocating page directory entries. */
pl4e = __va(__pa(idle_pg_table));
for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/traps.c Wed Jun 20 12:49:27 2007 -0600
@@ -462,7 +462,17 @@ int rdmsr_hypervisor_regs(
if ( idx > 0 )
return 0;
- *eax = *edx = 0;
+ switch ( idx )
+ {
+ case 0:
+ {
+ *eax = *edx = 0;
+ break;
+ }
+ default:
+ BUG();
+ }
+
return 1;
}
@@ -1130,7 +1140,7 @@ static inline int guest_io_okay(
* read as 0xff (no access allowed).
*/
TOGGLE_MODE();
- switch ( __copy_from_guest_offset(&x.bytes[0], v->arch.iobmp,
+ switch ( __copy_from_guest_offset(x.bytes, v->arch.iobmp,
port>>3, 2) )
{
default: x.bytes[0] = ~0;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c Wed Jun 20 12:49:27 2007 -0600
@@ -513,6 +513,7 @@ static void hypercall_page_initialise_ri
void hypercall_page_initialise(struct domain *d, void *hypercall_page)
{
+ memset(hypercall_page, 0xCC, PAGE_SIZE);
if ( is_hvm_domain(d) )
hvm_hypercall_page_initialise(d, hypercall_page);
else if ( supervisor_mode_kernel )
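
Pre-filling the hypercall page is defensive: 0xCC is the one-byte x86 int3
opcode, so a stray jump into a slot that the mode-specific initialisers do
not overwrite raises a breakpoint trap instead of silently executing zero
bytes:

memset(hypercall_page, 0xCC, PAGE_SIZE);   /* 0xCC == int3: trap on entry */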
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/compat_kexec.S
--- a/xen/arch/x86/x86_64/compat_kexec.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_64/compat_kexec.S Wed Jun 20 12:49:27 2007 -0600
@@ -1,5 +1,11 @@
/*
* Compatibility kexec handler.
+ */
+
+/*
+ * NOTE: We rely on Xen not relocating itself above the 4G boundary. This is
+ * currently true but if it ever changes then compat_pg_table will
+ * need to be moved back below 4G at run time.
*/
#include <xen/config.h>
@@ -8,7 +14,20 @@
#include <asm/msr.h>
#include <asm/page.h>
-#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START)
+/* The unrelocated physical address of a symbol. */
+#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START)
+
+/* Load physical address of symbol into register and relocate it. */
+#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
+ add xen_phys_start(%rip), reg
+
+/*
+ * Relocate a physical address in memory. Size of temporary register
+ * determines size of the value to relocate.
+ */
+#define RELOCATE_MEM(addr,reg) mov addr(%rip), reg ; \
+ add xen_phys_start(%rip), reg ; \
+ mov reg, addr(%rip)
.text
@@ -31,20 +50,35 @@ 1: dec %r9
test %r9,%r9
jnz 1b
- mov $SYM_PHYS(compat_page_list),%rdx
+ RELOCATE_SYM(compat_page_list,%rdx)
+
+ /* Relocate compatibility mode entry point address. */
+ RELOCATE_MEM(compatibility_mode_far,%eax)
+
+ /* Relocate compat_pg_table. */
+ RELOCATE_MEM(compat_pg_table, %rax)
+ RELOCATE_MEM(compat_pg_table+0x8, %rax)
+ RELOCATE_MEM(compat_pg_table+0x10,%rax)
+ RELOCATE_MEM(compat_pg_table+0x18,%rax)
/*
* Setup an identity mapped region in PML4[0] of idle page
* table.
*/
- lea l3_identmap(%rip),%rax
- sub %rbx,%rax
+ RELOCATE_SYM(l3_identmap,%rax)
or $0x63,%rax
mov %rax, idle_pg_table(%rip)
/* Switch to idle page table. */
- movq $SYM_PHYS(idle_pg_table), %rax
+ RELOCATE_SYM(idle_pg_table,%rax)
movq %rax, %cr3
+
+ /* Switch to identity mapped compatibility stack. */
+ RELOCATE_SYM(compat_stack,%rax)
+ movq %rax, %rsp
+
+ /* Save xen_phys_start for 32 bit code. */
+ movq xen_phys_start(%rip), %rbx
/* Jump to low identity mapping in compatibility mode. */
ljmp *compatibility_mode_far(%rip)
@@ -54,7 +88,26 @@ compatibility_mode_far:
.long SYM_PHYS(compatibility_mode)
.long __HYPERVISOR_CS32
+ /*
+ * We use 5 words of stack for the arguments passed to the kernel. The
+ * kernel only uses 1 word before switching to its own stack. Allocate
+ * 16 words to give "plenty" of room.
+ */
+ .fill 16,4,0
+compat_stack:
+
.code32
+
+#undef RELOCATE_SYM
+#undef RELOCATE_MEM
+
+/*
+ * Load physical address of symbol into register and relocate it. %rbx
+ * contains xen_phys_start(%rip) saved before jump to compatibility
+ * mode.
+ */
+#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
+ add %ebx, reg
compatibility_mode:
/* Setup some sane segments. */
@@ -78,7 +131,7 @@ compatibility_mode:
movl %eax, %cr0
/* Switch to 32 bit page table. */
- movl $SYM_PHYS(compat_pg_table), %eax
+ RELOCATE_SYM(compat_pg_table, %eax)
movl %eax, %cr3
/* Clear MSR_EFER[LME], disabling long mode */
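
With Xen now potentially relocated above its link-time address,
RELOCATE_SYM(sym, reg) rebases a symbol's link-time offset by the boot-time
xen_phys_start. Expressed in C, as a sketch (the function name is
illustrative):

unsigned long relocate_sym(unsigned long sym_virt)
{
    unsigned long off = sym_virt - __XEN_VIRT_START;  /* SYM_PHYS(sym)    */
    return off + xen_phys_start;                      /* actual physical  */
}

RELOCATE_MEM performs the same read-modify-write on a value already stored
in memory.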
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c Wed Jun 20 12:49:27 2007 -0600
@@ -510,6 +510,7 @@ static void hypercall_page_initialise_ri
void hypercall_page_initialise(struct domain *d, void *hypercall_page)
{
+ memset(hypercall_page, 0xCC, PAGE_SIZE);
if ( is_hvm_domain(d) )
hvm_hypercall_page_initialise(d, hypercall_page);
else if ( !is_pv_32bit_domain(d) )
diff -r c20bc60f9243 -r 810885428743 xen/common/compat/memory.c
--- a/xen/common/compat/memory.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/compat/memory.c Wed Jun 20 12:49:27 2007 -0600
@@ -258,7 +258,8 @@ int compat_memory_op(unsigned int cmd, X
compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent];
BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]);
- if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) )
+ if ( __copy_to_compat_offset(cmp.rsrv.extent_start,
+ start_extent, &pfn, 1) )
{
if ( split >= 0 )
{
@@ -275,6 +276,10 @@ int compat_memory_op(unsigned int cmd, X
break;
}
}
+
+ /* Bail if there was an error. */
+ if ( (split >= 0) && (end_extent != nat.rsrv->nr_extents) )
+ split = 0;
}
else
start_extent = end_extent;
diff -r c20bc60f9243 -r 810885428743 xen/common/domctl.c
--- a/xen/common/domctl.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/domctl.c Wed Jun 20 12:49:27 2007 -0600
@@ -43,7 +43,7 @@ void cpumask_to_xenctl_cpumap(
bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), NR_CPUS);
- copy_to_guest(xenctl_cpumap->bitmap, &bytemap[0], copy_bytes);
+ copy_to_guest(xenctl_cpumap->bitmap, bytemap, copy_bytes);
for ( i = copy_bytes; i < guest_bytes; i++ )
copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1);
@@ -63,7 +63,7 @@ void xenctl_cpumap_to_cpumask(
if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
return;
- copy_from_guest(&bytemap[0], xenctl_cpumap->bitmap, copy_bytes);
+ copy_from_guest(bytemap, xenctl_cpumap->bitmap, copy_bytes);
bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS);
}
diff -r c20bc60f9243 -r 810885428743 xen/common/grant_table.c
--- a/xen/common/grant_table.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/grant_table.c Wed Jun 20 12:49:27 2007 -0600
@@ -148,7 +148,7 @@ get_maptrack_handle(
return -1;
}
- memset(new_mt, 0, PAGE_SIZE);
+ clear_page(new_mt);
new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
@@ -624,7 +624,7 @@ gnttab_grow_table(struct domain *d, unsi
{
if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
goto active_alloc_failed;
- memset(gt->active[i], 0, PAGE_SIZE);
+ clear_page(gt->active[i]);
}
/* Shared */
@@ -632,7 +632,7 @@ gnttab_grow_table(struct domain *d, unsi
{
if ( (gt->shared[i] = alloc_xenheap_page()) == NULL )
goto shared_alloc_failed;
- memset(gt->shared[i], 0, PAGE_SIZE);
+ clear_page(gt->shared[i]);
}
/* Share the new shared frames with the recipient domain */
@@ -1365,7 +1365,7 @@ grant_table_create(
{
if ( (t->active[i] = alloc_xenheap_page()) == NULL )
goto no_mem_2;
- memset(t->active[i], 0, PAGE_SIZE);
+ clear_page(t->active[i]);
}
/* Tracking of mapped foreign frames table */
@@ -1375,7 +1375,7 @@ grant_table_create(
memset(t->maptrack, 0, max_nr_maptrack_frames() * sizeof(t->maptrack[0]));
if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
goto no_mem_3;
- memset(t->maptrack[0], 0, PAGE_SIZE);
+ clear_page(t->maptrack[0]);
t->maptrack_limit = PAGE_SIZE / sizeof(struct grant_mapping);
for ( i = 0; i < t->maptrack_limit; i++ )
t->maptrack[0][i].ref = i+1;
@@ -1389,7 +1389,7 @@ grant_table_create(
{
if ( (t->shared[i] = alloc_xenheap_page()) == NULL )
goto no_mem_4;
- memset(t->shared[i], 0, PAGE_SIZE);
+ clear_page(t->shared[i]);
}
for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
diff -r c20bc60f9243 -r 810885428743 xen/common/kernel.c
--- a/xen/common/kernel.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/kernel.c Wed Jun 20 12:49:27 2007 -0600
@@ -142,7 +142,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
{
xen_extraversion_t extraversion;
safe_strcpy(extraversion, xen_extra_version());
- if ( copy_to_guest(arg, (char *)extraversion, sizeof(extraversion)) )
+ if ( copy_to_guest(arg, extraversion, ARRAY_SIZE(extraversion)) )
return -EFAULT;
return 0;
}
@@ -167,7 +167,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
memset(info, 0, sizeof(info));
arch_get_xen_caps(&info);
- if ( copy_to_guest(arg, (char *)info, sizeof(info)) )
+ if ( copy_to_guest(arg, info, ARRAY_SIZE(info)) )
return -EFAULT;
return 0;
}
@@ -187,7 +187,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
{
xen_changeset_info_t chgset;
safe_strcpy(chgset, xen_changeset());
- if ( copy_to_guest(arg, (char *)chgset, sizeof(chgset)) )
+ if ( copy_to_guest(arg, chgset, ARRAY_SIZE(chgset)) )
return -EFAULT;
return 0;
}
@@ -229,8 +229,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
case XENVER_guest_handle:
{
- if ( copy_to_guest(arg, (char *)current->domain->handle,
- sizeof(current->domain->handle)) )
+ if ( copy_to_guest(arg, current->domain->handle,
+ ARRAY_SIZE(current->domain->handle)) )
return -EFAULT;
return 0;
}
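
These XENVER_* hunks all make the same point: the nr argument of
copy_to_guest() counts elements of the copied type, not bytes. For the char
arrays here the two happen to be equal, but ARRAY_SIZE states the
element-count contract and drops the (char *) casts that the stricter
guest-access macros below no longer accept. A sketch, assuming
xen_extraversion_t is a fixed-size char array as in the public headers:

xen_extraversion_t xv;
safe_strcpy(xv, xen_extra_version());
copy_to_guest(arg, xv, ARRAY_SIZE(xv));   /* element count, not sizeof */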
diff -r c20bc60f9243 -r 810885428743 xen/common/kexec.c
--- a/xen/common/kexec.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/kexec.c Wed Jun 20 12:49:27 2007 -0600
@@ -169,7 +169,11 @@ static int kexec_get(reserve)(xen_kexec_
static int kexec_get(xen)(xen_kexec_range_t *range)
{
+#ifdef CONFIG_X86_64
+ range->start = xenheap_phys_start;
+#else
range->start = virt_to_maddr(_start);
+#endif
range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start;
return 0;
}
diff -r c20bc60f9243 -r 810885428743 xen/common/perfc.c
--- a/xen/common/perfc.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/perfc.c Wed Jun 20 12:49:27 2007 -0600
@@ -227,7 +227,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
}
BUG_ON(v != perfc_nbr_vals);
- if ( copy_to_guest(desc, (xen_sysctl_perfc_desc_t *)perfc_d, NR_PERFCTRS) )
+ if ( copy_to_guest(desc, perfc_d, NR_PERFCTRS) )
return -EFAULT;
if ( copy_to_guest(val, perfc_vals, perfc_nbr_vals) )
return -EFAULT;
diff -r c20bc60f9243 -r 810885428743 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/drivers/char/console.c Wed Jun 20 12:49:27 2007 -0600
@@ -326,7 +326,7 @@ static long guest_console_write(XEN_GUES
CONSOLEIO_write, count, buffer);
kcount = min_t(int, count, sizeof(kbuf)-1);
- if ( copy_from_guest((char *)kbuf, buffer, kcount) )
+ if ( copy_from_guest(kbuf, buffer, kcount) )
return -EFAULT;
kbuf[kcount] = '\0';
diff -r c20bc60f9243 -r 810885428743 xen/drivers/video/vga.c
--- a/xen/drivers/video/vga.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/drivers/video/vga.c Wed Jun 20 12:49:27 2007 -0600
@@ -32,6 +32,9 @@ static unsigned char *video;
*
* 'vga=ask':
* display a vga menu of available modes
+ *
+ * 'vga=current':
+ * use the current vga mode without modification
*
* 'vga=text-80x<rows>':
* text mode, where <rows> is one of {25,28,30,34,43,50,60}
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-ia64/guest_access.h
--- a/xen/include/asm-ia64/guest_access.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-ia64/guest_access.h Wed Jun 20 12:49:27 2007 -0600
@@ -76,28 +76,31 @@ extern int xencomm_handle_is_null(void *
__copy_field_from_guest(ptr, hnd, field)
#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \
- const typeof(ptr) _d = (hnd).p; \
- const typeof(ptr) _s = (ptr); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ void *_d = (hnd).p; \
+ ((void)((hnd).p == (ptr))); \
xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
})
#define __copy_field_to_guest(hnd, ptr, field) ({ \
- const int _off = offsetof(typeof(*ptr), field); \
- const typeof(ptr) _d = (hnd).p; \
+ unsigned int _off = offsetof(typeof(*(hnd).p), field); \
const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = (hnd).p; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off); \
})
-#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \
- const typeof(ptr) _s = (hnd).p; \
- const typeof(ptr) _d = (ptr); \
- xencomm_copy_from_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
+#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \
+ const typeof(*(ptr)) *_s = (hnd).p; \
+ typeof(*(ptr)) *_d = (ptr); \
+ xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
})
#define __copy_field_from_guest(ptr, hnd, field) ({ \
- const int _off = offsetof(typeof(*ptr), field); \
- const typeof(ptr) _s = (hnd).p; \
- const typeof(&(ptr)->field) _d = &(ptr)->field; \
+ unsigned int _off = offsetof(typeof(*(hnd).p), field); \
+ const void *_s = (hnd).p; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \
})
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/event.h Wed Jun 20 12:49:27 2007 -0600
@@ -10,7 +10,6 @@
#define __ASM_EVENT_H__
#include <xen/shared.h>
-#include <asm/hvm/irq.h> /* cpu_has_pending_irq() */
static inline void vcpu_kick(struct vcpu *v)
{
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/guest_access.h
--- a/xen/include/asm-x86/guest_access.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/guest_access.h Wed Jun 20 12:49:27 2007 -0600
@@ -32,11 +32,12 @@
* specifying an offset into the guest array.
*/
#define copy_to_guest_offset(hnd, off, ptr, nr) ({ \
- typeof(ptr) _x = (hnd).p; \
- const typeof(ptr) _y = (ptr); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ char (*_d)[sizeof(*_s)] = (void *)(hnd).p; \
+ ((void)((hnd).p == (ptr))); \
is_hvm_vcpu(current) ? \
- copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \
- copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \
+ copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) : \
+ copy_to_user(_d+(off), _s, sizeof(*_s)*(nr)); \
})
/*
@@ -44,29 +45,30 @@
* specifying an offset into the guest array.
*/
#define copy_from_guest_offset(ptr, hnd, off, nr) ({ \
- const typeof(ptr) _x = (hnd).p; \
- typeof(ptr) _y = (ptr); \
+ const typeof(*(ptr)) *_s = (hnd).p; \
+ typeof(*(ptr)) *_d = (ptr); \
is_hvm_vcpu(current) ? \
- copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\
- copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \
+ copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
+ copy_from_user(_d, _s+(off), sizeof(*_d)*(nr)); \
})
/* Copy sub-field of a structure to guest context via a guest handle. */
#define copy_field_to_guest(hnd, ptr, field) ({ \
- typeof(&(ptr)->field) _x = &(hnd).p->field; \
- const typeof(&(ptr)->field) _y = &(ptr)->field; \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = &(hnd).p->field; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
is_hvm_vcpu(current) ? \
- copy_to_user_hvm(_x, _y, sizeof(*_x)) : \
- copy_to_user(_x, _y, sizeof(*_x)); \
+ copy_to_user_hvm(_d, _s, sizeof(*_s)) : \
+ copy_to_user(_d, _s, sizeof(*_s)); \
})
/* Copy sub-field of a structure from guest context via a guest handle. */
#define copy_field_from_guest(ptr, hnd, field) ({ \
- const typeof(&(ptr)->field) _x = &(hnd).p->field; \
- typeof(&(ptr)->field) _y = &(ptr)->field; \
+ const typeof(&(ptr)->field) _s = &(hnd).p->field; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
is_hvm_vcpu(current) ? \
- copy_from_user_hvm(_y, _x, sizeof(*_x)) : \
- copy_from_user(_y, _x, sizeof(*_x)); \
+ copy_from_user_hvm(_d, _s, sizeof(*_d)) : \
+ copy_from_user(_d, _s, sizeof(*_d)); \
})
/*
@@ -78,35 +80,37 @@
array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
#define __copy_to_guest_offset(hnd, off, ptr, nr) ({ \
- typeof(ptr) _x = (hnd).p; \
- const typeof(ptr) _y = (ptr); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ char (*_d)[sizeof(*_s)] = (void *)(hnd).p; \
+ ((void)((hnd).p == (ptr))); \
is_hvm_vcpu(current) ? \
- copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \
- __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \
+ copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) : \
+ __copy_to_user(_d+(off), _s, sizeof(*_s)*(nr)); \
})
#define __copy_from_guest_offset(ptr, hnd, off, nr) ({ \
- const typeof(ptr) _x = (hnd).p; \
- typeof(ptr) _y = (ptr); \
+ const typeof(*(ptr)) *_s = (hnd).p; \
+ typeof(*(ptr)) *_d = (ptr); \
is_hvm_vcpu(current) ? \
- copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \
- __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \
+ copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
+ __copy_from_user(_d, _s+(off), sizeof(*_d)*(nr)); \
})
#define __copy_field_to_guest(hnd, ptr, field) ({ \
- typeof(&(ptr)->field) _x = &(hnd).p->field; \
- const typeof(&(ptr)->field) _y = &(ptr)->field; \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = &(hnd).p->field; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
is_hvm_vcpu(current) ? \
- copy_to_user_hvm(_x, _y, sizeof(*_x)) : \
- __copy_to_user(_x, _y, sizeof(*_x)); \
+ copy_to_user_hvm(_d, _s, sizeof(*_s)) : \
+ __copy_to_user(_d, _s, sizeof(*_s)); \
})
#define __copy_field_from_guest(ptr, hnd, field) ({ \
- const typeof(&(ptr)->field) _x = &(hnd).p->field; \
- typeof(&(ptr)->field) _y = &(ptr)->field; \
+ const typeof(&(ptr)->field) _s = &(hnd).p->field; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
is_hvm_vcpu(current) ? \
- copy_from_user_hvm(_y, _x, sizeof(*_x)) : \
- __copy_from_user(_y, _x, sizeof(*_x)); \
+ copy_from_user_hvm(_d, _s, sizeof(*_d)) : \
+ __copy_from_user(_d, _s, sizeof(*_d)); \
})
#endif /* __ASM_X86_GUEST_ACCESS_H__ */
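
Two idioms recur in these rewritten macros. First, ((void)((hnd).p == (ptr)))
is a pure compile-time type check: comparing incompatible pointer types is a
constraint violation, so a mismatched handle/buffer pair fails to build,
while the void cast discards the result and generates no code. Second,
declaring the destination as char (*_d)[sizeof(*_s)] makes _d + (off) advance
by whole elements even though _d is byte-typed. A standalone sketch of the
first idiom:

#define CHECK_SAME_TYPE(a, b) ((void)((a) == (b)))   /* no code emitted */

int vals[4];
int *p = vals;
CHECK_SAME_TYPE(p, vals);        /* OK: int * on each side after decay  */
/* CHECK_SAME_TYPE(p, "text");      would fail: int * versus char *     */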
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h Wed Jun 20 12:49:27 2007 -0600
@@ -55,6 +55,14 @@ typedef struct segment_register {
u64 base;
} __attribute__ ((packed)) segment_register_t;
+/* Interrupt acknowledgement sources. */
+enum hvm_intack {
+ hvm_intack_none,
+ hvm_intack_pic,
+ hvm_intack_lapic,
+ hvm_intack_nmi
+};
+
/*
* The hardware virtual machine (HVM) interface abstracts away from the
* x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -106,7 +114,7 @@ struct hvm_function_table {
int (*long_mode_enabled)(struct vcpu *v);
int (*pae_enabled)(struct vcpu *v);
int (*nx_enabled)(struct vcpu *v);
- int (*interrupts_enabled)(struct vcpu *v);
+ int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack);
int (*guest_x86_mode)(struct vcpu *v);
unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
@@ -124,6 +132,13 @@ struct hvm_function_table {
void (*update_guest_cr3)(struct vcpu *v);
/*
+ * Called to ensure that all guest-specific mappings in a tagged TLB
+ * are flushed; does *not* flush Xen's own TLB entries, and on
+ * processors without a tagged TLB it is a no-op.
+ */
+ void (*flush_guest_tlbs)(void);
+
+ /*
* Reflect the virtual APIC's value in the guest's V_TPR register
*/
void (*update_vtpr)(struct vcpu *v, unsigned long value);
@@ -148,6 +163,7 @@ struct hvm_function_table {
};
extern struct hvm_function_table hvm_funcs;
+extern int hvm_enabled;
int hvm_domain_initialise(struct domain *d);
void hvm_domain_relinquish_resources(struct domain *d);
@@ -191,16 +207,16 @@ hvm_long_mode_enabled(struct vcpu *v)
#define hvm_long_mode_enabled(v) (v,0)
#endif
- static inline int
+static inline int
hvm_pae_enabled(struct vcpu *v)
{
return hvm_funcs.pae_enabled(v);
}
static inline int
-hvm_interrupts_enabled(struct vcpu *v)
-{
- return hvm_funcs.interrupts_enabled(v);
+hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+ return hvm_funcs.interrupts_enabled(v, type);
}
static inline int
@@ -230,6 +246,13 @@ hvm_update_vtpr(struct vcpu *v, unsigned
}
void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3);
+
+static inline void
+hvm_flush_guest_tlbs(void)
+{
+ if ( hvm_enabled )
+ hvm_funcs.flush_guest_tlbs();
+}
void hvm_hypercall_page_initialise(struct domain *d,
void *hypercall_page);
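
Exporting hvm_enabled alongside the new hook lets common code flush guest
TLBs unconditionally; the inline guard makes the call safe before HVM
initialisation. An illustrative call pattern (the caller name is
hypothetical):

static void example_flush_all(void)
{
    local_flush_tlb();        /* Xen's own mappings on this CPU */
    hvm_flush_guest_tlbs();   /* tagged guest mappings: ASID bump on SVM,
                                 no-op on VMX where VMRESUME flushes */
}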
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/irq.h Wed Jun 20 12:49:27 2007 -0600
@@ -24,10 +24,10 @@
#include <xen/types.h>
#include <xen/spinlock.h>
+#include <asm/hvm/hvm.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vioapic.h>
#include <public/hvm/save.h>
-
struct hvm_irq {
/*
@@ -58,7 +58,6 @@ struct hvm_irq {
HVMIRQ_callback_gsi,
HVMIRQ_callback_pci_intx
} callback_via_type;
- uint32_t pad; /* So the next field will be aligned */
};
union {
uint32_t gsi;
@@ -115,9 +114,12 @@ void hvm_set_callback_irq_level(void);
void hvm_set_callback_irq_level(void);
void hvm_set_callback_via(struct domain *d, uint64_t via);
-int cpu_get_interrupt(struct vcpu *v, int *type);
-int cpu_has_pending_irq(struct vcpu *v);
-int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type);
+/* Check/Acknowledge next pending interrupt. */
+enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
+int hvm_vcpu_ack_pending_irq(
+ struct vcpu *v, enum hvm_intack type, int *vector);
+
+int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intack src);
int is_isa_irq_masked(struct vcpu *v, int isa_irq);
#endif /* __ASM_X86_HVM_IRQ_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:49:27 2007 -0600
@@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
/* End of save/restore */
extern char hvm_io_bitmap[];
-extern int hvm_enabled;
void hvm_enable(struct hvm_function_table *);
void hvm_disable(void);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/svm/asid.h
--- a/xen/include/asm-x86/hvm/svm/asid.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/asid.h Wed Jun 20 12:49:27 2007 -0600
@@ -30,6 +30,7 @@ void svm_asid_init(struct cpuinfo_x86 *c
void svm_asid_init(struct cpuinfo_x86 *c);
void svm_asid_init_vcpu(struct vcpu *v);
void svm_asid_inv_asid(struct vcpu *v);
+void svm_asid_inc_generation(void);
/*
* ASID related, guest triggered events.
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h Wed Jun 20 12:49:27 2007 -0600
@@ -30,11 +30,13 @@
struct hvm_vcpu {
unsigned long hw_cr3; /* value we give to HW to use */
- unsigned long ioflags;
struct hvm_io_op io_op;
struct vlapic vlapic;
s64 cache_tsc_offset;
u64 guest_time;
+
+ /* Is an NMI pending for delivery to this VCPU core? */
+ bool_t nmi_pending; /* NB. integrate flag with save/restore */
/* Lock and list for virtual platform timers. */
spinlock_t tm_lock;
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vlapic.h Wed Jun 20 12:49:27 2007 -0600
@@ -76,7 +76,7 @@ int vlapic_find_highest_irr(struct vlapi
int vlapic_find_highest_irr(struct vlapic *vlapic);
int vlapic_has_interrupt(struct vcpu *v);
-int cpu_get_apic_interrupt(struct vcpu *v, int *mode);
+int cpu_get_apic_interrupt(struct vcpu *v);
int vlapic_init(struct vcpu *v);
void vlapic_destroy(struct vcpu *v);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:49:27 2007 -0600
@@ -336,9 +336,16 @@ static inline void vmx_inject_sw_excepti
instruction_len);
}
-static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code)
-{
- __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0);
+static inline void vmx_inject_extint(struct vcpu *v, int trap)
+{
+ __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR,
+ VMX_DELIVER_NO_ERROR_CODE, 0);
+}
+
+static inline void vmx_inject_nmi(struct vcpu *v)
+{
+ __vmx_inject_exception(v, 2, INTR_TYPE_NMI,
+ VMX_DELIVER_NO_ERROR_CODE, 0);
}
#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpic.h
--- a/xen/include/asm-x86/hvm/vpic.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpic.h Wed Jun 20 12:49:27 2007 -0600
@@ -32,7 +32,7 @@ void vpic_irq_positive_edge(struct domai
void vpic_irq_positive_edge(struct domain *d, int irq);
void vpic_irq_negative_edge(struct domain *d, int irq);
void vpic_init(struct domain *d);
-int cpu_get_pic_interrupt(struct vcpu *v, int *type);
+int cpu_get_pic_interrupt(struct vcpu *v);
int is_periodic_irq(struct vcpu *v, int irq, int type);
#endif /* __ASM_X86_HVM_VPIC_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpt.h Wed Jun 20 12:49:27 2007 -0600
@@ -29,6 +29,7 @@
#include <xen/timer.h>
#include <xen/list.h>
#include <asm/hvm/vpic.h>
+#include <asm/hvm/irq.h>
#include <public/hvm/save.h>
struct HPETState;
@@ -119,7 +120,7 @@ void pt_freeze_time(struct vcpu *v);
void pt_freeze_time(struct vcpu *v);
void pt_thaw_time(struct vcpu *v);
void pt_update_irq(struct vcpu *v);
-void pt_intr_post(struct vcpu *v, int vector, int type);
+void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src);
void pt_reset(struct vcpu *v);
void pt_migrate(struct vcpu *v);
void create_periodic_time(
diff -r c20bc60f9243 -r 810885428743 xen/include/xen/compat.h
--- a/xen/include/xen/compat.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/xen/compat.h Wed Jun 20 12:49:27 2007 -0600
@@ -44,9 +44,10 @@
* specifying an offset into the guest array.
*/
#define copy_to_compat_offset(hnd, off, ptr, nr) ({ \
- const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
- const typeof(*(ptr)) *const _y = (ptr); \
- copy_to_user(_x + (off), _y, sizeof(*_x) * (nr)); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c; \
+ ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr))); \
+ copy_to_user(_d + (off), _s, sizeof(*_s) * (nr)); \
})
/*
@@ -54,9 +55,9 @@
* specifying an offset into the guest array.
*/
#define copy_from_compat_offset(ptr, hnd, off, nr) ({ \
- const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
- const typeof(ptr) _y = (ptr); \
- copy_from_user(_y, _x + (off), sizeof(*_x) * (nr)); \
+ const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
+ typeof(*(ptr)) *_d = (ptr); \
+ copy_from_user(_d, _s + (off), sizeof(*_d) * (nr)); \
})
#define copy_to_compat(hnd, ptr, nr) \
@@ -67,16 +68,19 @@
/* Copy sub-field of a structure to guest context via a compat handle. */
#define copy_field_to_compat(hnd, ptr, field) ({ \
- typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
- const typeof((ptr)->field) *const _y = &(ptr)->field; \
- copy_to_user(_x, _y, sizeof(*_x)); \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
+ ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field == \
+ &(ptr)->field)); \
+ copy_to_user(_d, _s, sizeof(*_s)); \
})
/* Copy sub-field of a structure from guest context via a compat handle. */
#define copy_field_from_compat(ptr, hnd, field) ({ \
- typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
- typeof((ptr)->field) *const _y = &(ptr)->field; \
- copy_from_user(_y, _x, sizeof(*_x)); \
+ const typeof(&(ptr)->field) _s = \
+ &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
+ copy_from_user(_d, _s, sizeof(*_d)); \
})
/*
@@ -84,18 +88,20 @@
* Allows use of faster __copy_* functions.
*/
#define compat_handle_okay(hnd, nr) \
- compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), sizeof(**(hnd)._))
+ compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), \
+ sizeof(**(hnd)._))
#define __copy_to_compat_offset(hnd, off, ptr, nr) ({ \
- const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
- const typeof(*(ptr)) *const _y = (ptr); \
- __copy_to_user(_x + (off), _y, sizeof(*_x) * (nr)); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c; \
+ ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr))); \
+ __copy_to_user(_d + (off), _s, sizeof(*_s) * (nr)); \
})
#define __copy_from_compat_offset(ptr, hnd, off, nr) ({ \
- const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
- const typeof(ptr) _y = (ptr); \
- __copy_from_user(_y, _x + (off), sizeof(*_x) * (nr)); \
+ const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
+ typeof(*(ptr)) *_d = (ptr); \
+ __copy_from_user(_d, _s + (off), sizeof(*_d) * (nr)); \
})
#define __copy_to_compat(hnd, ptr, nr) \
@@ -105,15 +111,18 @@
__copy_from_compat_offset(ptr, hnd, 0, nr)
#define __copy_field_to_compat(hnd, ptr, field) ({ \
- typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
- const typeof((ptr)->field) *const _y = &(ptr)->field; \
- __copy_to_user(_x, _y, sizeof(*_x)); \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
+ ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field == \
+ &(ptr)->field)); \
+ __copy_to_user(_d, _s, sizeof(*_s)); \
})
#define __copy_field_from_compat(ptr, hnd, field) ({ \
- typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
- typeof((ptr)->field) *const _y = &(ptr)->field; \
- __copy_from_user(_y, _x, sizeof(*_x)); \
+ const typeof(&(ptr)->field) _s = \
+ &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
+ __copy_from_user(_d, _s, sizeof(*_d)); \
})
@@ -169,7 +178,8 @@ int switch_compat(struct domain *);
int switch_compat(struct domain *);
int switch_native(struct domain *);
-#define BITS_PER_GUEST_LONG(d) (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
+#define BITS_PER_GUEST_LONG(d) \
+ (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
#else
diff -r c20bc60f9243 -r 810885428743 xen/include/xen/xencomm.h
--- a/xen/include/xen/xencomm.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/xen/xencomm.h Wed Jun 20 12:49:27 2007 -0600
@@ -47,17 +47,17 @@ static inline unsigned long xencomm_inli
((hnd).p == NULL || xencomm_handle_is_null((hnd).p))
/* Offset the given guest handle into the array it refers to. */
-#define guest_handle_add_offset(hnd, nr) ({ \
- const typeof((hnd).p) _ptr; \
- xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr)); \
+#define guest_handle_add_offset(hnd, nr) ({ \
+ const typeof((hnd).p) _ptr; \
+ xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr)); \
})
/* Cast a guest handle to the specified type of handle. */
#define guest_handle_cast(hnd, type) ({ \
type *_x = (hnd).p; \
- XEN_GUEST_HANDLE(type) _y; \
- set_xen_guest_handle(_y, _x); \
- _y; \
+ XEN_GUEST_HANDLE(type) _y; \
+ set_xen_guest_handle(_y, _x); \
+ _y; \
})
/* Since we run in real mode, we can safely access all addresses. That also
@@ -87,29 +87,32 @@ static inline unsigned long xencomm_inli
__copy_field_from_guest(ptr, hnd, field)
#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \
- const typeof(ptr) _x = (hnd).p; \
- const typeof(ptr) _y = (ptr); \
- xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ void *_d = (hnd).p; \
+ ((void)((hnd).p == (ptr))); \
+ xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
})
#define __copy_field_to_guest(hnd, ptr, field) ({ \
- const int _off = offsetof(typeof(*ptr), field); \
- const typeof(&(ptr)->field) _x = &(hnd).p->field; \
- const typeof(&(ptr)->field) _y = &(ptr)->field; \
- xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \
+ unsigned int _off = offsetof(typeof(*(hnd).p), field); \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = (hnd).p; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
+ xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off); \
})
#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \
- const typeof(ptr) _x = (hnd).p; \
- const typeof(ptr) _y = (ptr); \
- xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
+ const typeof(*(ptr)) *_s = (hnd).p; \
+ typeof(*(ptr)) *_d = (ptr); \
+ xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
})
#define __copy_field_from_guest(ptr, hnd, field) ({ \
- const int _off = offsetof(typeof(*ptr), field); \
- const typeof(&(ptr)->field) _x = &(hnd).p->field; \
- const typeof(&(ptr)->field) _y = &(ptr)->field; \
- xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \
+ unsigned int _off = offsetof(typeof(*(hnd).p), field); \
+ const void *_s = (hnd).p; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
+ xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \
})
#endif /* __XENCOMM_H__ */