# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1175627091 21600
# Node ID f378c424e0ced4cbc584e5c6125d065f1cc05d0c
# Parent fc9e2f7920c95229caaf5ad8fc44965dd891f600
# Parent 7e431ea834a877b1f0c90bdb1e6f1346da4e81cc
merge with xen-unstable.hg
---
README | 22
docs/src/user.tex | 4
linux-2.6-xen-sparse/arch/ia64/Kconfig | 9
linux-2.6-xen-sparse/drivers/xen/Kconfig | 16
linux-2.6-xen-sparse/drivers/xen/Makefile | 7
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 38
linux-2.6-xen-sparse/drivers/xen/core/Makefile | 3
linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile | 1
linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c | 973 +++++++++++++++++++++++
linux-2.6-xen-sparse/drivers/xen/util.c | 22
linux-2.6-xen-sparse/include/linux/mm.h | 4
linux-2.6-xen-sparse/include/xen/driver_util.h | 3
linux-2.6-xen-sparse/include/xen/public/gntdev.h | 105 ++
linux-2.6-xen-sparse/mm/memory.c | 9
tools/blktap/drivers/qcow2raw.c | 9
tools/examples/xmexample.hvm | 4
tools/ioemu/target-i386-dm/helper2.c | 33
tools/ioemu/vl.c | 3
tools/ioemu/vl.h | 6
tools/ioemu/xenstore.c | 69 +
tools/libxc/ia64/xc_ia64_linux_restore.c | 51 -
tools/libxc/xc_core.c | 4
tools/libxc/xc_core_x86.c | 12
tools/libxc/xc_hvm_restore.c | 14
tools/libxc/xc_hvm_save.c | 7
tools/libxc/xc_linux.c | 156 +++
tools/libxc/xc_linux_restore.c | 85 +-
tools/libxc/xc_linux_save.c | 66 -
tools/libxc/xc_resume.c | 4
tools/libxc/xenctrl.h | 59 +
tools/libxc/xenguest.h | 9
tools/libxc/xg_private.h | 9
tools/python/xen/lowlevel/scf/scf.c | 2
tools/python/xen/xend/XendCheckpoint.py | 13
tools/python/xen/xend/XendConfig.py | 3
tools/python/xen/xend/XendDomainInfo.py | 8
tools/python/xen/xend/balloon.py | 18
tools/python/xen/xend/image.py | 3
tools/python/xen/xend/osdep.py | 50 +
tools/python/xen/xend/server/SrvServer.py | 4
tools/python/xen/xend/server/relocate.py | 8
tools/python/xen/xm/create.py | 6
tools/python/xen/xm/main.py | 8
tools/python/xen/xm/xenapi_create.py | 1
tools/xcutils/xc_restore.c | 33
tools/xenstat/xentop/xentop.c | 2
xen/arch/x86/hvm/hvm.c | 9
xen/arch/x86/hvm/intercept.c | 38
xen/arch/x86/hvm/io.c | 11
xen/arch/x86/hvm/platform.c | 20
xen/arch/x86/hvm/rtc.c | 8
xen/arch/x86/hvm/svm/vmcb.c | 28
xen/arch/x86/hvm/vmx/vmcs.c | 2
xen/arch/x86/hvm/vmx/vmx.c | 13
xen/arch/x86/mm.c | 3
xen/arch/x86/mm/hap/hap.c | 68 -
xen/arch/x86/mm/shadow/multi.c | 4
xen/arch/x86/setup.c | 4
xen/arch/x86/time.c | 2
xen/arch/x86/traps.c | 17
xen/arch/x86/x86_32/traps.c | 7
xen/arch/x86/x86_64/traps.c | 10
xen/common/domain.c | 121 ++
xen/common/domctl.c | 5
xen/common/page_alloc.c | 12
xen/common/symbols.c | 12
xen/drivers/char/console.c | 8
xen/include/asm-x86/domain.h | 1
xen/include/asm-x86/hvm/io.h | 2
xen/include/asm-x86/hvm/support.h | 1
xen/include/asm-x86/hvm/vmx/vmcs.h | 1
xen/include/asm-x86/processor.h | 8
xen/include/asm-x86/time.h | 5
xen/include/public/hvm/ioreq.h | 1
xen/include/xen/sched.h | 12
75 files changed, 2055 insertions(+), 353 deletions(-)
diff -r fc9e2f7920c9 -r f378c424e0ce README
--- a/README Fri Mar 30 17:18:42 2007 -0600
+++ b/README Tue Apr 03 13:04:51 2007 -0600
@@ -177,3 +177,25 @@ 5. To rebuild a kernel with a modified c
an initial ram disk, just like a native system e.g.
# depmod 2.6.16-xen
# mkinitrd -v -f --with=aacraid --with=sd_mod --with=scsi_mod
initrd-2.6.16-xen.img 2.6.16-xen
+
+
+Python Runtime Libraries
+========================
+
+Xend (the Xen daemon) has the following runtime dependencies:
+
+ * Python 2.3 or later.
+ In many distros, the XML-aspects to the standard library
+ (xml.dom.minidom etc) are broken out into a separate python-xml package.
+ This is also required.
+
+ URL: http://www.python.org/
+ Debian: python, python-xml
+
+ * For optional SSL support, pyOpenSSL:
+ URL: http://pyopenssl.sourceforge.net/
+ Debian: python-pyopenssl
+
+ * For optional PAM support, PyPAM:
+ URL: http://www.pangalactic.org/PyPAM/
+ Debian: python-pam
diff -r fc9e2f7920c9 -r f378c424e0ce docs/src/user.tex
--- a/docs/src/user.tex Fri Mar 30 17:18:42 2007 -0600
+++ b/docs/src/user.tex Tue Apr 03 13:04:51 2007 -0600
@@ -3250,6 +3250,10 @@ editing \path{grub.conf}.
\item [ dma\_emergency\_pool=xxx ] Specify lower bound on size of DMA
pool below which ordinary allocations will fail rather than fall
back to allocating from the DMA pool.
+\item [ hap ] Instruct Xen to detect hardware-assisted paging support, such
+ as AMD-V's nested paging or Intel\textregistered VT's extended paging. If
+ available, Xen will use hardware-assisted paging instead of shadow paging
+ for guest memory management.
\end{description}
In addition, the following options may be specified on the Xen command
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Tue Apr 03 13:04:51 2007 -0600
@@ -576,15 +576,6 @@ source "crypto/Kconfig"
# override default values of drivers/xen/Kconfig
#
if XEN
-config XEN_UTIL
- default n
-
-config XEN_BALLOON
- default y
-
-config XEN_REBOOT
- default y
-
config XEN_SMPBOOT
default n
endif
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Tue Apr 03 13:04:51 2007 -0600
@@ -253,22 +253,6 @@ config NO_IDLE_HZ
bool
default y
-config XEN_UTIL
- bool
- default y
-
-config XEN_BALLOON
- bool
- default y
-
-config XEN_DEVMEM
- bool
- default y
-
-config XEN_REBOOT
- bool
- default y
-
config XEN_SMPBOOT
bool
default y
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Tue Apr 03 13:04:51 2007 -0600
@@ -3,10 +3,11 @@ obj-y += evtchn/
obj-y += evtchn/
obj-y += privcmd/
obj-y += xenbus/
+obj-y += gntdev/
+obj-y += balloon/
+obj-y += char/
-obj-$(CONFIG_XEN_UTIL) += util.o
-obj-$(CONFIG_XEN_BALLOON) += balloon/
-obj-$(CONFIG_XEN_DEVMEM) += char/
+obj-y += util.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Apr 03 13:04:51 2007 -0600
@@ -44,6 +44,7 @@
#include <asm/hypervisor.h>
#include "common.h"
#include <xen/balloon.h>
+#include <xen/driver_util.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
@@ -55,30 +56,6 @@
#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */
#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */
-
-
-struct class *xen_class;
-EXPORT_SYMBOL_GPL(xen_class);
-
-/*
- * Setup the xen class. This should probably go in another file, but
- * since blktap is the only user of it so far, it gets to keep it.
- */
-int setup_xen_class(void)
-{
- int ret;
-
- if (xen_class)
- return 0;
-
- xen_class = class_create(THIS_MODULE, "xen");
- if ((ret = IS_ERR(xen_class))) {
- xen_class = NULL;
- return ret;
- }
-
- return 0;
-}
/*
* The maximum number of requests that can be outstanding at any time
@@ -347,6 +324,7 @@ static const struct file_operations blkt
static tap_blkif_t *get_next_free_dev(void)
{
+ struct class *class;
tap_blkif_t *info;
int minor;
@@ -409,9 +387,10 @@ found:
wmb();
tapfds[minor] = info;
- class_device_create(xen_class, NULL,
- MKDEV(blktap_major, minor), NULL,
- "blktap%d", minor);
+ if ((class = get_xen_class()) != NULL)
+ class_device_create(class, NULL,
+ MKDEV(blktap_major, minor), NULL,
+ "blktap%d", minor);
}
out:
@@ -1487,6 +1466,7 @@ static int __init blkif_init(void)
static int __init blkif_init(void)
{
int i, ret;
+ struct class *class;
if (!is_running_on_xen())
return -ENODEV;
@@ -1522,7 +1502,7 @@ static int __init blkif_init(void)
DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
/* Make sure the xen class exists */
- if (!setup_xen_class()) {
+ if ((class = get_xen_class()) != NULL) {
/*
* This will allow udev to create the blktap ctrl device.
* We only want to create blktap0 first. We don't want
@@ -1530,7 +1510,7 @@ static int __init blkif_init(void)
* We only create the device when a request of a new device is
* made.
*/
- class_device_create(xen_class, NULL,
+ class_device_create(class, NULL,
MKDEV(blktap_major, 0), NULL,
"blktap0");
} else {
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile Tue Apr 03 13:04:51 2007 -0600
@@ -2,12 +2,11 @@
# Makefile for the linux kernel.
#
-obj-y := evtchn.o gnttab.o features.o
+obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o
obj-$(CONFIG_PROC_FS) += xen_proc.o
obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
-obj-$(CONFIG_XEN_REBOOT) += reboot.o machine_reboot.o
obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
obj-$(CONFIG_KEXEC) += machine_kexec.o
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile Tue Apr 03 13:04:51 2007 -0600
@@ -0,0 +1,1 @@
+obj-y := gntdev.o
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c Tue Apr 03 13:04:51 2007 -0600
@@ -0,0 +1,973 @@
+/******************************************************************************
+ * gntdev.c
+ *
+ * Device for accessing (in user-space) pages that have been granted by other
+ * domains.
+ *
+ * Copyright (c) 2006-2007, D G Murray.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <asm/atomic.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <xen/gnttab.h>
+#include <asm/hypervisor.h>
+#include <xen/balloon.h>
+#include <xen/evtchn.h>
+#include <xen/driver_util.h>
+
+#include <linux/types.h>
+#include <xen/public/gntdev.h>
+
+
+#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@xxxxxxxxxxxx>"
+#define DRIVER_DESC "User-space granted page access driver"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
+#define MAX_GRANTS 128
+
+/* A slot can be in one of three states:
+ *
+ * 0. GNTDEV_SLOT_INVALID:
+ * This slot is not associated with a grant reference, and is therefore free
+ * to be overwritten by a new grant reference.
+ *
+ * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
+ * This slot is associated with a grant reference (via the
+ * IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
+ *
+ * 2. GNTDEV_SLOT_MAPPED:
+ * This slot is associated with a grant reference, and has been mmap()-ed.
+ */
+typedef enum gntdev_slot_state {
+ GNTDEV_SLOT_INVALID = 0,
+ GNTDEV_SLOT_NOT_YET_MAPPED,
+ GNTDEV_SLOT_MAPPED
+} gntdev_slot_state_t;
+
+#define GNTDEV_INVALID_HANDLE -1
+#define GNTDEV_FREE_LIST_INVALID -1
+/* Each opened instance of gntdev is associated with a list of grants,
+ * represented by an array of elements of the following type,
+ * gntdev_grant_info_t.
+ */
+typedef struct gntdev_grant_info {
+ gntdev_slot_state_t state;
+ union {
+ uint32_t free_list_index;
+ struct {
+ domid_t domid;
+ grant_ref_t ref;
+ grant_handle_t kernel_handle;
+ grant_handle_t user_handle;
+ uint64_t dev_bus_addr;
+ } valid;
+ } u;
+} gntdev_grant_info_t;
+
+/* Private data structure, which is stored in the file pointer for files
+ * associated with this device.
+ */
+typedef struct gntdev_file_private_data {
+
+ /* Array of grant information. */
+ gntdev_grant_info_t grants[MAX_GRANTS];
+
+ /* Read/write semaphore used to protect the grants array. */
+ struct rw_semaphore grants_sem;
+
+ /* An array of indices of free slots in the grants array.
+ * N.B. An entry in this list may temporarily have the value
+ * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
+ * from the list by the contiguous allocator, but the list has not yet
+ * been compressed. However, this is not visible across invocations of
+ * the device.
+ */
+ int32_t free_list[MAX_GRANTS];
+
+ /* The number of free slots in the grants array. */
+ uint32_t free_list_size;
+
+ /* Read/write semaphore used to protect the free list. */
+ struct rw_semaphore free_list_sem;
+
+ /* Index of the next slot after the most recent contiguous allocation,
+ * for use in a next-fit allocator.
+ */
+ uint32_t next_fit_index;
+
+ /* Used to map grants into the kernel, before mapping them into user
+ * space.
+ */
+ struct page **foreign_pages;
+
+} gntdev_file_private_data_t;
+
+/* Module lifecycle operations. */
+static int __init gntdev_init(void);
+static void __exit gntdev_exit(void);
+
+module_init(gntdev_init);
+module_exit(gntdev_exit);
+
+/* File operations. */
+static int gntdev_open(struct inode *inode, struct file *flip);
+static int gntdev_release(struct inode *inode, struct file *flip);
+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
+static int gntdev_ioctl (struct inode *inode, struct file *flip,
+ unsigned int cmd, unsigned long arg);
+
+static struct file_operations gntdev_fops = {
+ .owner = THIS_MODULE,
+ .open = gntdev_open,
+ .release = gntdev_release,
+ .mmap = gntdev_mmap,
+ .ioctl = gntdev_ioctl
+};
+
+/* VM operations. */
+static void gntdev_vma_close(struct vm_area_struct *vma);
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, int is_fullmm);
+
+static struct vm_operations_struct gntdev_vmops = {
+ .close = gntdev_vma_close,
+ .zap_pte = gntdev_clear_pte
+};
+
+/* Global variables. */
+
+/* The driver major number, for use when unregistering the driver. */
+static int gntdev_major;
+
+#define GNTDEV_NAME "gntdev"
+
+/* Memory mapping functions
+ * ------------------------
+ *
+ * Every granted page is mapped into both kernel and user space, and the two
+ * following functions return the respective virtual addresses of these pages.
+ *
+ * When shadow paging is disabled, the granted page is mapped directly into
+ * user space; when it is enabled, it is mapped into the kernel and remapped
+ * into user space using vm_insert_page() (see gntdev_mmap(), below).
+ */
+
+/* Returns the virtual address (in user space) of the @page_index'th page
+ * in the given VM area.
+ */
+static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
+ int page_index)
+{
+ return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT);
+}
+
+/* Returns the virtual address (in kernel space) of the @slot_index'th page
+ * mapped by the gntdev instance that owns the given private data struct.
+ */
+static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
+ int slot_index)
+{
+ unsigned long pfn;
+ void *kaddr;
+ pfn = page_to_pfn(priv->foreign_pages[slot_index]);
+ kaddr = pfn_to_kaddr(pfn);
+ return (unsigned long) kaddr;
+}
+
+/* Helper functions. */
+
+/* Adds information about a grant reference to the list of grants in the file's
+ * private data structure. Returns non-zero on failure. On success, sets the
+ * value of *offset to the offset that should be mmap()-ed in order to map the
+ * grant reference.
+ */
+static int add_grant_reference(struct file *flip,
+ struct ioctl_gntdev_grant_ref *op,
+ uint64_t *offset)
+{
+ gntdev_file_private_data_t *private_data
+ = (gntdev_file_private_data_t *) flip->private_data;
+
+ uint32_t slot_index;
+
+ if (unlikely(private_data->free_list_size == 0)) {
+ return -ENOMEM;
+ }
+
+ slot_index = private_data->free_list[--private_data->free_list_size];
+
+ /* Copy the grant information into file's private data. */
+ private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED;
+ private_data->grants[slot_index].u.valid.domid = op->domid;
+ private_data->grants[slot_index].u.valid.ref = op->ref;
+
+ /* The offset is calculated as the index of the chosen entry in the
+ * file's private data's array of grant information. This is then
+ * shifted to give an offset into the virtual "file address space".
+ */
+ *offset = slot_index << PAGE_SHIFT;
+
+ return 0;
+}
+
+/* Adds the @count grant references to the contiguous range in the slot array
+ * beginning at @first_slot. It is assumed that @first_slot was returned by a
+ * previous invocation of find_contiguous_free_range(), during the same
+ * invocation of the driver.
+ */
+static int add_grant_references(struct file *flip,
+ int count,
+ struct ioctl_gntdev_grant_ref *ops,
+ uint32_t first_slot)
+{
+ gntdev_file_private_data_t *private_data
+ = (gntdev_file_private_data_t *) flip->private_data;
+ int i;
+
+ for (i = 0; i < count; ++i) {
+
+ /* First, mark the slot's entry in the free list as invalid. */
+ int free_list_index =
+ private_data->grants[first_slot+i].u.free_list_index;
+ private_data->free_list[free_list_index] =
+ GNTDEV_FREE_LIST_INVALID;
+
+ /* Now, update the slot. */
+ private_data->grants[first_slot+i].state =
+ GNTDEV_SLOT_NOT_YET_MAPPED;
+ private_data->grants[first_slot+i].u.valid.domid =
+ ops[i].domid;
+ private_data->grants[first_slot+i].u.valid.ref = ops[i].ref;
+ }
+
+ return 0;
+}
+
+/* Scans through the free list for @flip, removing entries that are marked as
+ * GNTDEV_SLOT_INVALID. This will reduce the recorded size of the free list to
+ * the number of valid entries.
+ */
+static void compress_free_list(struct file *flip)
+{
+ gntdev_file_private_data_t *private_data
+ = (gntdev_file_private_data_t *) flip->private_data;
+ int i, j = 0, old_size;
+
+ old_size = private_data->free_list_size;
+ for (i = 0; i < old_size; ++i) {
+ if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) {
+ private_data->free_list[j] =
+ private_data->free_list[i];
+ ++j;
+ } else {
+ --private_data->free_list_size;
+ }
+ }
+}
+
+/* Searches the grant array in the private data of @flip for a range of
+ * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
+ *
+ * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
+ */
+static int find_contiguous_free_range(struct file *flip,
+ uint32_t num_slots)
+{
+ gntdev_file_private_data_t *private_data
+ = (gntdev_file_private_data_t *) flip->private_data;
+
+ int i;
+ int start_index = private_data->next_fit_index;
+ int range_start = 0, range_length;
+
+ if (private_data->free_list_size < num_slots) {
+ return -ENOMEM;
+ }
+
+ /* First search from the start_index to the end of the array. */
+ range_length = 0;
+ for (i = start_index; i < MAX_GRANTS; ++i) {
+ if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
+ if (range_length == 0) {
+ range_start = i;
+ }
+ ++range_length;
+ if (range_length == num_slots) {
+ return range_start;
+ }
+ }
+ }
+
+ /* Now search from the start of the array to the start_index. */
+ range_length = 0;
+ for (i = 0; i < start_index; ++i) {
+ if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
+ if (range_length == 0) {
+ range_start = i;
+ }
+ ++range_length;
+ if (range_length == num_slots) {
+ return range_start;
+ }
+ }
+ }
+
+ return -ENOMEM;
+}
+
+/* Interface functions. */
+
+/* Initialises the driver. Called when the module is loaded. */
+static int __init gntdev_init(void)
+{
+ struct class *class;
+ struct class_device *device;
+
+ if (!is_running_on_xen()) {
+ printk(KERN_ERR "You must be running Xen to use gntdev\n");
+ return -ENODEV;
+ }
+
+ gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
+ if (gntdev_major < 0)
+ {
+ printk(KERN_ERR "Could not register gntdev device\n");
+ return -ENOMEM;
+ }
+
+ /* Note that if the sysfs code fails, we will still initialise the
+ * device, and output the major number so that the device can be
+ * created manually using mknod.
+ */
+ if ((class = get_xen_class()) == NULL) {
+ printk(KERN_ERR "Error setting up xen_class\n");
+ printk(KERN_ERR "gntdev created with major number = %d\n",
+ gntdev_major);
+ return 0;
+ }
+
+ device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
+ NULL, GNTDEV_NAME);
+ if (IS_ERR(device)) {
+ printk(KERN_ERR "Error creating gntdev device in xen_class\n");
+ printk(KERN_ERR "gntdev created with major number = %d\n",
+ gntdev_major);
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Cleans up and unregisters the driver. Called when the driver is unloaded.
+ */
+static void __exit gntdev_exit(void)
+{
+ struct class *class;
+ if ((class = get_xen_class()) != NULL)
+ class_device_destroy(class, MKDEV(gntdev_major, 0));
+ unregister_chrdev(gntdev_major, GNTDEV_NAME);
+}
+
+/* Called when the device is opened. */
+static int gntdev_open(struct inode *inode, struct file *flip)
+{
+ gntdev_file_private_data_t *private_data;
+ int i;
+
+ try_module_get(THIS_MODULE);
+
+ /* Allocate space for the per-instance private data. */
+ private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
+ if (!private_data)
+ goto nomem_out;
+
+ /* Allocate space for the kernel-mapping of granted pages. */
+ private_data->foreign_pages =
+ alloc_empty_pages_and_pagevec(MAX_GRANTS);
+ if (!private_data->foreign_pages)
+ goto nomem_out2;
+
+ /* Initialise the free-list, which contains all slots at first.
+ */
+ for (i = 0; i < MAX_GRANTS; ++i) {
+ private_data->free_list[MAX_GRANTS - i - 1] = i;
+ private_data->grants[i].state = GNTDEV_SLOT_INVALID;
+ private_data->grants[i].u.free_list_index = MAX_GRANTS - i - 1;
+ }
+ private_data->free_list_size = MAX_GRANTS;
+ private_data->next_fit_index = 0;
+
+ init_rwsem(&private_data->grants_sem);
+ init_rwsem(&private_data->free_list_sem);
+
+ flip->private_data = private_data;
+
+ return 0;
+
+nomem_out2:
+ kfree(private_data);
+nomem_out:
+ return -ENOMEM;
+}
+
+/* Called when the device is closed.
+ */
+static int gntdev_release(struct inode *inode, struct file *flip)
+{
+ if (flip->private_data) {
+ gntdev_file_private_data_t *private_data =
+ (gntdev_file_private_data_t *) flip->private_data;
+ if (private_data->foreign_pages) {
+ free_empty_pages_and_pagevec
+ (private_data->foreign_pages, MAX_GRANTS);
+ }
+ kfree(private_data);
+ }
+ module_put(THIS_MODULE);
+ return 0;
+}
+
+/* Called when an attempt is made to mmap() the device. The private data from
+ * @flip contains the list of grant references that can be mapped. The vm_pgoff
+ * field of @vma contains the index into that list that refers to the grant
+ * reference that will be mapped. Only mappings that are a multiple of
+ * PAGE_SIZE are handled.
+ */
+static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma)
+{
+ struct gnttab_map_grant_ref op;
+ unsigned long slot_index = vma->vm_pgoff;
+ unsigned long kernel_vaddr, user_vaddr;
+ uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ uint64_t ptep;
+ int ret;
+ int flags;
+ int i;
+ struct page *page;
+ gntdev_file_private_data_t *private_data = flip->private_data;
+
+ if (unlikely(!private_data)) {
+ printk(KERN_ERR "File's private data is NULL.\n");
+ return -EINVAL;
+ }
+
+ if (unlikely((size <= 0) || (size + slot_index) > MAX_GRANTS)) {
+ printk(KERN_ERR "Invalid number of pages or offset"
+ "(num_pages = %d, first_slot = %ld).\n",
+ size, slot_index);
+ return -ENXIO;
+ }
+
+ if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+ printk(KERN_ERR "Writable mappings must be shared.\n");
+ return -EINVAL;
+ }
+
+ /* Slots must be in the NOT_YET_MAPPED state. */
+ down_write(&private_data->grants_sem);
+ for (i = 0; i < size; ++i) {
+ if (private_data->grants[slot_index + i].state !=
+ GNTDEV_SLOT_NOT_YET_MAPPED) {
+ printk(KERN_ERR "Slot (index = %ld) is in the wrong "
+ "state (%d).\n", slot_index + i,
+ private_data->grants[slot_index + i].state);
+ up_write(&private_data->grants_sem);
+ return -EINVAL;
+ }
+ }
+
+ /* Install the hook for unmapping. */
+ vma->vm_ops = &gntdev_vmops;
+
+ /* The VM area contains pages from another VM. */
+ vma->vm_flags |= VM_FOREIGN;
+ vma->vm_private_data = kzalloc(size * sizeof(struct page_struct *),
+ GFP_KERNEL);
+ if (vma->vm_private_data == NULL) {
+ printk(KERN_ERR "Couldn't allocate mapping structure for VM "
+ "area.\n");
+ return -ENOMEM;
+ }
+
+ /* This flag prevents Bad PTE errors when the memory is unmapped. */
+ vma->vm_flags |= VM_RESERVED;
+
+ /* This flag prevents this VM area being copied on a fork(). A better
+ * behaviour might be to explicitly carry out the appropriate mappings
+ * on fork(), but I don't know if there's a hook for this.
+ */
+ vma->vm_flags |= VM_DONTCOPY;
+
+#ifdef CONFIG_X86
+ /* This flag ensures that the page tables are not unpinned before the
+ * VM area is unmapped. Therefore Xen still recognises the PTE as
+ * belonging to an L1 pagetable, and the grant unmap operation will
+ * succeed, even if the process does not exit cleanly.
+ */
+ vma->vm_mm->context.has_foreign_mappings = 1;
+#endif
+
+ for (i = 0; i < size; ++i) {
+
+ flags = GNTMAP_host_map;
+ if (!(vma->vm_flags & VM_WRITE))
+ flags |= GNTMAP_readonly;
+
+ kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
+ user_vaddr = get_user_vaddr(vma, i);
+ page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
+
+ gnttab_set_map_op(&op, kernel_vaddr, flags,
+ private_data->grants[slot_index+i]
+ .u.valid.ref,
+ private_data->grants[slot_index+i]
+ .u.valid.domid);
+
+ /* Carry out the mapping of the grant reference. */
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ &op, 1);
+ BUG_ON(ret);
+ if (op.status) {
+ printk(KERN_ERR "Error mapping the grant reference "
+ "into the kernel (%d). domid = %d; ref = %d\n",
+ op.status,
+ private_data->grants[slot_index+i]
+ .u.valid.domid,
+ private_data->grants[slot_index+i]
+ .u.valid.ref);
+ goto undo_map_out;
+ }
+
+ /* Store a reference to the page that will be mapped into user
+ * space.
+ */
+ ((struct page **) vma->vm_private_data)[i] = page;
+
+ /* Mark mapped page as reserved. */
+ SetPageReserved(page);
+
+ /* Record the grant handle, for use in the unmap operation. */
+ private_data->grants[slot_index+i].u.valid.kernel_handle =
+ op.handle;
+ private_data->grants[slot_index+i].u.valid.dev_bus_addr =
+ op.dev_bus_addr;
+
+ private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
+ private_data->grants[slot_index+i].u.valid.user_handle =
+ GNTDEV_INVALID_HANDLE;
+
+ /* Now perform the mapping to user space. */
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+
+ /* NOT USING SHADOW PAGE TABLES. */
+ /* In this case, we map the grant(s) straight into user
+ * space.
+ */
+
+ /* Get the machine address of the PTE for the user
+ * page.
+ */
+ if ((ret = create_lookup_pte_addr(vma->vm_mm,
+ vma->vm_start
+ + (i << PAGE_SHIFT),
+ &ptep)))
+ {
+ printk(KERN_ERR "Error obtaining PTE pointer "
+ "(%d).\n", ret);
+ goto undo_map_out;
+ }
+
+ /* Configure the map operation. */
+
+ /* The reference is to be used by host CPUs. */
+ flags = GNTMAP_host_map;
+
+ /* Specifies a user space mapping. */
+ flags |= GNTMAP_application_map;
+
+ /* The map request contains the machine address of the
+ * PTE to update.
+ */
+ flags |= GNTMAP_contains_pte;
+
+ if (!(vma->vm_flags & VM_WRITE))
+ flags |= GNTMAP_readonly;
+
+ gnttab_set_map_op(&op, ptep, flags,
+ private_data->grants[slot_index+i]
+ .u.valid.ref,
+ private_data->grants[slot_index+i]
+ .u.valid.domid);
+
+ /* Carry out the mapping of the grant reference. */
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ &op, 1);
+ BUG_ON(ret);
+ if (op.status) {
+ printk(KERN_ERR "Error mapping the grant "
+ "reference into user space (%d). domid "
+ "= %d; ref = %d\n", op.status,
+ private_data->grants[slot_index+i].u
+ .valid.domid,
+ private_data->grants[slot_index+i].u
+ .valid.ref);
+ goto undo_map_out;
+ }
+
+ /* Record the grant handle, for use in the unmap
+ * operation.
+ */
+ private_data->grants[slot_index+i].u.
+ valid.user_handle = op.handle;
+
+ /* Update p2m structure with the new mapping. */
+ set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
+ FOREIGN_FRAME(private_data->
+ grants[slot_index+i]
+ .u.valid.dev_bus_addr
+ >> PAGE_SHIFT));
+ } else {
+ /* USING SHADOW PAGE TABLES. */
+ /* In this case, we simply insert the page into the VM
+ * area. */
+ ret = vm_insert_page(vma, user_vaddr, page);
+ }
+
+ }
+
+ up_write(&private_data->grants_sem);
+ return 0;
+
+undo_map_out:
+ /* If we have a mapping failure, the unmapping will be taken care of
+ * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
+ * All we need to do here is free the vma_private_data.
+ */
+ kfree(vma->vm_private_data);
+
+ /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
+ * to NULL on failure. However, we need this in gntdev_clear_pte() to
+ * unmap the grants. Therefore, we smuggle a reference to the file's
+ * private data in the VM area's private data pointer.
+ */
+ vma->vm_private_data = private_data;
+
+ up_write(&private_data->grants_sem);
+
+ return -ENOMEM;
+}
+
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, int is_fullmm)
+{
+ int slot_index, ret;
+ pte_t copy;
+ struct gnttab_unmap_grant_ref op;
+ gntdev_file_private_data_t *private_data;
+
+ /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
+ * to NULL on failure. However, we need this in gntdev_clear_pte() to
+ * unmap the grants. Therefore, we smuggle a reference to the file's
+ * private data in the VM area's private data pointer.
+ */
+ if (vma->vm_file) {
+ private_data = (gntdev_file_private_data_t *)
+ vma->vm_file->private_data;
+ } else if (vma->vm_private_data) {
+ private_data = (gntdev_file_private_data_t *)
+ vma->vm_private_data;
+ } else {
+ private_data = NULL; /* gcc warning */
+ BUG();
+ }
+
+ /* Copy the existing value of the PTE for returning. */
+ copy = *ptep;
+
+ /* Calculate the grant relating to this PTE. */
+ slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+
+ /* Only unmap grants if the slot has been mapped. This could be being
+ * called from a failing mmap().
+ */
+ if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {
+
+ /* First, we clear the user space mapping, if it has been made.
+ */
+ if (private_data->grants[slot_index].u.valid.user_handle !=
+ GNTDEV_INVALID_HANDLE &&
+ !xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* NOT USING SHADOW PAGE TABLES. */
+ gnttab_set_unmap_op(&op, virt_to_machine(ptep),
+ GNTMAP_contains_pte,
+ private_data->grants[slot_index]
+ .u.valid.user_handle);
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, &op, 1);
+ BUG_ON(ret);
+ if (op.status)
+ printk("User unmap grant status = %d\n",
+ op.status);
+ } else {
+ /* USING SHADOW PAGE TABLES. */
+ pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+ }
+
+ /* Finally, we unmap the grant from kernel space. */
+ gnttab_set_unmap_op(&op,
+ get_kernel_vaddr(private_data, slot_index),
+ GNTMAP_host_map,
+ private_data->grants[slot_index].u.valid
+ .kernel_handle);
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ &op, 1);
+ BUG_ON(ret);
+ if (op.status)
+ printk("Kernel unmap grant status = %d\n", op.status);
+
+
+ /* Return slot to the not-yet-mapped state, so that it may be
+ * mapped again, or removed by a subsequent ioctl.
+ */
+ private_data->grants[slot_index].state =
+ GNTDEV_SLOT_NOT_YET_MAPPED;
+
+ /* Invalidate the physical to machine mapping for this page. */
+ set_phys_to_machine(__pa(get_kernel_vaddr(private_data,
+ slot_index))
+ >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+
+ } else {
+ pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+ }
+
+ return copy;
+}
+
+/* "Destructor" for a VM area.
+ */
+static void gntdev_vma_close(struct vm_area_struct *vma) {
+ if (vma->vm_private_data) {
+ kfree(vma->vm_private_data);
+ }
+}
+
+/* Called when an ioctl is made on the device.
+ */
+static int gntdev_ioctl(struct inode *inode, struct file *flip,
+ unsigned int cmd, unsigned long arg)
+{
+ int rc = 0;
+ gntdev_file_private_data_t *private_data =
+ (gntdev_file_private_data_t *) flip->private_data;
+
+ switch (cmd) {
+ case IOCTL_GNTDEV_MAP_GRANT_REF:
+ {
+ struct ioctl_gntdev_map_grant_ref op;
+ down_write(&private_data->grants_sem);
+ down_write(&private_data->free_list_sem);
+
+ if ((rc = copy_from_user(&op, (void __user *) arg,
+ sizeof(op)))) {
+ rc = -EFAULT;
+ goto map_out;
+ }
+ if (unlikely(op.count <= 0)) {
+ rc = -EINVAL;
+ goto map_out;
+ }
+
+ if (op.count == 1) {
+ if ((rc = add_grant_reference(flip, &op.refs[0],
+ &op.index)) < 0) {
+ printk(KERN_ERR "Adding grant reference "
+ "failed (%d).\n", rc);
+ goto map_out;
+ }
+ } else {
+ struct ioctl_gntdev_grant_ref *refs, *u;
+ refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
+ if (!refs) {
+ rc = -ENOMEM;
+ goto map_out;
+ }
+ u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
+ if ((rc = copy_from_user(refs,
+ (void __user *)u,
+ sizeof(*refs) * op.count))) {
+ printk(KERN_ERR "Copying refs from user failed"
+ " (%d).\n", rc);
+ rc = -EINVAL;
+ goto map_out;
+ }
+ if ((rc = find_contiguous_free_range(flip, op.count))
+ < 0) {
+ printk(KERN_ERR "Finding contiguous range "
+ "failed (%d).\n", rc);
+ kfree(refs);
+ goto map_out;
+ }
+ op.index = rc << PAGE_SHIFT;
+ if ((rc = add_grant_references(flip, op.count,
+ refs, rc))) {
+ printk(KERN_ERR "Adding grant references "
+ "failed (%d).\n", rc);
+ kfree(refs);
+ goto map_out;
+ }
+ compress_free_list(flip);
+ kfree(refs);
+ }
+ if ((rc = copy_to_user((void __user *) arg,
+ &op,
+ sizeof(op)))) {
+ printk(KERN_ERR "Copying result back to user failed "
+ "(%d)\n", rc);
+ rc = -EFAULT;
+ goto map_out;
+ }
+ map_out:
+ up_write(&private_data->grants_sem);
+ up_write(&private_data->free_list_sem);
+ return rc;
+ }
+ case IOCTL_GNTDEV_UNMAP_GRANT_REF:
+ {
+ struct ioctl_gntdev_unmap_grant_ref op;
+ int i, start_index;
+
+ down_write(&private_data->grants_sem);
+ down_write(&private_data->free_list_sem);
+
+ if ((rc = copy_from_user(&op,
+ (void __user *) arg,
+ sizeof(op)))) {
+ rc = -EFAULT;
+ goto unmap_out;
+ }
+
+ start_index = op.index >> PAGE_SHIFT;
+
+ /* First, check that all pages are in the NOT_YET_MAPPED
+ * state.
+ */
+ for (i = 0; i < op.count; ++i) {
+ if (unlikely
+ (private_data->grants[start_index + i].state
+ != GNTDEV_SLOT_NOT_YET_MAPPED)) {
+ if (private_data->grants[start_index + i].state
+ == GNTDEV_SLOT_INVALID) {
+ printk(KERN_ERR
+ "Tried to remove an invalid "
+ "grant at offset 0x%x.",
+ (start_index + i)
+ << PAGE_SHIFT);
+ rc = -EINVAL;
+ } else {
+ printk(KERN_ERR
+ "Tried to remove a grant which "
+ "is currently mmap()-ed at "
+ "offset 0x%x.",
+ (start_index + i)
+ << PAGE_SHIFT);
+ rc = -EBUSY;
+ }
+ goto unmap_out;
+ }
+ }
+
+ /* Unmap pages and add them to the free list.
+ */
+ for (i = 0; i < op.count; ++i) {
+ private_data->grants[start_index+i].state =
+ GNTDEV_SLOT_INVALID;
+ private_data->grants[start_index+i].u.free_list_index =
+ private_data->free_list_size;
+ private_data->free_list[private_data->free_list_size] =
+ start_index + i;
+ ++private_data->free_list_size;
+ }
+ compress_free_list(flip);
+
+ unmap_out:
+ up_write(&private_data->grants_sem);
+ up_write(&private_data->free_list_sem);
+ return rc;
+ }
+ case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+ {
+ struct ioctl_gntdev_get_offset_for_vaddr op;
+ struct vm_area_struct *vma;
+ unsigned long vaddr;
+
+ if ((rc = copy_from_user(&op,
+ (void __user *) arg,
+ sizeof(op)))) {
+ rc = -EFAULT;
+ goto get_offset_out;
+ }
+ vaddr = (unsigned long)op.vaddr;
+
+	down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, vaddr);
+ if (vma == NULL) {
+ rc = -EFAULT;
+ goto get_offset_unlock_out;
+ }
+ if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
+ printk(KERN_ERR "The vaddr specified does not belong "
+ "to a gntdev instance: %#lx\n", vaddr);
+ rc = -EFAULT;
+ goto get_offset_unlock_out;
+ }
+ if (vma->vm_start != vaddr) {
+ printk(KERN_ERR "The vaddr specified in an "
+ "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
+ "the start of the VM area. vma->vm_start = "
+ "%#lx; vaddr = %#lx\n",
+ vma->vm_start, vaddr);
+ rc = -EFAULT;
+ goto get_offset_unlock_out;
+ }
+ op.offset = vma->vm_pgoff << PAGE_SHIFT;
+ op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+		up_read(&current->mm->mmap_sem);
+ if ((rc = copy_to_user((void __user *) arg,
+ &op,
+ sizeof(op)))) {
+ rc = -EFAULT;
+ goto get_offset_out;
+ }
+ goto get_offset_out;
+ get_offset_unlock_out:
+ up_read(¤t->mm->mmap_sem);
+ get_offset_out:
+ return rc;
+ }
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return 0;
+}
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/util.c
--- a/linux-2.6-xen-sparse/drivers/xen/util.c Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/util.c Tue Apr 03 13:04:51 2007 -0600
@@ -4,6 +4,26 @@
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include <xen/driver_util.h>
+
+struct class *get_xen_class(void)
+{
+ static struct class *xen_class;
+
+ if (xen_class)
+ return xen_class;
+
+ xen_class = class_create(THIS_MODULE, "xen");
+ if (IS_ERR(xen_class)) {
+ printk("Failed to create xen sysfs class.\n");
+ xen_class = NULL;
+ }
+
+ return xen_class;
+}
+EXPORT_SYMBOL_GPL(get_xen_class);
+
+/* Todo: merge ia64 ('auto-translate physmap') versions of these functions. */
+#ifndef __ia64__
static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
@@ -46,3 +66,5 @@ void free_vm_area(struct vm_struct *area
kfree(area);
}
EXPORT_SYMBOL_GPL(free_vm_area);
+
+#endif /* !__ia64__ */
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/include/linux/mm.h Tue Apr 03 13:04:51 2007 -0600
@@ -205,6 +205,10 @@ struct vm_operations_struct {
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
+ /* Area-specific function for clearing the PTE at @ptep. Returns the
+ * original value of @ptep. */
+ pte_t (*zap_pte)(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, int is_fullmm);
#ifdef CONFIG_NUMA
int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
diff -r fc9e2f7920c9 -r f378c424e0ce
linux-2.6-xen-sparse/include/xen/driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/driver_util.h Fri Mar 30 17:18:42
2007 -0600
+++ b/linux-2.6-xen-sparse/include/xen/driver_util.h Tue Apr 03 13:04:51
2007 -0600
@@ -3,9 +3,12 @@
#define __ASM_XEN_DRIVER_UTIL_H__
#include <linux/vmalloc.h>
+#include <linux/device.h>
/* Allocate/destroy a 'vmalloc' VM area. */
extern struct vm_struct *alloc_vm_area(unsigned long size);
extern void free_vm_area(struct vm_struct *area);
+extern struct class *get_xen_class(void);
+
#endif /* __ASM_XEN_DRIVER_UTIL_H__ */
diff -r fc9e2f7920c9 -r f378c424e0ce
linux-2.6-xen-sparse/include/xen/public/gntdev.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/public/gntdev.h Tue Apr 03 13:04:51
2007 -0600
@@ -0,0 +1,105 @@
+/******************************************************************************
+ * gntdev.h
+ *
+ * Interface to /dev/xen/gntdev.
+ *
+ * Copyright (c) 2007, D G Murray
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTDEV_H__
+#define __LINUX_PUBLIC_GNTDEV_H__
+
+struct ioctl_gntdev_grant_ref {
+ /* The domain ID of the grant to be mapped. */
+ uint32_t domid;
+ /* The grant reference of the grant to be mapped. */
+ uint32_t ref;
+};
+
+/*
+ * Inserts the grant references into the mapping table of an instance
+ * of gntdev. N.B. This does not perform the mapping, which is deferred
+ * until mmap() is called with @index as the offset.
+ */
+#define IOCTL_GNTDEV_MAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
+struct ioctl_gntdev_map_grant_ref {
+ /* IN parameters */
+ /* The number of grants to be mapped. */
+ uint32_t count;
+ uint32_t pad;
+ /* OUT parameters */
+ /* The offset to be used on a subsequent call to mmap(). */
+ uint64_t index;
+ /* Variable IN parameter. */
+ /* Array of grant references, of size @count. */
+ struct ioctl_gntdev_grant_ref refs[1];
+};
+
+/*
+ * Removes the grant references from the mapping table of an instance of
+ * of gntdev. N.B. munmap() must be called on the relevant virtual address(es)
+ * before this ioctl is called, or an error will result.
+ */
+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))
+struct ioctl_gntdev_unmap_grant_ref {
+ /* IN parameters */
+ /* The offset was returned by the corresponding map operation. */
+ uint64_t index;
+ /* The number of pages to be unmapped. */
+ uint32_t count;
+ uint32_t pad;
+};
+
+/*
+ * Returns the offset in the driver's address space that corresponds
+ * to @vaddr. This can be used to perform a munmap(), followed by an
+ * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
+ * the caller. The number of pages that were allocated at the same time as
+ * @vaddr is returned in @count.
+ *
+ * N.B. Where more than one page has been mapped into a contiguous range, the
+ * supplied @vaddr must correspond to the start of the range; otherwise
+ * an error will result. It is only possible to munmap() the entire
+ * contiguously-allocated range at once, and not any subrange thereof.
+ */
+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
+_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
+struct ioctl_gntdev_get_offset_for_vaddr {
+ /* IN parameters */
+ /* The virtual address of the first mapped page in a range. */
+ uint64_t vaddr;
+ /* OUT parameters */
+ /* The offset that was used in the initial mmap() operation. */
+ uint64_t offset;
+ /* The number of pages mapped in the VM area that begins at @vaddr. */
+ uint32_t count;
+ uint32_t pad;
+};
+
+#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/mm/memory.c Tue Apr 03 13:04:51 2007 -0600
@@ -659,8 +659,12 @@ static unsigned long zap_pte_range(struc
page->index > details->last_index))
continue;
}
- ptent = ptep_get_and_clear_full(mm, addr, pte,
- tlb->fullmm);
+ if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
+ ptent = vma->vm_ops->zap_pte(vma, addr, pte,
+ tlb->fullmm);
+ else
+ ptent = ptep_get_and_clear_full(mm, addr, pte,
+ tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
@@ -755,6 +759,7 @@ static unsigned long unmap_page_range(st
details = NULL;
BUG_ON(addr >= end);
+
tlb_start_vma(tlb, vma);
pgd = pgd_offset(vma->vm_mm, addr);
do {
diff -r fc9e2f7920c9 -r f378c424e0ce tools/blktap/drivers/qcow2raw.c
--- a/tools/blktap/drivers/qcow2raw.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/blktap/drivers/qcow2raw.c Tue Apr 03 13:04:51 2007 -0600
@@ -51,7 +51,6 @@
#define BLOCK_PROCESSSZ 4096
static int maxfds, *qcowio_fd, *aio_fd, running = 1, complete = 0;
-static int read_complete = 0, write_complete = 0;
static int returned_read_events = 0, returned_write_events = 0;
static int submit_events = 0;
static uint32_t read_idx = 0, write_idx = 0;
@@ -109,8 +108,6 @@ static int send_write_responses(struct d
written += BLOCK_PROCESSSZ;
returned_write_events++;
write_idx = idx;
- if (complete && (returned_write_events == submit_events))
- write_complete = 1;
debug_output(written, dd->td_state->size << 9);
free(private);
@@ -126,8 +123,6 @@ static int send_read_responses(struct di
returned_read_events++;
read_idx = idx;
- if (complete && (returned_read_events == submit_events))
- read_complete = 1;
ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9,
private,
send_write_responses, idx, private);
@@ -136,7 +131,7 @@ static int send_read_responses(struct di
return 0;
}
- if ( (complete && returned_read_events == submit_events) ||
+ if ( (returned_read_events == submit_events) ||
(returned_read_events % 10 == 0) ) {
ddaio.drv->td_submit(&ddaio);
}
@@ -299,6 +294,7 @@ int main(int argc, char *argv[])
}
/*Attempt to read 4k sized blocks*/
+ submit_events++;
ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9,
BLOCK_PROCESSSZ>>9,
buf,
send_read_responses,
i>>9, buf);
@@ -309,7 +305,6 @@ int main(int argc, char *argv[])
exit(-1);
} else {
i += BLOCK_PROCESSSZ;
- submit_events++;
}
if (i >= ddqcow.td_state->size<<9) {
diff -r fc9e2f7920c9 -r f378c424e0ce tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/examples/xmexample.hvm Tue Apr 03 13:04:51 2007 -0600
@@ -180,6 +180,10 @@ serial='pty'
#-----------------------------------------------------------------------------
+# set the real time clock offset in seconds [default=0 i.e. same as dom0]
+#rtc_timeoffset=3600
+
+#-----------------------------------------------------------------------------
# start in full screen
#full-screen=1
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/target-i386-dm/helper2.c Tue Apr 03 13:04:51 2007 -0600
@@ -73,6 +73,8 @@ int vcpus = 1;
int vcpus = 1;
int xc_handle;
+
+long time_offset = 0;
shared_iopage_t *shared_page = NULL;
@@ -439,6 +441,34 @@ void cpu_ioreq_xor(CPUState *env, ioreq_
req->data = tmp1;
}
+void timeoffset_get()
+{
+ char *p;
+
+ p = xenstore_vm_read(domid, "rtc/timeoffset", NULL);
+ if (!p)
+ return;
+
+ if (sscanf(p, "%ld", &time_offset) == 1)
+ fprintf(logfile, "Time offset set %ld\n", time_offset);
+ else
+ time_offset = 0;
+
+ xc_domain_set_time_offset(xc_handle, domid, time_offset);
+
+ free(p);
+}
+
+void cpu_ioreq_timeoffset(CPUState *env, ioreq_t *req)
+{
+ char b[64];
+
+ time_offset += (ulong)req->data;
+
+ sprintf(b, "%ld", time_offset);
+ xenstore_vm_write(domid, "rtc/timeoffset", b);
+}
+
void cpu_ioreq_xchg(CPUState *env, ioreq_t *req)
{
unsigned long tmp1;
@@ -478,6 +508,9 @@ void __handle_ioreq(CPUState *env, ioreq
case IOREQ_TYPE_XCHG:
cpu_ioreq_xchg(env, req);
break;
+ case IOREQ_TYPE_TIMEOFFSET:
+ cpu_ioreq_timeoffset(env, req);
+ break;
default:
hw_error("Invalid ioreq type 0x%x\n", req->type);
}
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/vl.c Tue Apr 03 13:04:51 2007 -0600
@@ -6670,6 +6670,9 @@ int main(int argc, char **argv)
}
free(page_array);
#endif
+
+ timeoffset_get();
+
#else /* !CONFIG_DM */
phys_ram_base = qemu_vmalloc(phys_ram_size);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/vl.h
--- a/tools/ioemu/vl.h Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/vl.h Tue Apr 03 13:04:51 2007 -0600
@@ -1276,6 +1276,12 @@ int xenstore_unsubscribe_from_hotplug_st
const char *inst,
const char *token);
+int xenstore_vm_write(int domid, char *key, char *val);
+char *xenstore_vm_read(int domid, char *key, int *len);
+
+/* helper2.c */
+extern long time_offset;
+void timeoffset_get(void);
/* xen_platform.c */
void pci_xen_platform_init(PCIBus *bus);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/xenstore.c Tue Apr 03 13:04:51 2007 -0600
@@ -567,3 +567,72 @@ int xenstore_unsubscribe_from_hotplug_st
return rc;
}
+
+char *xenstore_vm_read(int domid, char *key, int *len)
+{
+ char *buf = NULL, *path = NULL, *value = NULL;
+
+ if (xsh == NULL)
+ goto out;
+
+ path = xs_get_domain_path(xsh, domid);
+ if (path == NULL) {
+ fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
+ goto out;
+ }
+
+ pasprintf(&buf, "%s/vm", path);
+ free(path);
+ path = xs_read(xsh, XBT_NULL, buf, NULL);
+ if (path == NULL) {
+ fprintf(logfile, "xs_read(%s): read error\n", buf);
+ goto out;
+ }
+
+ pasprintf(&buf, "%s/%s", path, key);
+ value = xs_read(xsh, XBT_NULL, buf, len);
+ if (value == NULL) {
+ fprintf(logfile, "xs_read(%s): read error\n", buf);
+ goto out;
+ }
+
+ out:
+ free(path);
+ free(buf);
+ return value;
+}
+
+int xenstore_vm_write(int domid, char *key, char *value)
+{
+ char *buf = NULL, *path = NULL;
+ int rc = -1;
+
+ if (xsh == NULL)
+ goto out;
+
+ path = xs_get_domain_path(xsh, domid);
+ if (path == NULL) {
+        fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
+ goto out;
+ }
+
+ pasprintf(&buf, "%s/vm", path);
+ free(path);
+ path = xs_read(xsh, XBT_NULL, buf, NULL);
+ if (path == NULL) {
+ fprintf(logfile, "xs_read(%s): read error\n", buf);
+ goto out;
+ }
+
+ pasprintf(&buf, "%s/%s", path, key);
+ rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value));
+ if (rc) {
+ fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
+ goto out;
+ }
+
+ out:
+ free(path);
+ free(buf);
+ return rc;
+}
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/ia64/xc_ia64_linux_restore.c
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Tue Apr 03 13:04:51 2007 -0600
@@ -14,8 +14,14 @@
#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
+static unsigned long nr_pfns;
+
+/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */
+static unsigned long max_nr_pfns;
static ssize_t
read_exact(int fd, void *buf, size_t count)
@@ -57,9 +63,9 @@ read_page(int xc_handle, int io_fd, uint
int
xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
- unsigned long nr_pfns, unsigned int store_evtchn,
- unsigned long *store_mfn, unsigned int console_evtchn,
- unsigned long *console_mfn)
+ unsigned long p2msize, unsigned long maxnrpfns,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn)
{
DECLARE_DOMCTL;
int rc = 1, i;
@@ -79,10 +85,13 @@ xc_linux_restore(int xc_handle, int io_f
/* A temporary mapping of the guest's start_info page. */
start_info_t *start_info;
- max_pfn = nr_pfns;
-
- DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn);
-
+ p2m_size = p2msize;
+ max_nr_pfns = maxnrpfns;
+
+ /* For info only */
+ nr_pfns = 0;
+
+ DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
ERROR("Error when reading version");
@@ -99,29 +108,29 @@ xc_linux_restore(int xc_handle, int io_f
return 1;
}
- if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+ if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) {
errno = ENOMEM;
goto out;
}
/* Get pages. */
- page_array = malloc(max_pfn * sizeof(unsigned long));
+ page_array = malloc(p2m_size * sizeof(unsigned long));
if (page_array == NULL) {
ERROR("Could not allocate memory");
goto out;
}
- for ( i = 0; i < max_pfn; i++ )
+ for ( i = 0; i < p2m_size; i++ )
page_array[i] = i;
- if ( xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn,
+ if ( xc_domain_memory_populate_physmap(xc_handle, dom, p2m_size,
0, 0, page_array) )
{
ERROR("Failed to allocate memory for %ld KB to dom %d.\n",
- PFN_TO_KB(max_pfn), dom);
- goto out;
- }
- DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(max_pfn));
+ PFN_TO_KB(p2m_size), dom);
+ goto out;
+ }
+ DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(p2m_size));
if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup)))
{
ERROR("read: domain setup");
@@ -131,9 +140,9 @@ xc_linux_restore(int xc_handle, int io_f
/* Build firmware (will be overwritten). */
domctl.domain = (domid_t)dom;
domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query;
- domctl.u.arch_setup.bp = ((nr_pfns - 3) << PAGE_SHIFT)
+ domctl.u.arch_setup.bp = ((p2m_size - 3) << PAGE_SHIFT)
+ sizeof (start_info_t);
- domctl.u.arch_setup.maxmem = (nr_pfns - 3) << PAGE_SHIFT;
+ domctl.u.arch_setup.maxmem = (p2m_size - 3) << PAGE_SHIFT;
domctl.cmd = XEN_DOMCTL_arch_setup;
if (xc_domctl(xc_handle, &domctl))
@@ -157,8 +166,6 @@ xc_linux_restore(int xc_handle, int io_f
}
if (gmfn == INVALID_MFN)
break;
-
- //DPRINTF("xc_linux_restore: page %lu/%lu at %lx\n", gmfn, max_pfn,
pfn);
if (read_page(xc_handle, io_fd, dom, gmfn) < 0)
goto out;
@@ -281,7 +288,7 @@ xc_linux_restore(int xc_handle, int io_f
/* Uncanonicalise the suspend-record frame number and poke resume rec. */
start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
PROT_READ | PROT_WRITE, gmfn);
- start_info->nr_pages = max_pfn;
+ start_info->nr_pages = p2m_size;
start_info->shared_info = shared_info_frame << PAGE_SHIFT;
start_info->flags = 0;
*store_mfn = start_info->store_mfn;
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_core.c Tue Apr 03 13:04:51 2007 -0600
@@ -312,7 +312,7 @@ xc_domain_dumpcore_via_callback(int xc_h
int auto_translated_physmap;
xen_pfn_t *p2m = NULL;
- unsigned long max_pfn = 0;
+ unsigned long p2m_size = 0;
struct xen_dumpcore_p2m *p2m_array = NULL;
uint64_t *pfn_array = NULL;
@@ -396,7 +396,7 @@ xc_domain_dumpcore_via_callback(int xc_h
}
sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
- &p2m, &max_pfn);
+ &p2m, &p2m_size);
if ( sts != 0 )
goto out;
}
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_core_x86.c
--- a/tools/libxc/xc_core_x86.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_core_x86.c Tue Apr 03 13:04:51 2007 -0600
@@ -38,7 +38,7 @@ xc_core_arch_memory_map_get(int xc_handl
xc_core_memory_map_t **mapp,
unsigned int *nr_entries)
{
- unsigned long max_pfn = max_gpfn(xc_handle, info->domid);
+ unsigned long p2m_size = max_gpfn(xc_handle, info->domid);
xc_core_memory_map_t *map;
map = malloc(sizeof(*map));
@@ -49,7 +49,7 @@ xc_core_arch_memory_map_get(int xc_handl
}
map->addr = 0;
- map->size = max_pfn << PAGE_SHIFT;
+ map->size = p2m_size << PAGE_SHIFT;
*mapp = map;
*nr_entries = 1;
@@ -65,13 +65,13 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
xen_pfn_t *live_p2m_frame_list_list = NULL;
xen_pfn_t *live_p2m_frame_list = NULL;
uint32_t dom = info->domid;
- unsigned long max_pfn = max_gpfn(xc_handle, info->domid);
+ unsigned long p2m_size = max_gpfn(xc_handle, info->domid);
int ret = -1;
int err;
- if ( max_pfn < info->nr_pages )
+ if ( p2m_size < info->nr_pages )
{
- ERROR("max_pfn < nr_pages -1 (%lx < %lx", max_pfn, info->nr_pages - 1);
+ ERROR("p2m_size < nr_pages -1 (%lx < %lx", p2m_size, info->nr_pages -
1);
goto out;
}
@@ -106,7 +106,7 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
goto out;
}
- *pfnp = max_pfn;
+ *pfnp = p2m_size;
ret = 0;
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_hvm_restore.c
--- a/tools/libxc/xc_hvm_restore.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_hvm_restore.c Tue Apr 03 13:04:51 2007 -0600
@@ -95,7 +95,7 @@ int xc_hvm_restore(int xc_handle, int io
unsigned long pfn_array_size = max_pfn + 1;
/* Number of pages of memory the guest has. *Not* the same as max_pfn. */
- unsigned long nr_pages = max_pfn + 1;
+ unsigned long nr_pages = max_pfn;
/* MMIO hole doesn't contain RAM */
if ( nr_pages >= HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT )
nr_pages -= HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
@@ -270,7 +270,6 @@ int xc_hvm_restore(int xc_handle, int io
}/*while 1*/
-/* xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
@@ -279,13 +278,22 @@ int xc_hvm_restore(int xc_handle, int io
else
shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
+ /* Ensure we clear these pages */
+ if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
+ xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
+ xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) {
+ rc = -1;
+ goto out;
+ }
+
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
/* caculate the store_mfn , wrong val cause hang when introduceDomain */
*store_mfn = (v_end >> PAGE_SHIFT) - 2;
- DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n",
*store_mfn, v_end);
+ DPRINTF("hvm restore: calculate new store_mfn=0x%lx, v_end=0x%llx.\n",
+ *store_mfn, v_end);
if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
ERROR("error read nr vcpu !\n");
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_hvm_save.c
--- a/tools/libxc/xc_hvm_save.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_hvm_save.c Tue Apr 03 13:04:51 2007 -0600
@@ -332,10 +332,10 @@ int xc_hvm_save(int xc_handle, int io_fd
unsigned long total_sent = 0;
- DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x,
live=%d, debug=%d.\n",
- dom, max_iters, max_factor, flags,
+ DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
+ "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
live, debug);
-
+
/* If no explicit control parameters given, use defaults */
if(!max_iters)
max_iters = DEF_MAX_ITERS;
@@ -382,7 +382,6 @@ int xc_hvm_save(int xc_handle, int io_fd
ERROR("HVM: Could not read magic PFN parameters");
goto out;
}
-
DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, "
"nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux.c Tue Apr 03 13:04:51 2007 -0600
@@ -2,6 +2,9 @@
*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * xc_gnttab functions:
+ * Copyright (c) 2007, D G Murray <Derek.Murray@xxxxxxxxxxxx>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -13,6 +16,7 @@
#include <xen/memory.h>
#include <xen/sys/evtchn.h>
+#include <xen/sys/gntdev.h>
#include <unistd.h>
#include <fcntl.h>
@@ -361,6 +365,158 @@ void discard_file_cache(int fd, int flus
out:
errno = saved_errno;
+}
+
+#define GNTTAB_DEV_NAME "/dev/xen/gntdev"
+
+int xc_gnttab_open(void)
+{
+ struct stat st;
+ int fd;
+ int devnum;
+
+ devnum = xc_find_device_number("gntdev");
+
+ /* Make sure any existing device file links to correct device. */
+ if ( (lstat(GNTTAB_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
+ (st.st_rdev != devnum) )
+ (void)unlink(GNTTAB_DEV_NAME);
+
+reopen:
+ if ( (fd = open(GNTTAB_DEV_NAME, O_RDWR)) == -1 )
+ {
+ if ( (errno == ENOENT) &&
+ ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+ (mknod(GNTTAB_DEV_NAME, S_IFCHR|0600, devnum) == 0) )
+ goto reopen;
+
+ PERROR("Could not open grant table interface");
+ return -1;
+ }
+
+ return fd;
+}
+
+int xc_gnttab_close(int xcg_handle)
+{
+ return close(xcg_handle);
+}
+
+void *xc_gnttab_map_grant_ref(int xcg_handle,
+ uint32_t domid,
+ uint32_t ref,
+ int prot)
+{
+ struct ioctl_gntdev_map_grant_ref map;
+ void *addr;
+
+ map.count = 1;
+ map.refs[0].domid = domid;
+ map.refs[0].ref = ref;
+
+ if ( ioctl(xcg_handle, IOCTL_GNTDEV_MAP_GRANT_REF, &map) )
+ return NULL;
+
+ addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, xcg_handle, map.index);
+ if ( addr == MAP_FAILED )
+ {
+ int saved_errno = errno;
+ struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+ /* Unmap the driver slots used to store the grant information. */
+ unmap_grant.index = map.index;
+ unmap_grant.count = 1;
+ ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
+ errno = saved_errno;
+ return NULL;
+ }
+
+ return addr;
+}
+
+void *xc_gnttab_map_grant_refs(int xcg_handle,
+ uint32_t count,
+ uint32_t *domids,
+ uint32_t *refs,
+ int prot)
+{
+ struct ioctl_gntdev_map_grant_ref *map;
+ void *addr = NULL;
+ int i;
+
+ map = malloc(sizeof(*map) +
+ (count-1) * sizeof(struct ioctl_gntdev_map_grant_ref));
+ if ( map == NULL )
+ return NULL;
+
+ for ( i = 0; i < count; i++ )
+ {
+ map->refs[i].domid = domids[i];
+ map->refs[i].ref = refs[i];
+ }
+
+ map->count = count;
+
+    if ( ioctl(xcg_handle, IOCTL_GNTDEV_MAP_GRANT_REF, map) )
+ goto out;
+
+ addr = mmap(NULL, PAGE_SIZE * count, prot, MAP_SHARED, xcg_handle,
+ map->index);
+ if ( addr == MAP_FAILED )
+ {
+ int saved_errno = errno;
+ struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+ /* Unmap the driver slots used to store the grant information. */
+ unmap_grant.index = map->index;
+ unmap_grant.count = count;
+ ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
+ errno = saved_errno;
+ addr = NULL;
+ }
+
+ out:
+ free(map);
+ return addr;
+}
+
+int xc_gnttab_munmap(int xcg_handle,
+ void *start_address,
+ uint32_t count)
+{
+ struct ioctl_gntdev_get_offset_for_vaddr get_offset;
+ struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+ int rc;
+
+ if ( start_address == NULL )
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* First, it is necessary to get the offset which was initially used to
+ * mmap() the pages.
+ */
+ get_offset.vaddr = (unsigned long)start_address;
+ if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR,
+ &get_offset)) )
+ return rc;
+
+ if ( get_offset.count != count )
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* Next, unmap the memory. */
+ if ( (rc = munmap(start_address, count * getpagesize())) )
+ return rc;
+
+ /* Finally, unmap the driver slots used to store the grant information. */
+ unmap_grant.index = get_offset.offset;
+ unmap_grant.count = count;
+ if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant)) )
+ return rc;
+
+ return 0;
}
/*
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux_restore.c Tue Apr 03 13:04:51 2007 -0600
@@ -22,8 +22,14 @@ static unsigned long hvirt_start;
/* #levels of page tables used by the current guest */
static unsigned int pt_levels;
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
+static unsigned long nr_pfns;
+
+/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */
+static unsigned long max_nr_pfns;
/* Live mapping of the table mapping each PFN to its current MFN. */
static xen_pfn_t *live_p2m = NULL;
@@ -33,7 +39,6 @@ static xen_pfn_t *p2m = NULL;
/* A table of P2M mappings in the current region */
static xen_pfn_t *p2m_batch = NULL;
-
static ssize_t
read_exact(int fd, void *buf, size_t count)
@@ -85,11 +90,11 @@ static int uncanonicalize_pagetable(int
pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
- if(pfn >= max_pfn) {
+ if(pfn >= p2m_size) {
/* This "page table page" is probably not one; bail. */
ERROR("Frame number in type %lu page table is out of range: "
- "i=%d pfn=0x%lx max_pfn=%lu",
- type >> 28, i, pfn, max_pfn);
+ "i=%d pfn=0x%lx p2m_size=%lu",
+ type >> 28, i, pfn, p2m_size);
return 0;
}
@@ -138,8 +143,9 @@ static int uncanonicalize_pagetable(int
return 1;
}
-int xc_linux_restore(int xc_handle, int io_fd,
- uint32_t dom, unsigned long nr_pfns,
+
+int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
+ unsigned long p2msize, unsigned long maxnrpfns,
unsigned int store_evtchn, unsigned long *store_mfn,
unsigned int console_evtchn, unsigned long *console_mfn)
{
@@ -191,9 +197,13 @@ int xc_linux_restore(int xc_handle, int
unsigned int max_vcpu_id = 0;
int new_ctxt_format = 0;
- max_pfn = nr_pfns;
-
- DPRINTF("xc_linux_restore start: max_pfn = %lx\n", max_pfn);
+ p2m_size = p2msize;
+ max_nr_pfns = maxnrpfns;
+
+ /* For info only */
+ nr_pfns = 0;
+
+ DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
/*
* XXX For now, 32bit dom0's can only save/restore 32bit domUs
@@ -294,8 +304,8 @@ int xc_linux_restore(int xc_handle, int
}
/* We want zeroed memory so use calloc rather than malloc. */
- p2m = calloc(max_pfn, sizeof(xen_pfn_t));
- pfn_type = calloc(max_pfn, sizeof(unsigned long));
+ p2m = calloc(p2m_size, sizeof(xen_pfn_t));
+ pfn_type = calloc(p2m_size, sizeof(unsigned long));
region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
@@ -325,13 +335,13 @@ int xc_linux_restore(int xc_handle, int
}
shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
- if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+ if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) {
errno = ENOMEM;
goto out;
}
/* Mark all PFNs as invalid; we allocate on demand */
- for ( pfn = 0; pfn < max_pfn; pfn++ )
+ for ( pfn = 0; pfn < p2m_size; pfn++ )
p2m[pfn] = INVALID_P2M_ENTRY;
if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
@@ -352,7 +362,7 @@ int xc_linux_restore(int xc_handle, int
int j, nr_mfns = 0;
- this_pc = (n * 100) / max_pfn;
+ this_pc = (n * 100) / p2m_size;
if ( (this_pc - prev_pc) >= 5 )
{
PPRINTF("\b\b\b\b%3d%%", this_pc);
@@ -436,6 +446,7 @@ int xc_linux_restore(int xc_handle, int
if (p2m[pfn] == INVALID_P2M_ENTRY) {
/* We just allocated a new mfn above; update p2m */
p2m[pfn] = p2m_batch[nr_mfns++];
+ nr_pfns++;
}
/* setup region_mfn[] for batch map */
@@ -465,7 +476,7 @@ int xc_linux_restore(int xc_handle, int
/* a bogus/unmapped page: skip it */
continue;
- if ( pfn > max_pfn )
+ if ( pfn > p2m_size )
{
ERROR("pfn out of range");
goto out;
@@ -518,7 +529,7 @@ int xc_linux_restore(int xc_handle, int
else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
{
ERROR("Bogus page type %lx page table is out of range: "
- "i=%d max_pfn=%lu", pagetype, i, max_pfn);
+ "i=%d p2m_size=%lu", pagetype, i, p2m_size);
goto out;
}
@@ -598,7 +609,7 @@ int xc_linux_restore(int xc_handle, int
int j, k;
/* First pass: find all L3TABs current in > 4G mfns and get new mfns */
- for ( i = 0; i < max_pfn; i++ )
+ for ( i = 0; i < p2m_size; i++ )
{
if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
XEN_DOMCTL_PFINFO_L3TAB) &&
@@ -646,7 +657,7 @@ int xc_linux_restore(int xc_handle, int
/* Second pass: find all L1TABs and uncanonicalize them */
j = 0;
- for ( i = 0; i < max_pfn; i++ )
+ for ( i = 0; i < p2m_size; i++ )
{
if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
XEN_DOMCTL_PFINFO_L1TAB) )
@@ -655,7 +666,7 @@ int xc_linux_restore(int xc_handle, int
j++;
}
- if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
+ if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) {
if (!(region_base = xc_map_foreign_batch(
xc_handle, dom, PROT_READ | PROT_WRITE,
@@ -689,7 +700,7 @@ int xc_linux_restore(int xc_handle, int
* will barf when doing the type-checking.
*/
nr_pins = 0;
- for ( i = 0; i < max_pfn; i++ )
+ for ( i = 0; i < p2m_size; i++ )
{
if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
continue;
@@ -736,7 +747,7 @@ int xc_linux_restore(int xc_handle, int
}
DPRINTF("\b\b\b\b100%%\n");
- DPRINTF("Memory reloaded.\n");
+ DPRINTF("Memory reloaded (%ld pages of max %ld)\n", nr_pfns, max_nr_pfns);
/* Get the list of PFNs that are not in the psuedo-phys map */
{
@@ -808,7 +819,7 @@ int xc_linux_restore(int xc_handle, int
* resume record.
*/
pfn = ctxt.user_regs.edx;
- if ((pfn >= max_pfn) ||
+ if ((pfn >= p2m_size) ||
(pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
ERROR("Suspend record frame number is bad");
goto out;
@@ -816,7 +827,7 @@ int xc_linux_restore(int xc_handle, int
ctxt.user_regs.edx = mfn = p2m[pfn];
start_info = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
- start_info->nr_pages = max_pfn;
+ start_info->nr_pages = p2m_size;
start_info->shared_info = shared_info_frame << PAGE_SHIFT;
start_info->flags = 0;
*store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
@@ -835,7 +846,7 @@ int xc_linux_restore(int xc_handle, int
for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
pfn = ctxt.gdt_frames[j];
- if ((pfn >= max_pfn) ||
+ if ((pfn >= p2m_size) ||
(pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
ERROR("GDT frame number is bad");
goto out;
@@ -846,16 +857,16 @@ int xc_linux_restore(int xc_handle, int
/* Uncanonicalise the page table base pointer. */
pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
- if (pfn >= max_pfn) {
- ERROR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
- pfn, max_pfn, pfn_type[pfn]);
+ if (pfn >= p2m_size) {
+ ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
+ pfn, p2m_size, pfn_type[pfn]);
goto out;
}
if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
- pfn, max_pfn, pfn_type[pfn],
+ pfn, p2m_size, pfn_type[pfn],
(unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
goto out;
}
@@ -867,16 +878,16 @@ int xc_linux_restore(int xc_handle, int
{
pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
- if (pfn >= max_pfn) {
- ERROR("User PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
- pfn, max_pfn, pfn_type[pfn]);
+ if (pfn >= p2m_size) {
+ ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
+ pfn, p2m_size, pfn_type[pfn]);
goto out;
}
if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
- pfn, max_pfn, pfn_type[pfn],
+ pfn, p2m_size, pfn_type[pfn],
(unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
goto out;
}
@@ -915,7 +926,7 @@ int xc_linux_restore(int xc_handle, int
/* Uncanonicalise the pfn-to-mfn table frame-number list. */
for (i = 0; i < P2M_FL_ENTRIES; i++) {
pfn = p2m_frame_list[i];
- if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
+ if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
ERROR("PFN-to-MFN frame number is bad");
goto out;
}
@@ -930,8 +941,8 @@ int xc_linux_restore(int xc_handle, int
goto out;
}
- memcpy(live_p2m, p2m, P2M_SIZE);
- munmap(live_p2m, P2M_SIZE);
+ memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+ munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
DPRINTF("Domain ready to be built.\n");
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux_save.c Tue Apr 03 13:04:51 2007 -0600
@@ -25,7 +25,7 @@
**
*/
#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
-#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */
+#define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */
/* max mfn of the whole machine */
@@ -37,8 +37,8 @@ static unsigned long hvirt_start;
/* #levels of page tables used by the current guest */
static unsigned int pt_levels;
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
/* Live mapping of the table mapping each PFN to its current MFN. */
static xen_pfn_t *live_p2m = NULL;
@@ -57,7 +57,7 @@ static unsigned long m2p_mfn0;
*/
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
(((_mfn) < (max_mfn)) && \
- ((mfn_to_pfn(_mfn) < (max_pfn)) && \
+ ((mfn_to_pfn(_mfn) < (p2m_size)) && \
(live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
@@ -79,7 +79,7 @@ static unsigned long m2p_mfn0;
*/
#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-#define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8)
+#define BITMAP_SIZE ((p2m_size + BITS_PER_LONG - 1) / 8)
#define BITMAP_ENTRY(_nr,_bmap) \
((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
@@ -343,7 +343,7 @@ static int print_stats(int xc_handle, ui
}
-static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn,
+static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
unsigned long *arr, int runs)
{
long long start, now;
@@ -356,7 +356,7 @@ static int analysis_phase(int xc_handle,
int i;
xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
- arr, max_pfn, NULL, 0, NULL);
+ arr, p2m_size, NULL, 0, NULL);
DPRINTF("#Flush\n");
for ( i = 0; i < 40; i++ ) {
usleep(50000);
@@ -682,7 +682,7 @@ int xc_linux_save(int xc_handle, int io_
/* base of the region in which domain memory is mapped */
unsigned char *region_base = NULL;
- /* power of 2 order of max_pfn */
+ /* power of 2 order of p2m_size */
int order_nr;
/* bitmap of pages:
@@ -730,7 +730,7 @@ int xc_linux_save(int xc_handle, int io_
goto out;
}
- max_pfn = live_shinfo->arch.max_pfn;
+ p2m_size = live_shinfo->arch.max_pfn;
live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
live_shinfo);
@@ -777,7 +777,7 @@ int xc_linux_save(int xc_handle, int io_
memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
/* Canonicalise the pfn-to-mfn table frame-number list. */
- for (i = 0; i < max_pfn; i += fpp) {
+ for (i = 0; i < p2m_size; i += fpp) {
if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) {
ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
@@ -813,12 +813,12 @@ int xc_linux_save(int xc_handle, int io_
}
/* pretend we sent all the pages last iteration */
- sent_last_iter = max_pfn;
-
-
- /* calculate the power of 2 order of max_pfn, e.g.
+ sent_last_iter = p2m_size;
+
+
+ /* calculate the power of 2 order of p2m_size, e.g.
15->4 16->4 17->5 */
- for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
+ for (i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++)
continue;
/* Setup to_send / to_fix and to_skip bitmaps */
@@ -844,7 +844,7 @@ int xc_linux_save(int xc_handle, int io_
return 1;
}
- analysis_phase(xc_handle, dom, max_pfn, to_skip, 0);
+ analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
/* We want zeroed memory so use calloc rather than malloc. */
pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
@@ -867,7 +867,7 @@ int xc_linux_save(int xc_handle, int io_
{
int err=0;
unsigned long mfn;
- for (i = 0; i < max_pfn; i++) {
+ for (i = 0; i < p2m_size; i++) {
mfn = live_p2m[i];
if((mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i)) {
@@ -882,8 +882,8 @@ int xc_linux_save(int xc_handle, int io_
/* Start writing out the saved-domain record. */
- if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
- ERROR("write: max_pfn");
+ if (!write_exact(io_fd, &p2m_size, sizeof(unsigned long))) {
+ ERROR("write: p2m_size");
goto out;
}
@@ -929,9 +929,9 @@ int xc_linux_save(int xc_handle, int io_
DPRINTF("Saving memory pages: iter %d 0%%", iter);
- while( N < max_pfn ){
-
- unsigned int this_pc = (N * 100) / max_pfn;
+ while( N < p2m_size ){
+
+ unsigned int this_pc = (N * 100) / p2m_size;
if ((this_pc - prev_pc) >= 5) {
DPRINTF("\b\b\b\b%3d%%", this_pc);
@@ -942,7 +942,7 @@ int xc_linux_save(int xc_handle, int io_
but this is fast enough for the moment. */
if (!last_iter && xc_shadow_control(
xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK,
- to_skip, max_pfn, NULL, 0, NULL) != max_pfn) {
+ to_skip, p2m_size, NULL, 0, NULL) != p2m_size) {
ERROR("Error peeking shadow bitmap");
goto out;
}
@@ -950,9 +950,9 @@ int xc_linux_save(int xc_handle, int io_
/* load pfn_type[] with the mfn of all the pages we're doing in
this batch. */
- for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) {
-
- int n = permute(N, max_pfn, order_nr);
+ for (batch = 0; batch < MAX_BATCH_SIZE && N < p2m_size ; N++) {
+
+ int n = permute(N, p2m_size, order_nr);
if (debug) {
DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n",
@@ -1123,7 +1123,7 @@ int xc_linux_save(int xc_handle, int io_
print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
DPRINTF("Total pages sent= %ld (%.2fx)\n",
- total_sent, ((float)total_sent)/max_pfn );
+ total_sent, ((float)total_sent)/p2m_size );
DPRINTF("(of which %ld were fixups)\n", needed_to_fix );
}
@@ -1150,7 +1150,7 @@ int xc_linux_save(int xc_handle, int io_
if (((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
(iter >= max_iters) ||
(sent_this_iter+skip_this_iter < 50) ||
- (total_sent > max_pfn*max_factor)) {
+ (total_sent > p2m_size*max_factor)) {
DPRINTF("Start last iteration\n");
last_iter = 1;
@@ -1168,7 +1168,7 @@ int xc_linux_save(int xc_handle, int io_
if (xc_shadow_control(xc_handle, dom,
XEN_DOMCTL_SHADOW_OP_CLEAN, to_send,
- max_pfn, NULL, 0, &stats) != max_pfn) {
+ p2m_size, NULL, 0, &stats) != p2m_size) {
ERROR("Error flushing shadow PT");
goto out;
}
@@ -1220,7 +1220,7 @@ int xc_linux_save(int xc_handle, int io_
unsigned int i,j;
unsigned long pfntab[1024];
- for (i = 0, j = 0; i < max_pfn; i++) {
+ for (i = 0, j = 0; i < p2m_size; i++) {
if (!is_mapped(live_p2m[i]))
j++;
}
@@ -1230,13 +1230,13 @@ int xc_linux_save(int xc_handle, int io_
goto out;
}
- for (i = 0, j = 0; i < max_pfn; ) {
+ for (i = 0, j = 0; i < p2m_size; ) {
if (!is_mapped(live_p2m[i]))
pfntab[j++] = i;
i++;
- if (j == 1024 || i == max_pfn) {
+ if (j == 1024 || i == p2m_size) {
if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) {
ERROR("Error when writing to state file (6b) (errno %d)",
errno);
@@ -1333,7 +1333,7 @@ int xc_linux_save(int xc_handle, int io_
munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
if (live_p2m)
- munmap(live_p2m, P2M_SIZE);
+ munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
if (live_m2p)
munmap(live_m2p, M2P_SIZE(max_mfn));
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_resume.c
--- a/tools/libxc/xc_resume.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_resume.c Tue Apr 03 13:04:51 2007 -0600
@@ -46,7 +46,7 @@ static int xc_domain_resume_any(int xc_h
xc_dominfo_t info;
int i, rc = -1;
#if defined(__i386__) || defined(__x86_64__)
- unsigned long mfn, max_pfn = 0;
+ unsigned long mfn, p2m_size = 0;
vcpu_guest_context_t ctxt;
start_info_t *start_info;
shared_info_t *shinfo = NULL;
@@ -74,7 +74,7 @@ static int xc_domain_resume_any(int xc_h
goto out;
}
- max_pfn = shinfo->arch.max_pfn;
+ p2m_size = shinfo->arch.max_pfn;
p2m_frame_list_list =
xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ,
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xenctrl.h Tue Apr 03 13:04:51 2007 -0600
@@ -4,6 +4,9 @@
* A library for low-level access to the Xen control interfaces.
*
* Copyright (c) 2003-2004, K A Fraser.
+ *
+ * xc_gnttab functions:
+ * Copyright (c) 2007, D G Murray <Derek.Murray@xxxxxxxxxxxx>
*/
#ifndef XENCTRL_H
@@ -740,6 +743,62 @@ evtchn_port_t xc_evtchn_pending(int xce_
*/
int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
+/**************************
+ * GRANT TABLE OPERATIONS *
+ **************************/
+
+/*
+ * Return a handle to the grant table driver, or -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_gnttab_open(void);
+
+/*
+ * Close a handle previously allocated with xc_gnttab_open().
+ */
+int xc_gnttab_close(int xcg_handle);
+
+/*
+ * Memory maps a grant reference from one domain to a local address range.
+ * Mappings should be unmapped with xc_gnttab_munmap. Returns NULL on failure.
+ *
+ * @parm xcg_handle a handle on an open grant table interface
+ * @parm domid the domain to map memory from
+ * @parm ref the grant reference ID to map
+ * @parm prot same flag as in mmap()
+ */
+void *xc_gnttab_map_grant_ref(int xcg_handle,
+ uint32_t domid,
+ uint32_t ref,
+ int prot);
+
+/**
+ * Memory maps one or more grant references from one or more domains to a
+ * contiguous local address range. Mappings should be unmapped with
+ * xc_gnttab_munmap. Returns NULL on failure.
+ *
+ * @parm xcg_handle a handle on an open grant table interface
+ * @parm count the number of grant references to be mapped
+ * @parm domids an array of @count domain IDs by which the corresponding @refs
+ * were granted
+ * @parm refs an array of @count grant references to be mapped
+ * @parm prot same flag as in mmap()
+ */
+void *xc_gnttab_map_grant_refs(int xcg_handle,
+ uint32_t count,
+ uint32_t *domids,
+ uint32_t *refs,
+ int prot);
+
+/*
+ * Unmaps the @count pages starting at @start_address, which were mapped by a
+ * call to xc_gnttab_map_grant_ref or xc_gnttab_map_grant_refs. Returns zero
+ * on success, otherwise sets errno and returns non-zero.
+ */
+int xc_gnttab_munmap(int xcg_handle,
+ void *start_address,
+ uint32_t count);
+
int xc_hvm_set_pci_intx_level(
int xc_handle, domid_t dom,
uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx,
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xenguest.h Tue Apr 03 13:04:51 2007 -0600
@@ -43,15 +43,16 @@ int xc_hvm_save(int xc_handle, int io_fd
* @parm xc_handle a handle to an open hypervisor interface
* @parm fd the file descriptor to restore a domain from
* @parm dom the id of the domain
- * @parm nr_pfns the number of pages
+ * @parm p2m_size number of pages the guest has (i.e. number entries in P2M)
+ * @parm max_nr_pfns domains maximum real memory allocation, in pages
* @parm store_evtchn the store event channel for this domain to use
* @parm store_mfn returned with the mfn of the store page
* @return 0 on success, -1 on failure
*/
int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
- unsigned long nr_pfns, unsigned int store_evtchn,
- unsigned long *store_mfn, unsigned int console_evtchn,
- unsigned long *console_mfn);
+ unsigned long p2m_size, unsigned long max_nr_pfns,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn);
/**
* This function will restore a saved hvm domain running unmodified guest.
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xg_private.h Tue Apr 03 13:04:51 2007 -0600
@@ -148,17 +148,16 @@ typedef l4_pgentry_64_t l4_pgentry_t;
#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
-/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
-#define P2M_SIZE ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT)
-
/* Number of xen_pfn_t in a page */
#define fpp (PAGE_SIZE/sizeof(xen_pfn_t))
+/* XXX SMH: following 3 skanky macros rely on variable p2m_size being set */
+
/* Number of entries in the pfn_to_mfn_frame_list_list */
-#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp))
+#define P2M_FLL_ENTRIES (((p2m_size)+(fpp*fpp)-1)/(fpp*fpp))
/* Number of entries in the pfn_to_mfn_frame_list */
-#define P2M_FL_ENTRIES (((max_pfn)+fpp-1)/fpp)
+#define P2M_FL_ENTRIES (((p2m_size)+fpp-1)/fpp)
/* Size in bytes of the pfn_to_mfn_frame_list */
#define P2M_FL_SIZE ((P2M_FL_ENTRIES)*sizeof(unsigned long))
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/lowlevel/scf/scf.c
--- a/tools/python/xen/lowlevel/scf/scf.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/lowlevel/scf/scf.c Tue Apr 03 13:04:51 2007 -0600
@@ -26,7 +26,7 @@
#include <libscf.h>
#include <stdio.h>
-#define XEND_FMRI "svc:/system/xen/xend:default"
+#define XEND_FMRI "svc:/system/xctl/xend:default"
#define XEND_PG "config"
static PyObject *scf_exc;
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py Tue Apr 03 13:04:51 2007 -0600
@@ -187,6 +187,7 @@ def restore(xd, fd, dominfo = None, paus
assert console_port
nr_pfns = (dominfo.getMemoryTarget() + 3) / 4
+ max_nr_pfns = (dominfo.getMemoryMaximum() + 3) / 4
# if hvm, pass mem size to calculate the store_mfn
image_cfg = dominfo.info.get('image', {})
@@ -203,17 +204,17 @@ def restore(xd, fd, dominfo = None, paus
try:
l = read_exact(fd, sizeof_unsigned_long,
"not a valid guest state file: pfn count read")
- max_pfn = unpack("L", l)[0] # native sizeof long
-
- if max_pfn > 16*1024*1024: # XXX
+ p2m_size = unpack("L", l)[0] # native sizeof long
+
+ if p2m_size > 16*1024*1024: # XXX
raise XendError(
"not a valid guest state file: pfn count out of range")
shadow = dominfo.info['shadow_memory']
log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, "
- "nr_pfns=0x%x.", dominfo.info['shadow_memory'],
+ "p2m_size=0x%x.", dominfo.info['shadow_memory'],
dominfo.info['memory_static_max'],
- dominfo.info['memory_static_min'], nr_pfns)
+ dominfo.info['memory_static_min'], p2m_size)
balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024)
@@ -221,7 +222,7 @@ def restore(xd, fd, dominfo = None, paus
dominfo.info['shadow_memory'] = shadow_cur
cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
- fd, dominfo.getDomid(), max_pfn,
+ fd, dominfo.getDomid(), p2m_size, max_nr_pfns,
store_port, console_port, int(is_hvm), pae, apic])
log.debug("[xc_restore]: %s", string.join(cmd))
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendConfig.py Tue Apr 03 13:04:51 2007 -0600
@@ -118,7 +118,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
# Platform configuration keys.
XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display',
'fda', 'fdb', 'keymap', 'isa', 'localtime',
- 'nographic', 'pae', 'serial', 'sdl',
+ 'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
'vncconsole', 'vncdisplay', 'vnclisten',
'vncpasswd', 'vncunused', 'xauthority']
@@ -203,6 +203,7 @@ LEGACY_CFG_TYPES = {
'on_xend_stop': str,
'on_xend_start': str,
'online_vcpus': int,
+ 'rtc/timeoffset': str,
}
# Values that should be stored in xenstore's /vm/<uuid> that is used
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py Tue Apr 03 13:04:51 2007 -0600
@@ -859,7 +859,8 @@ class XendDomainInfo:
# Check whether values in the configuration have
# changed in Xenstore.
- cfg_vm = ['name', 'on_poweroff', 'on_reboot', 'on_crash']
+ cfg_vm = ['name', 'on_poweroff', 'on_reboot', 'on_crash',
+ 'rtc/timeoffset']
vm_details = self._readVMDetails([(k,XendConfig.LEGACY_CFG_TYPES[k])
for k in cfg_vm])
@@ -888,6 +889,11 @@ class XendDomainInfo:
self.info.update_with_image_sxp(sxp.from_string(image_sxp))
changed = True
+        # Check if the rtc offset has changed
+        if vm_details.get("rtc/timeoffset", 0) != self.info["platform"].get("rtc_timeoffset", 0):
+            self.info["platform"]["rtc_timeoffset"] = vm_details.get("rtc/timeoffset", 0)
+            changed = True
+
if changed:
# Update the domain section of the store, as this contains some
# parameters derived from the VM configuration.
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/balloon.py Tue Apr 03 13:04:51 2007 -0600
@@ -25,9 +25,7 @@ import XendOptions
import XendOptions
from XendLogging import log
from XendError import VmError
-
-
-PROC_XEN_BALLOON = '/proc/xen/balloon'
+import osdep
RETRY_LIMIT = 20
RETRY_LIMIT_INCR = 5
@@ -51,19 +49,7 @@ def _get_proc_balloon(label):
"""Returns the value for the named label. Returns None if the label was
not found or the value was non-numeric."""
- f = file(PROC_XEN_BALLOON, 'r')
- try:
- for line in f:
- keyvalue = line.split(':')
- if keyvalue[0] == label:
- values = keyvalue[1].split()
- if values[0].isdigit():
- return int(values[0])
- else:
- return None
- return None
- finally:
- f.close()
+ return osdep.lookup_balloon_stat(label)
def get_dom0_current_alloc():
"""Returns the current memory allocation (in KiB) of dom0."""
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/image.py Tue Apr 03 13:04:51 2007 -0600
@@ -256,9 +256,12 @@ class HVMImageHandler(ImageHandler):
self.xauthority = vmConfig['platform'].get('xauthority')
self.vncconsole = vmConfig['platform'].get('vncconsole')
+ rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset')
+
self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
("image/device-model", self.device_model),
("image/display", self.display))
+ self.vm.storeVm(("rtc/timeoffset", rtc_timeoffset))
self.pid = None
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/osdep.py
--- a/tools/python/xen/xend/osdep.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/osdep.py Tue Apr 03 13:04:51 2007 -0600
@@ -41,6 +41,55 @@ _vif_script = {
"SunOS": "vif-vnic"
}
+def _linux_balloon_stat(label):
+ """Returns the value for the named label, or None if an error occurs."""
+
+ PROC_XEN_BALLOON = '/proc/xen/balloon'
+ f = file(PROC_XEN_BALLOON, 'r')
+ try:
+ for line in f:
+ keyvalue = line.split(':')
+ if keyvalue[0] == label:
+ values = keyvalue[1].split()
+ if values[0].isdigit():
+ return int(values[0])
+ else:
+ return None
+ return None
+ finally:
+ f.close()
+
+def _solaris_balloon_stat(label):
+ """Returns the value for the named label, or None if an error occurs."""
+
+ import fcntl
+ import array
+ DEV_XEN_BALLOON = '/dev/xen/balloon'
+ BLN_IOCTL_CURRENT = 0x4201
+ BLN_IOCTL_TARGET = 0x4202
+ BLN_IOCTL_LOW = 0x4203
+ BLN_IOCTL_HIGH = 0x4204
+ BLN_IOCTL_LIMIT = 0x4205
+ label_to_ioctl = { 'Current allocation' : BLN_IOCTL_CURRENT,
+ 'Requested target' : BLN_IOCTL_TARGET,
+ 'Low-mem balloon' : BLN_IOCTL_LOW,
+ 'High-mem balloon' : BLN_IOCTL_HIGH,
+ 'Xen hard limit' : BLN_IOCTL_LIMIT }
+
+ f = file(DEV_XEN_BALLOON, 'r')
+ try:
+ values = array.array('L', [0])
+ if fcntl.ioctl(f.fileno(), label_to_ioctl[label], values, 1) == 0:
+ return values[0]
+ else:
+ return None
+ finally:
+ f.close()
+
+_balloon_stat = {
+ "SunOS": _solaris_balloon_stat
+}
+
def _get(var, default=None):
return var.get(os.uname()[0], default)
@@ -49,3 +98,4 @@ pygrub_path = _get(_pygrub_path, "/usr/b
pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub")
netback_type = _get(_netback_type, "netfront")
vif_script = _get(_vif_script, "vif-bridge")
+lookup_balloon_stat = _get(_balloon_stat, _linux_balloon_stat)
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/server/SrvServer.py Tue Apr 03 13:04:51 2007 -0600
@@ -212,8 +212,8 @@ def _loadConfig(servers, root, reload):
if server_cfg[1] in [XendAPI.AUTH_PAM, XendAPI.AUTH_NONE]:
auth_method = server_cfg[1]
- if len(server_cfg) > 2:
- hosts_allowed = server_cfg[2] or None
+ if len(server_cfg) > 2 and len(server_cfg[2]):
+ hosts_allowed = map(re.compile, server_cfg[2].split(' '))
if len(server_cfg) > 4:
# SSL key and cert file
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/server/relocate.py Tue Apr 03 13:04:51 2007 -0600
@@ -106,8 +106,12 @@ class RelocationProtocol(protocol.Protoc
def op_receive(self, name, _):
if self.transport:
self.send_reply(["ready", name])
- XendDomain.instance().domain_restore_fd(
- self.transport.sock.fileno())
+ try:
+ XendDomain.instance().domain_restore_fd(
+ self.transport.sock.fileno())
+ except:
+ self.send_error()
+ self.close()
else:
log.error(name + ": no transport")
raise XendError(name + ": no transport")
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/create.py Tue Apr 03 13:04:51 2007 -0600
@@ -185,6 +185,10 @@ gopts.var('cpus', val='CPUS',
gopts.var('cpus', val='CPUS',
fn=set_value, default=None,
use="CPUS to run the domain on.")
+
+gopts.var('rtc_timeoffset', val='RTC_TIMEOFFSET',
+ fn=set_value, default="0",
+ use="Set RTC offset.")
gopts.var('pae', val='PAE',
fn=set_int, default=1,
@@ -717,7 +721,7 @@ def configure_hvm(config_image, vals):
args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb',
'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
- 'sdl', 'display', 'xauthority',
+ 'sdl', 'display', 'xauthority', 'rtc_timeoffset',
'acpi', 'apic', 'usb', 'usbdevice', 'keymap' ]
for a in args:
if a in vals.__dict__ and vals.__dict__[a] is not None:
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/main.py Tue Apr 03 13:04:51 2007 -0600
@@ -929,10 +929,10 @@ def xm_label_list(doms):
if security.active_policy not in ['INACTIVE', 'NULL', 'DEFAULT']:
if not d['seclabel']:
d['seclabel'] = 'ERROR'
- elif security.active_policy in ['DEFAULT']:
- d['seclabel'] = 'DEFAULT'
- else:
- d['seclabel'] = 'INACTIVE'
+ elif security.active_policy in ['DEFAULT']:
+ d['seclabel'] = 'DEFAULT'
+ else:
+ d['seclabel'] = 'INACTIVE'
output.append((format % d, d['seclabel']))
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/xenapi_create.py Tue Apr 03 13:04:51 2007 -0600
@@ -20,7 +20,6 @@
from xen.xm.main import server, get_default_SR
from xml.dom.minidom import parse, getDOMImplementation
-from xml.dom.ext import PrettyPrint
from xml.parsers.xmlproc import xmlproc, xmlval, xmldtd
from xen.xend import sxp
from xen.xend.XendAPIConstants import XEN_API_ON_NORMAL_EXIT, \
diff -r fc9e2f7920c9 -r f378c424e0ce tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/xcutils/xc_restore.c Tue Apr 03 13:04:51 2007 -0600
@@ -18,15 +18,14 @@ int
int
main(int argc, char **argv)
{
- unsigned int xc_fd, io_fd, domid, max_pfn, store_evtchn, console_evtchn;
+ unsigned int xc_fd, io_fd, domid, store_evtchn, console_evtchn;
unsigned int hvm, pae, apic;
int ret;
- unsigned long store_mfn, console_mfn;
+ unsigned long p2m_size, max_nr_pfns, store_mfn, console_mfn;
- if (argc != 9)
- errx(1,
- "usage: %s iofd domid max_pfn store_evtchn console_evtchn hvm pae
apic",
- argv[0]);
+ if (argc != 10)
+ errx(1, "usage: %s iofd domid p2m_size max_nr_pfns store_evtchn "
+ "console_evtchn hvm pae apic", argv[0]);
xc_fd = xc_interface_open();
if (xc_fd < 0)
@@ -34,19 +33,21 @@ main(int argc, char **argv)
io_fd = atoi(argv[1]);
domid = atoi(argv[2]);
- max_pfn = atoi(argv[3]);
- store_evtchn = atoi(argv[4]);
- console_evtchn = atoi(argv[5]);
- hvm = atoi(argv[6]);
- pae = atoi(argv[7]);
- apic = atoi(argv[8]);
+ p2m_size = atoi(argv[3]);
+ max_nr_pfns = atoi(argv[4]);
+ store_evtchn = atoi(argv[5]);
+ console_evtchn = atoi(argv[6]);
+ hvm = atoi(argv[7]);
+ pae = atoi(argv[8]);
+ apic = atoi(argv[9]);
if (hvm) {
- ret = xc_hvm_restore(xc_fd, io_fd, domid, max_pfn, store_evtchn,
+ ret = xc_hvm_restore(xc_fd, io_fd, domid, max_nr_pfns, store_evtchn,
&store_mfn, pae, apic);
- } else
- ret = xc_linux_restore(xc_fd, io_fd, domid, max_pfn, store_evtchn,
- &store_mfn, console_evtchn, &console_mfn);
+ } else
+ ret = xc_linux_restore(xc_fd, io_fd, domid, p2m_size,
+ max_nr_pfns, store_evtchn, &store_mfn,
+ console_evtchn, &console_mfn);
if (ret == 0) {
printf("store-mfn %li\n", store_mfn);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/xenstat/xentop/xentop.c
--- a/tools/xenstat/xentop/xentop.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/xenstat/xentop/xentop.c Tue Apr 03 13:04:51 2007 -0600
@@ -984,6 +984,8 @@ static void top(void)
if(!batch)
do_bottom_line();
+
+ free(domains);
}
int main(int argc, char **argv)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c Tue Apr 03 13:04:51 2007 -0600
@@ -59,9 +59,6 @@ struct hvm_function_table hvm_funcs __re
/* I/O permission bitmap is globally shared by all HVM guests. */
char __attribute__ ((__section__ (".bss.page_aligned")))
hvm_io_bitmap[3*PAGE_SIZE];
-/* MSR permission bitmap is globally shared by all HVM guests. */
-char __attribute__ ((__section__ (".bss.page_aligned")))
- hvm_msr_bitmap[PAGE_SIZE];
void hvm_enable(struct hvm_function_table *fns)
{
@@ -74,9 +71,6 @@ void hvm_enable(struct hvm_function_tabl
*/
memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
clear_bit(0x80, hvm_io_bitmap);
-
- /* All MSR accesses are intercepted by default. */
- memset(hvm_msr_bitmap, ~0, sizeof(hvm_msr_bitmap));
hvm_funcs = *fns;
hvm_enabled = 1;
@@ -378,6 +372,9 @@ void hvm_send_assist_req(struct vcpu *v)
void hvm_send_assist_req(struct vcpu *v)
{
ioreq_t *p;
+
+ if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
+ return; /* implicitly bins the i/o operation */
p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq;
if ( unlikely(p->state != STATE_IOREQ_NONE) )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/intercept.c Tue Apr 03 13:04:51 2007 -0600
@@ -155,28 +155,13 @@ static inline void hvm_mmio_access(struc
}
}
-int hvm_buffered_io_intercept(ioreq_t *p)
+int hvm_buffered_io_send(ioreq_t *p)
{
struct vcpu *v = current;
spinlock_t *buffered_io_lock;
buffered_iopage_t *buffered_iopage =
(buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va);
unsigned long tmp_write_pointer = 0;
- int i;
-
- /* ignore READ ioreq_t! */
- if ( p->dir == IOREQ_READ )
- return 0;
-
- for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
- if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
- p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
- hvm_buffered_io_ranges[i]->length )
- break;
- }
-
- if ( i == HVM_BUFFERED_IO_RANGE_NR )
- return 0;
buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock;
spin_lock(buffered_io_lock);
@@ -205,6 +190,27 @@ int hvm_buffered_io_intercept(ioreq_t *p
return 1;
}
+int hvm_buffered_io_intercept(ioreq_t *p)
+{
+ int i;
+
+ /* ignore READ ioreq_t! */
+ if ( p->dir == IOREQ_READ )
+ return 0;
+
+ for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
+ if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
+ p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
+ hvm_buffered_io_ranges[i]->length )
+ break;
+ }
+
+ if ( i == HVM_BUFFERED_IO_RANGE_NR )
+ return 0;
+
+ return hvm_buffered_io_send(p);
+}
+
int hvm_mmio_intercept(ioreq_t *p)
{
struct vcpu *v = current;
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/io.c Tue Apr 03 13:04:51 2007 -0600
@@ -771,10 +771,11 @@ void hvm_io_assist(struct vcpu *v)
struct cpu_user_regs *regs;
struct hvm_io_op *io_opp;
unsigned long gmfn;
+ struct domain *d = v->domain;
io_opp = &v->arch.hvm_vcpu.io_op;
regs = &io_opp->io_context;
- vio = get_vio(v->domain, v->vcpu_id);
+ vio = get_vio(d, v->vcpu_id);
p = &vio->vp_ioreq;
if ( p->state != STATE_IORESP_READY )
@@ -797,11 +798,13 @@ void hvm_io_assist(struct vcpu *v)
memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
/* Has memory been dirtied? */
- if ( p->dir == IOREQ_READ && p->data_is_ptr )
+ if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
{
gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
- mark_dirty(v->domain, gmfn);
- }
+ mark_dirty(d, gmfn);
+ }
+
+ vcpu_end_shutdown_deferral(v);
}
/*
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/platform.c Tue Apr 03 13:04:51 2007 -0600
@@ -921,6 +921,26 @@ static void send_mmio_req(unsigned char
hvm_send_assist_req(v);
}
+void send_timeoffset_req(unsigned long timeoff)
+{
+ ioreq_t p[1];
+
+ if ( timeoff == 0 )
+ return;
+
+ memset(p, 0, sizeof(*p));
+
+ p->type = IOREQ_TYPE_TIMEOFFSET;
+ p->size = 4;
+ p->dir = IOREQ_WRITE;
+ p->data = timeoff;
+
+ p->state = STATE_IOREQ_READY;
+
+ if ( !hvm_buffered_io_send(p) )
+ printk("Unsuccessful timeoffset update\n");
+}
+
static void mmio_operands(int type, unsigned long gpa,
struct hvm_io_op *mmio_op,
unsigned char op_size)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/rtc.c
--- a/xen/arch/x86/hvm/rtc.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/rtc.c Tue Apr 03 13:04:51 2007 -0600
@@ -157,6 +157,10 @@ static void rtc_set_time(RTCState *s)
static void rtc_set_time(RTCState *s)
{
struct tm *tm = &s->current_tm;
+ unsigned long before, after; /* XXX s_time_t */
+
+ before = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
+ tm->tm_hour, tm->tm_min, tm->tm_sec);
tm->tm_sec = from_bcd(s, s->hw.cmos_data[RTC_SECONDS]);
tm->tm_min = from_bcd(s, s->hw.cmos_data[RTC_MINUTES]);
@@ -168,6 +172,10 @@ static void rtc_set_time(RTCState *s)
tm->tm_mday = from_bcd(s, s->hw.cmos_data[RTC_DAY_OF_MONTH]);
tm->tm_mon = from_bcd(s, s->hw.cmos_data[RTC_MONTH]) - 1;
tm->tm_year = from_bcd(s, s->hw.cmos_data[RTC_YEAR]) + 100;
+
+ after = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
+ tm->tm_hour, tm->tm_min, tm->tm_sec);
+ send_timeoffset_req(after - before);
}
static void rtc_copy_date(RTCState *s)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c Tue Apr 03 13:04:51 2007 -0600
@@ -79,6 +79,30 @@ struct host_save_area *alloc_host_save_a
return hsa;
}
+static void disable_intercept_for_msr(char *msr_bitmap, u32 msr)
+{
+ /*
+ * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
+ */
+ if ( msr <= 0x1fff )
+ {
+ __clear_bit(msr*2, msr_bitmap + 0x000);
+ __clear_bit(msr*2+1, msr_bitmap + 0x000);
+ }
+ else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+ {
+ msr &= 0x1fff;
+ __clear_bit(msr*2, msr_bitmap + 0x800);
+ __clear_bit(msr*2+1, msr_bitmap + 0x800);
+ }
+ else if ( (msr >= 0xc001000) && (msr <= 0xc0011fff) )
+ {
+ msr &= 0x1fff;
+ __clear_bit(msr*2, msr_bitmap + 0x1000);
+ __clear_bit(msr*2+1, msr_bitmap + 0x1000);
+ }
+}
+
static int construct_vmcb(struct vcpu *v)
{
struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
@@ -114,6 +138,10 @@ static int construct_vmcb(struct vcpu *v
if ( arch_svm->msrpm == NULL )
return -ENOMEM;
memset(arch_svm->msrpm, 0xff, MSRPM_SIZE);
+
+ disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_FS_BASE);
+ disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_GS_BASE);
+
vmcb->msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm);
vmcb->iopm_base_pa = (u64)virt_to_maddr(hvm_io_bitmap);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue Apr 03 13:04:51 2007 -0600
@@ -289,7 +289,7 @@ static void construct_vmcs(struct vcpu *
v->arch.hvm_vcpu.u.vmx.exec_control = vmx_cpu_based_exec_control;
if ( cpu_has_vmx_msr_bitmap )
- __vmwrite(MSR_BITMAP, virt_to_maddr(hvm_msr_bitmap));
+ __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
/* I/O access bitmap. */
__vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Apr 03 13:04:51 2007 -0600
@@ -51,6 +51,8 @@
#include <public/hvm/save.h>
#include <asm/hvm/trace.h>
+char *vmx_msr_bitmap;
+
static void vmx_ctxt_switch_from(struct vcpu *v);
static void vmx_ctxt_switch_to(struct vcpu *v);
@@ -1005,14 +1007,14 @@ static void disable_intercept_for_msr(u3
*/
if ( msr <= 0x1fff )
{
- __clear_bit(msr, hvm_msr_bitmap + 0x000); /* read-low */
- __clear_bit(msr, hvm_msr_bitmap + 0x800); /* write-low */
+ __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
+ __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
}
else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
{
msr &= 0x1fff;
- __clear_bit(msr, hvm_msr_bitmap + 0x400); /* read-high */
- __clear_bit(msr, hvm_msr_bitmap + 0xc00); /* write-high */
+ __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
+ __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
}
}
@@ -1105,6 +1107,9 @@ int start_vmx(void)
if ( cpu_has_vmx_msr_bitmap )
{
printk("VMX: MSR intercept bitmap enabled\n");
+ vmx_msr_bitmap = alloc_xenheap_page();
+ BUG_ON(vmx_msr_bitmap == NULL);
+ memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
disable_intercept_for_msr(MSR_FS_BASE);
disable_intercept_for_msr(MSR_GS_BASE);
}
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm.c Tue Apr 03 13:04:51 2007 -0600
@@ -806,7 +806,8 @@ void put_page_from_l1e(l1_pgentry_t l1e,
* (Note that the undestroyable active grants are not a security hole in
* Xen. All active grants can safely be cleaned up when the domain dies.)
*/
- if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) && !d->is_shutdown &&
!d->is_dying )
+ if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
+ !d->is_shutting_down && !d->is_dying )
{
MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
l1e_get_intpte(l1e));
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm/hap/hap.c Tue Apr 03 13:04:51 2007 -0600
@@ -52,7 +52,7 @@
/************************************************/
/* HAP SUPPORT FUNCTIONS */
/************************************************/
-mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
+mfn_t hap_alloc(struct domain *d)
{
struct page_info *sp = NULL;
void *p;
@@ -82,43 +82,43 @@ void hap_free(struct domain *d, mfn_t sm
list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
}
-static int hap_alloc_p2m_pages(struct domain *d)
-{
- struct page_info *pg;
-
- ASSERT(hap_locked_by_me(d));
-
- pg = mfn_to_page(hap_alloc(d, 0));
- d->arch.paging.hap.p2m_pages += 1;
- d->arch.paging.hap.total_pages -= 1;
-
- page_set_owner(pg, d);
- pg->count_info = 1;
- list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
-
- return 1;
-}
-
struct page_info * hap_alloc_p2m_page(struct domain *d)
{
- struct list_head *entry;
struct page_info *pg;
mfn_t mfn;
void *p;
hap_lock(d);
-
- if ( list_empty(&d->arch.paging.hap.p2m_freelist) &&
- !hap_alloc_p2m_pages(d) ) {
- hap_unlock(d);
- return NULL;
- }
- entry = d->arch.paging.hap.p2m_freelist.next;
- list_del(entry);
-
+
+#if CONFIG_PAGING_LEVELS == 3
+ /* Under PAE mode, top-level P2M table should be allocated below 4GB space
+ * because the size of h_cr3 is only 32-bit. We use alloc_domheap_pages to
+ * force this requirement. This page will be de-allocated in
+ * hap_free_p2m_page(), like other P2M pages.
+ */
+ if ( d->arch.paging.hap.p2m_pages == 0 )
+ {
+ pg = alloc_domheap_pages(NULL, 0, MEMF_bits(32));
+ d->arch.paging.hap.p2m_pages += 1;
+ }
+ else
+#endif
+ {
+ pg = mfn_to_page(hap_alloc(d));
+
+ d->arch.paging.hap.p2m_pages += 1;
+ d->arch.paging.hap.total_pages -= 1;
+ }
+
+ if ( pg == NULL ) {
+ hap_unlock(d);
+ return NULL;
+ }
+
hap_unlock(d);
- pg = list_entry(entry, struct page_info, list);
+ page_set_owner(pg, d);
+ pg->count_info = 1;
mfn = page_to_mfn(pg);
p = hap_map_domain_page(mfn);
clear_page(p);
@@ -141,6 +141,7 @@ void hap_free_p2m_page(struct domain *d,
page_set_owner(pg, NULL);
free_domheap_pages(pg, 0);
d->arch.paging.hap.p2m_pages--;
+ ASSERT( d->arch.paging.hap.p2m_pages >= 0 );
}
/* Return the size of the pool, rounded up to the nearest MB */
@@ -320,7 +321,7 @@ mfn_t hap_make_monitor_table(struct vcpu
#if CONFIG_PAGING_LEVELS == 4
{
mfn_t m4mfn;
- m4mfn = hap_alloc(d, 0);
+ m4mfn = hap_alloc(d);
hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
return m4mfn;
}
@@ -331,12 +332,12 @@ mfn_t hap_make_monitor_table(struct vcpu
l2_pgentry_t *l2e;
int i;
- m3mfn = hap_alloc(d, 0);
+ m3mfn = hap_alloc(d);
/* Install a monitor l2 table in slot 3 of the l3 table.
* This is used for all Xen entries, including linear maps
*/
- m2mfn = hap_alloc(d, 0);
+ m2mfn = hap_alloc(d);
l3e = hap_map_domain_page(m3mfn);
l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
hap_install_xen_entries_in_l2h(v, m2mfn);
@@ -357,7 +358,7 @@ mfn_t hap_make_monitor_table(struct vcpu
{
mfn_t m2mfn;
- m2mfn = hap_alloc(d, 0);
+ m2mfn = hap_alloc(d);
hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
return m2mfn;
@@ -390,7 +391,6 @@ void hap_domain_init(struct domain *d)
{
hap_lock_init(d);
INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
- INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
}
/* return 0 for success, -errno for failure */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c Tue Apr 03 13:04:51 2007 -0600
@@ -2823,8 +2823,8 @@ static int sh_page_fault(struct vcpu *v,
* are OK, this can only have been caused by a failed
* shadow_set_l*e(), which will have crashed the guest.
* Get out of the fault handler immediately. */
- ASSERT(d->is_shutdown);
- unmap_walk(v, &gw);
+ ASSERT(d->is_shutting_down);
+ unmap_walk(v, &gw);
shadow_unlock(d);
return 0;
}
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/setup.c Tue Apr 03 13:04:51 2007 -0600
@@ -591,8 +591,6 @@ void __init __start_xen(multiboot_info_t
numa_initmem_init(0, max_page);
- end_boot_allocator();
-
/* Initialise the Xen heap, skipping RAM holes. */
nr_pages = 0;
for ( i = 0; i < e820.nr_map; i++ )
@@ -617,6 +615,8 @@ void __init __start_xen(multiboot_info_t
printk("Xen heap: %luMB (%lukB)\n",
nr_pages >> (20 - PAGE_SHIFT),
nr_pages << (PAGE_SHIFT - 10));
+
+ end_boot_allocator();
early_boot = 0;
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/time.c Tue Apr 03 13:04:51 2007 -0600
@@ -573,7 +573,7 @@ static void init_platform_timer(void)
* machines were long is 32-bit! (However, as time_t is signed, we
* will already get problems at other places on 2038-01-19 03:14:08)
*/
-static inline unsigned long
+unsigned long
mktime (unsigned int year, unsigned int mon,
unsigned int day, unsigned int hour,
unsigned int min, unsigned int sec)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/traps.c Tue Apr 03 13:04:51 2007 -0600
@@ -285,23 +285,32 @@ void show_xen_trace()
show_trace(®s);
}
-void show_stack_overflow(unsigned long esp)
+void show_stack_overflow(unsigned int cpu, unsigned long esp)
{
#ifdef MEMORY_GUARD
- unsigned long esp_top;
+ unsigned long esp_top, esp_bottom;
unsigned long *stack, addr;
- esp_top = (esp | (STACK_SIZE - 1)) - DEBUG_STACK_SIZE;
+ esp_bottom = (esp | (STACK_SIZE - 1)) + 1;
+ esp_top = esp_bottom - DEBUG_STACK_SIZE;
+
+ printk("Valid stack range: %p-%p, sp=%p, tss.esp0=%p\n",
+ (void *)esp_top, (void *)esp_bottom, (void *)esp,
+ (void *)init_tss[cpu].esp0);
/* Trigger overflow trace if %esp is within 512 bytes of the guard page. */
if ( ((unsigned long)(esp - esp_top) > 512) &&
((unsigned long)(esp_top - esp) > 512) )
+ {
+ printk("No stack overflow detected. Skipping stack trace.\n");
return;
+ }
if ( esp < esp_top )
esp = esp_top;
- printk("Xen stack overflow:\n ");
+ printk("Xen stack overflow (dumping trace %p-%p):\n ",
+ (void *)esp, (void *)esp_bottom);
stack = (unsigned long *)esp;
while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c Tue Apr 03 13:04:51 2007 -0600
@@ -139,7 +139,7 @@ void show_page_walk(unsigned long addr)
unmap_domain_page(l1t);
}
-#define DOUBLEFAULT_STACK_SIZE 1024
+#define DOUBLEFAULT_STACK_SIZE 2048
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
@@ -167,7 +167,7 @@ asmlinkage void do_double_fault(void)
tss->esi, tss->edi, tss->ebp, tss->esp);
printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
- show_stack_overflow(tss->esp);
+ show_stack_overflow(cpu, tss->esp);
panic("DOUBLE FAULT -- system shutdown\n");
}
@@ -268,8 +268,7 @@ void __init percpu_traps_init(void)
tss->ds = __HYPERVISOR_DS;
tss->es = __HYPERVISOR_DS;
tss->ss = __HYPERVISOR_DS;
- tss->esp = (unsigned long)
- &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+ tss->esp = (unsigned long)&doublefault_stack[DOUBLEFAULT_STACK_SIZE];
tss->__cr3 = __pa(idle_pg_table);
tss->cs = __HYPERVISOR_CS;
tss->eip = (unsigned long)do_double_fault;
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c Tue Apr 03 13:04:51 2007 -0600
@@ -171,7 +171,7 @@ asmlinkage void do_double_fault(struct c
printk("r12: %016lx r13: %016lx r14: %016lx\n",
regs->r12, regs->r13, regs->r14);
printk("r15: %016lx\n", regs->r15);
- show_stack_overflow(regs->rsp);
+ show_stack_overflow(cpu, regs->rsp);
panic("DOUBLE FAULT -- system shutdown\n");
}
@@ -270,18 +270,18 @@ void __init percpu_traps_init(void)
stack_bottom = (char *)get_stack_bottom();
stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
- /* Double-fault handler has its own per-CPU 1kB stack. */
- init_tss[cpu].ist[0] = (unsigned long)&stack[1024];
+ /* Double-fault handler has its own per-CPU 2kB stack. */
+ init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
/* NMI handler has its own per-CPU 1kB stack. */
- init_tss[cpu].ist[1] = (unsigned long)&stack[2048];
+ init_tss[cpu].ist[1] = (unsigned long)&stack[3072];
/*
* Trampoline for SYSCALL entry from long mode.
*/
/* Skip the NMI and DF stacks. */
- stack = &stack[2048];
+ stack = &stack[3072];
wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
/* movq %rsp, saversp(%rip) */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/domain.c
--- a/xen/common/domain.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/domain.c Tue Apr 03 13:04:51 2007 -0600
@@ -59,6 +59,7 @@ struct domain *alloc_domain(domid_t domi
atomic_set(&d->refcnt, 1);
spin_lock_init(&d->big_lock);
spin_lock_init(&d->page_alloc_lock);
+ spin_lock_init(&d->shutdown_lock);
INIT_LIST_HEAD(&d->page_list);
INIT_LIST_HEAD(&d->xenpage_list);
@@ -83,6 +84,45 @@ void free_domain(struct domain *d)
xfree(d);
}
+static void __domain_finalise_shutdown(struct domain *d)
+{
+ struct vcpu *v;
+
+ BUG_ON(!spin_is_locked(&d->shutdown_lock));
+
+ if ( d->is_shut_down )
+ return;
+
+ for_each_vcpu ( d, v )
+ if ( !v->paused_for_shutdown )
+ return;
+
+ d->is_shut_down = 1;
+
+ for_each_vcpu ( d, v )
+ vcpu_sleep_nosync(v);
+
+ send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+}
+
+static void vcpu_check_shutdown(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+
+ spin_lock(&d->shutdown_lock);
+
+ if ( d->is_shutting_down )
+ {
+ if ( !v->paused_for_shutdown )
+ atomic_inc(&v->pause_count);
+ v->paused_for_shutdown = 1;
+ v->defer_shutdown = 0;
+ __domain_finalise_shutdown(d);
+ }
+
+ spin_unlock(&d->shutdown_lock);
+}
+
struct vcpu *alloc_vcpu(
struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
{
@@ -121,6 +161,9 @@ struct vcpu *alloc_vcpu(
d->vcpu[vcpu_id] = v;
if ( vcpu_id != 0 )
d->vcpu[v->vcpu_id-1]->next_in_list = v;
+
+ /* Must be called after making new vcpu visible to for_each_vcpu(). */
+ vcpu_check_shutdown(v);
return v;
}
@@ -286,7 +329,7 @@ void domain_kill(struct domain *d)
void __domain_crash(struct domain *d)
{
- if ( d->is_shutdown )
+ if ( d->is_shutting_down )
{
/* Print nothing: the domain is already shutting down. */
}
@@ -335,16 +378,73 @@ void domain_shutdown(struct domain *d, u
if ( d->domain_id == 0 )
dom0_shutdown(reason);
- atomic_inc(&d->pause_count);
- if ( !xchg(&d->is_shutdown, 1) )
- d->shutdown_code = reason;
- else
- domain_unpause(d);
+ spin_lock(&d->shutdown_lock);
+
+ if ( d->is_shutting_down )
+ {
+ spin_unlock(&d->shutdown_lock);
+ return;
+ }
+
+ d->is_shutting_down = 1;
+ d->shutdown_code = reason;
+
+ smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
for_each_vcpu ( d, v )
- vcpu_sleep_nosync(v);
-
- send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+ {
+ if ( v->defer_shutdown )
+ continue;
+ atomic_inc(&v->pause_count);
+ v->paused_for_shutdown = 1;
+ }
+
+ __domain_finalise_shutdown(d);
+
+ spin_unlock(&d->shutdown_lock);
+}
+
+void domain_resume(struct domain *d)
+{
+ struct vcpu *v;
+
+ /*
+ * Some code paths assume that shutdown status does not get reset under
+ * their feet (e.g., some assertions make this assumption).
+ */
+ domain_pause(d);
+
+ spin_lock(&d->shutdown_lock);
+
+ d->is_shutting_down = d->is_shut_down = 0;
+
+ for_each_vcpu ( d, v )
+ {
+ if ( v->paused_for_shutdown )
+ vcpu_unpause(v);
+ v->paused_for_shutdown = 0;
+ }
+
+ spin_unlock(&d->shutdown_lock);
+
+ domain_unpause(d);
+}
+
+int vcpu_start_shutdown_deferral(struct vcpu *v)
+{
+ v->defer_shutdown = 1;
+ smp_mb(); /* set deferral status /then/ check for shutdown */
+ if ( unlikely(v->domain->is_shutting_down) )
+ vcpu_check_shutdown(v);
+ return v->defer_shutdown;
+}
+
+void vcpu_end_shutdown_deferral(struct vcpu *v)
+{
+ v->defer_shutdown = 0;
+ smp_mb(); /* clear deferral status /then/ check for shutdown */
+ if ( unlikely(v->domain->is_shutting_down) )
+ vcpu_check_shutdown(v);
}
void domain_pause_for_debugger(void)
@@ -425,7 +525,6 @@ void vcpu_pause_nosync(struct vcpu *v)
void vcpu_unpause(struct vcpu *v)
{
- ASSERT(v != current);
if ( atomic_dec_and_test(&v->pause_count) )
vcpu_wake(v);
}
@@ -445,8 +544,6 @@ void domain_unpause(struct domain *d)
void domain_unpause(struct domain *d)
{
struct vcpu *v;
-
- ASSERT(d != current->domain);
if ( atomic_dec_and_test(&d->pause_count) )
for_each_vcpu( d, v )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/domctl.c
--- a/xen/common/domctl.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/domctl.c Tue Apr 03 13:04:51 2007 -0600
@@ -115,7 +115,7 @@ void getdomaininfo(struct domain *d, str
info->flags = flags |
(d->is_dying ? XEN_DOMINF_dying : 0) |
- (d->is_shutdown ? XEN_DOMINF_shutdown : 0) |
+ (d->is_shut_down ? XEN_DOMINF_shutdown : 0) |
(d->is_paused_by_controller ? XEN_DOMINF_paused : 0) |
d->shutdown_code << XEN_DOMINF_shutdownshift;
@@ -287,8 +287,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
if ( d == NULL )
break;
- if ( xchg(&d->is_shutdown, 0) )
- domain_unpause(d);
+ domain_resume(d);
rcu_unlock_domain(d);
ret = 0;
}
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/page_alloc.c Tue Apr 03 13:04:51 2007 -0600
@@ -512,6 +512,14 @@ void init_heap_pages(
ASSERT(zone < NR_ZONES);
+ if ( unlikely(avail[0] == NULL) )
+ {
+ /* Start-of-day memory node 0 initialisation. */
+ init_heap_block(&_heap0);
+ _heap[0] = &_heap0;
+ avail[0] = avail0;
+ }
+
if ( likely(page_to_mfn(pg) != 0) )
nid_prev = phys_to_nid(page_to_maddr(pg-1));
else
@@ -569,10 +577,6 @@ void end_boot_allocator(void)
{
unsigned long i;
int curr_free, next_free;
-
- init_heap_block(&_heap0);
- _heap[0] = &_heap0;
- avail[0] = avail0;
/* Pages that are free now go to the domain sub-allocator. */
if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/symbols.c
--- a/xen/common/symbols.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/symbols.c Tue Apr 03 13:04:51 2007 -0600
@@ -16,6 +16,7 @@
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/string.h>
+#include <xen/spinlock.h>
extern unsigned long symbols_addresses[];
extern unsigned long symbols_num_syms;
@@ -140,12 +141,15 @@ void __print_symbol(const char *fmt, uns
void __print_symbol(const char *fmt, unsigned long address)
{
const char *name;
- unsigned long offset, size;
- char namebuf[KSYM_NAME_LEN+1];
+ unsigned long offset, size, flags;
+ static DEFINE_SPINLOCK(lock);
+ static char namebuf[KSYM_NAME_LEN+1];
#define BUFFER_SIZE sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \
2*(BITS_PER_LONG*3/10) + 1
- char buffer[BUFFER_SIZE];
+ static char buffer[BUFFER_SIZE];
+
+ spin_lock_irqsave(&lock, flags);
name = symbols_lookup(address, &size, &offset, namebuf);
@@ -155,4 +159,6 @@ void __print_symbol(const char *fmt, uns
snprintf(buffer, BUFFER_SIZE, "%s+%#lx/%#lx", name, offset, size);
printk(fmt, buffer);
+
+ spin_unlock_irqrestore(&lock, flags);
}
diff -r fc9e2f7920c9 -r f378c424e0ce xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/drivers/char/console.c Tue Apr 03 13:04:51 2007 -0600
@@ -858,19 +858,20 @@ void panic(const char *fmt, ...)
void panic(const char *fmt, ...)
{
va_list args;
- char buf[128];
unsigned long flags;
static DEFINE_SPINLOCK(lock);
+ static char buf[128];
debugtrace_dump();
+
+ /* Protects buf[] and ensure multi-line message prints atomically. */
+ spin_lock_irqsave(&lock, flags);
va_start(args, fmt);
(void)vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
- /* Spit out multiline message in one go. */
console_start_sync();
- spin_lock_irqsave(&lock, flags);
printk("\n****************************************\n");
printk("Panic on CPU %d:\n", smp_processor_id());
printk(buf);
@@ -879,6 +880,7 @@ void panic(const char *fmt, ...)
printk("Manual reset required ('noreboot' specified)\n");
else
printk("Reboot in five seconds...\n");
+
spin_unlock_irqrestore(&lock, flags);
debugger_trap_immediate();
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/domain.h Tue Apr 03 13:04:51 2007 -0600
@@ -115,7 +115,6 @@ struct hap_domain {
const char *locker_function;
struct list_head freelists;
- struct list_head p2m_freelist;
unsigned int total_pages; /* number of pages allocated */
unsigned int free_pages; /* number of pages on freelists */
unsigned int p2m_pages; /* number of pages allocates to p2m */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/io.h
--- a/xen/include/asm-x86/hvm/io.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/io.h Tue Apr 03 13:04:51 2007 -0600
@@ -127,6 +127,7 @@ static inline int hvm_portio_intercept(i
}
extern int hvm_mmio_intercept(ioreq_t *p);
+extern int hvm_buffered_io_send(ioreq_t *p);
extern int hvm_buffered_io_intercept(ioreq_t *p);
static inline int register_portio_handler(
@@ -145,6 +146,7 @@ static inline int irq_masked(unsigned lo
extern void send_pio_req(unsigned long port, unsigned long count, int size,
paddr_t value, int dir, int df, int value_is_ptr);
+void send_timeoffset_req(unsigned long timeoff);
extern void handle_mmio(unsigned long gpa);
extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
extern void hvm_io_assist(struct vcpu *v);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/support.h Tue Apr 03 13:04:51 2007 -0600
@@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
/* End of save/restore */
extern char hvm_io_bitmap[];
-extern char hvm_msr_bitmap[];
extern int hvm_enabled;
void hvm_enable(struct hvm_function_table *);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Apr 03 13:04:51 2007 -0600
@@ -121,6 +121,7 @@ extern u32 vmx_vmentry_control;
#define cpu_has_vmx_msr_bitmap \
(vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
+extern char *vmx_msr_bitmap;
/* VMCS Encordings */
enum vmcs_field {
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/processor.h Tue Apr 03 13:04:51 2007 -0600
@@ -413,9 +413,9 @@ struct tss_struct {
struct tss_struct {
unsigned short back_link,__blh;
#ifdef __x86_64__
- u64 rsp0;
- u64 rsp1;
- u64 rsp2;
+ union { u64 rsp0, esp0; };
+ union { u64 rsp1, esp1; };
+ union { u64 rsp2, esp2; };
u64 reserved1;
u64 ist[7];
u64 reserved2;
@@ -553,7 +553,7 @@ extern always_inline void prefetchw(cons
void show_stack(struct cpu_user_regs *regs);
void show_xen_trace(void);
-void show_stack_overflow(unsigned long esp);
+void show_stack_overflow(unsigned int cpu, unsigned long esp);
void show_registers(struct cpu_user_regs *regs);
void show_execution_state(struct cpu_user_regs *regs);
void show_page_walk(unsigned long addr);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/time.h Tue Apr 03 13:04:51 2007 -0600
@@ -16,4 +16,9 @@ static inline cycles_t get_cycles(void)
return c;
}
+unsigned long
+mktime (unsigned int year, unsigned int mon,
+ unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec);
+
#endif /* __X86_TIME_H__ */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/public/hvm/ioreq.h Tue Apr 03 13:04:51 2007 -0600
@@ -39,6 +39,7 @@
#define IOREQ_TYPE_XOR 4
#define IOREQ_TYPE_XCHG 5
#define IOREQ_TYPE_ADD 6
+#define IOREQ_TYPE_TIMEOFFSET 7
/*
* VMExit dispatcher should cooperate with instruction decoder to
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/xen/sched.h Tue Apr 03 13:04:51 2007 -0600
@@ -114,6 +114,10 @@ struct vcpu
bool_t nmi_pending;
/* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
bool_t nmi_masked;
+ /* Require shutdown to be deferred for some asynchronous operation? */
+ bool_t defer_shutdown;
+ /* VCPU is paused following shutdown request (d->is_shutting_down)? */
+ bool_t paused_for_shutdown;
unsigned long pause_flags;
atomic_t pause_count;
@@ -193,7 +197,9 @@ struct domain
bool_t is_paused_by_controller;
/* Guest has shut down (inc. reason code)? */
- bool_t is_shutdown;
+ spinlock_t shutdown_lock;
+ bool_t is_shutting_down; /* in process of shutting down? */
+ bool_t is_shut_down; /* fully shut down? */
int shutdown_code;
atomic_t pause_count;
@@ -331,7 +337,11 @@ void domain_destroy(struct domain *d);
void domain_destroy(struct domain *d);
void domain_kill(struct domain *d);
void domain_shutdown(struct domain *d, u8 reason);
+void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);
+
+int vcpu_start_shutdown_deferral(struct vcpu *v);
+void vcpu_end_shutdown_deferral(struct vcpu *v);
/*
* Mark specified domain as crashed. This function always returns, even if the
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|