# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1204301998 25200
# Node ID 71a8366fb212b9199090bf9e87e389bdd65e5cbd
# Parent 0b20ac6ec64aa50558bea7145552c341277f9f19
# Parent 9049b0b62e0891e9bfb188bef40f68c04b5ea653
merge with xen-unstable.hg
---
extras/mini-os/fbfront.c | 12
extras/mini-os/include/fbfront.h | 3
extras/mini-os/kernel.c | 6
extras/mini-os/xenbus/xenbus.c | 4
stubdom/README | 91 +
stubdom/stubdom-dm | 27
tools/blktap/drivers/block-qcow2.c | 161 --
tools/ioemu/block-qcow.c | 2
tools/ioemu/block-qcow2.c | 2
tools/ioemu/block-raw.c | 2
tools/ioemu/block-vmdk.c | 2
tools/ioemu/block.c | 17
tools/ioemu/block_int.h | 4
tools/ioemu/hw/e1000.c | 2
tools/ioemu/hw/xenfb.c | 216 +++
tools/ioemu/keymaps.c | 4
tools/ioemu/monitor.c | 2
tools/ioemu/vl.c | 8
tools/ioemu/vl.h | 7
tools/ioemu/xenstore.c | 31
tools/python/xen/xend/XendAPI.py | 3
tools/python/xen/xend/XendCheckpoint.py | 33
tools/python/xen/xend/XendDomain.py | 10
tools/python/xen/xend/XendDomainInfo.py | 36
tools/python/xen/xend/image.py | 29
tools/python/xen/xm/main.py | 9
tools/python/xen/xm/migrate.py | 10
tools/xenstat/libxenstat/src/xenstat_solaris.c | 44
tools/xentrace/xentrace.c | 8
unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 3
xen/arch/ia64/xen/machine_kexec.c | 49
xen/arch/powerpc/machine_kexec.c | 6
xen/arch/x86/machine_kexec.c | 10
xen/arch/x86/mm/shadow/multi.c | 202 ++-
xen/arch/x86/mm/shadow/types.h | 1
xen/arch/x86/x86_32/Makefile | 1
xen/arch/x86/x86_32/machine_kexec.c | 33
xen/arch/x86/x86_64/Makefile | 1
xen/arch/x86/x86_64/machine_kexec.c | 32
xen/common/compat/kexec.c | 5
xen/common/kexec.c | 97 -
xen/drivers/acpi/tables.c | 1
xen/drivers/passthrough/amd/Makefile | 1
xen/drivers/passthrough/amd/iommu_acpi.c | 874 +++++++++++++
xen/drivers/passthrough/amd/iommu_detect.c | 36
xen/drivers/passthrough/amd/iommu_init.c | 41
xen/drivers/passthrough/amd/iommu_map.c | 42
xen/drivers/passthrough/amd/pci_amd_iommu.c | 142 +-
xen/include/asm-x86/amd-iommu.h | 36
xen/include/asm-x86/domain.h | 5
xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h | 176 ++
xen/include/asm-x86/hvm/svm/amd-iommu-defs.h | 6
xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 24
xen/include/asm-x86/perfc_defn.h | 5
xen/include/public/io/kbdif.h | 2
xen/include/public/kexec.h | 15
xen/include/xen/acpi.h | 1
xen/include/xen/kexec.h | 1
58 files changed, 2173 insertions(+), 460 deletions(-)
diff -r 0b20ac6ec64a -r 71a8366fb212 extras/mini-os/fbfront.c
--- a/extras/mini-os/fbfront.c Fri Feb 29 09:18:01 2008 -0700
+++ b/extras/mini-os/fbfront.c Fri Feb 29 09:19:58 2008 -0700
@@ -31,13 +31,6 @@ struct kbdfront_dev {
char *nodename;
char *backend;
- char *data;
- int width;
- int height;
- int depth;
- int line_length;
- int mem_length;
-
#ifdef HAVE_LIBC
int fd;
#endif
@@ -316,7 +309,10 @@ struct fbfront_dev *init_fbfront(char *n
for (i = 0; mapped < mem_length && i < max_pd; i++) {
unsigned long *pd = (unsigned long *) alloc_page();
for (j = 0; mapped < mem_length && j < PAGE_SIZE / sizeof(unsigned long); j++) {
- pd[j] = virt_to_mfn((unsigned long) data + mapped);
+ /* Trigger CoW */
+ * ((char *)data + mapped) = 0;
+ barrier();
+ pd[j] = virtual_to_mfn((unsigned long) data + mapped);
mapped += PAGE_SIZE;
}
for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++)
diff -r 0b20ac6ec64a -r 71a8366fb212 extras/mini-os/include/fbfront.h
--- a/extras/mini-os/include/fbfront.h Fri Feb 29 09:18:01 2008 -0700
+++ b/extras/mini-os/include/fbfront.h Fri Feb 29 09:19:58 2008 -0700
@@ -14,6 +14,9 @@
#ifndef KEY_Q
#define KEY_Q 16
#endif
+#ifndef KEY_MAX
+#define KEY_MAX 0x1ff
+#endif
struct kbdfront_dev;
diff -r 0b20ac6ec64a -r 71a8366fb212 extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c Fri Feb 29 09:18:01 2008 -0700
+++ b/extras/mini-os/kernel.c Fri Feb 29 09:19:58 2008 -0700
@@ -360,13 +360,13 @@ static void kbdfront_thread(void *p)
refresh_cursor(x, y);
break;
case XENKBD_TYPE_POS:
- printk("pos x:%d y:%d z:%d\n",
+ printk("pos x:%d y:%d dz:%d\n",
event.pos.abs_x,
event.pos.abs_y,
- event.pos.abs_z);
+ event.pos.rel_z);
x = event.pos.abs_x;
y = event.pos.abs_y;
- z = event.pos.abs_z;
+ z = event.pos.rel_z;
clip_cursor(&x, &y);
refresh_cursor(x, y);
break;
diff -r 0b20ac6ec64a -r 71a8366fb212 extras/mini-os/xenbus/xenbus.c
--- a/extras/mini-os/xenbus/xenbus.c Fri Feb 29 09:18:01 2008 -0700
+++ b/extras/mini-os/xenbus/xenbus.c Fri Feb 29 09:19:58 2008 -0700
@@ -637,9 +637,7 @@ char* xenbus_printf(xenbus_transaction_t
va_start(args, fmt);
vsprintf(val, fmt, args);
va_end(args);
- xenbus_write(xbt,fullpath,val);
-
- return NULL;
+ return xenbus_write(xbt,fullpath,val);
}
static void do_ls_test(const char *pre)
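With this change xenbus_printf() propagates the error string from xenbus_write()
instead of discarding it and returning NULL. A minimal caller sketch under
mini-os's xenbus API (the node name and state value are illustrative):

    /* Error strings returned by mini-os xenbus ops must be checked and freed. */
    char *err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 4 /* XenbusStateConnected */);
    if (err) {
        printk("xenbus_printf failed: %s\n", err);
        free(err);
    }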
diff -r 0b20ac6ec64a -r 71a8366fb212 stubdom/README
--- a/stubdom/README Fri Feb 29 09:18:01 2008 -0700
+++ b/stubdom/README Fri Feb 29 09:19:58 2008 -0700
@@ -6,6 +6,73 @@ Then make install to install the result.
Also, run make and make install in $XEN_ROOT/tools/fs-back
+General Configuration
+=====================
+
+In your HVM config "hvmconfig",
+
+- use /usr/lib/xen/bin/stubdom-dm as dm script
+
+device_model = '/usr/lib/xen/bin/stubdom-dm'
+
+- comment the disk statement:
+
+#disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ]
+
+
+Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is the name of your HVM
+guest) with
+
+kernel = "/usr/lib/xen/boot/stubdom.gz"
+vif = [ '', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
+disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ]
+
+where
+- the first vif ('') is reserved for VNC (see below)
+- 'ip=10.0.1.1,mac= etc...' is the same net configuration as in the hvmconfig
+script,
+- and disk = is the same block configuration as in the hvmconfig script.
+
+Display Configuration
+=====================
+
+There are three possibilities:
+
+* Using SDL
+
+In hvmconfig, disable vnc:
+
+vnc = 0
+
+In stubdom-hvmconfig, set a vfb:
+
+vfb = [ 'type=sdl' ]
+
+* Using a VNC server in the stub domain
+
+In hvmconfig, set vnclisten to "172.30.206.1" for instance. Do not use a host
+name as Mini-OS does not have a name resolver. Do not use 127.0.0.1 since then
+you will not be able to connect to it.
+
+vnc = 1
+vnclisten = "172.30.206.1"
+
+In stubdom-hvmconfig, fill the reserved vif with the same IP, for instance:
+
+vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
+
+* Using a VNC server in dom0
+
+In hvmconfig, disable vnc:
+
+vnc = 0
+
+In stubdom-hvmconfig, set a vfb:
+
+vfb = [ 'type=vnc' ]
+
+and any other parameters as desired.
+
To run
======
@@ -13,32 +80,4 @@ ln -s /usr/share/qemu/keymaps /exports/u
ln -s /usr/share/qemu/keymaps /exports/usr/share/qemu
/usr/sbin/fs-backend &
-
-In your HVM config "hvmconfig",
-
-- use VNC, set vnclisten to "172.30.206.1" for instance. Do not use a host name
-as Mini-OS does not have a name resolver. Do not use 127.0.0.1 since then you
-will not be able to connect to it.
-
-vnc = 1
-vnclisten = "172.30.206.1"
-
-- use /usr/lib/xen/bin/stubdom-dm as dm script
-
-device_model = '/usr/lib/xen/bin/stubdom-dm'
-
-- comment the disk statement:
-#disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ]
-
-Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is your HVM guest domain
-name) with
-
-kernel = "/usr/lib/xen/boot/stubdom.gz"
-vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
-disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ]
-
-where
-- 172.30.206.1 is the IP for vnc,
-- 'ip=10.0.1.1,mac= etc...' is the same net configuration as in the hvmconfig
-script,
-- and disk = is the same block configuration as in the hvmconfig script.
+xm create hvmconfig
diff -r 0b20ac6ec64a -r 71a8366fb212 stubdom/stubdom-dm
--- a/stubdom/stubdom-dm Fri Feb 29 09:18:01 2008 -0700
+++ b/stubdom/stubdom-dm Fri Feb 29 09:19:58 2008 -0700
@@ -62,32 +62,23 @@ done
creation="xm create -c stubdom-$domname target=$domid memory=32"
-(while true ; do sleep 60 ; done) | $creation > /var/log/xen/qemu-dm-$domid.log &
+(while true ; do sleep 60 ; done) | $creation &
#xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" &
consolepid=$!
-
-# Wait for vnc server to appear
-while ! vnc_port=`xenstore-read /local/domain/$domid/console/vnc-port`
-do
- # Check that the stubdom job is still alive
- kill -0 $consolepid || term
- sleep 1
-done
-
-################
-# DEBUG: tcpdump
-#while ! stubdomid=`xm domid stubdom-$domname`
-#do
-# sleep 1
-#done
-#xterm -geometry 160x25+0+$height -e /bin/sh -c "tcpdump -n -i vif$stubdomid.0" &
-#xterm -geometry 160x25+0+$((2 * $height)) -e /bin/sh -c "tcpdump -n -i vif$stubdomid.1" &
###########
# vncviewer
if [ "$vncviewer" = 1 ]
then
+ # Wait for vnc server to appear
+ while ! vnc_port=`xenstore-read /local/domain/$domid/console/vnc-port`
+ do
+ # Check that the stubdom job is still alive
+ kill -0 $consolepid || term
+ sleep 1
+ done
+
vncviewer $ip:$vnc_port &
vncpid=$!
fi
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/blktap/drivers/block-qcow2.c
--- a/tools/blktap/drivers/block-qcow2.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/blktap/drivers/block-qcow2.c Fri Feb 29 09:19:58 2008 -0700
@@ -1241,167 +1241,6 @@ static void create_refcount_update(QCowC
refcount++;
*p = cpu_to_be16(refcount);
}
-}
-
-static int qcow2_create(const char *filename, int64_t total_size,
- const char *backing_file, int flags)
-{
- int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
- QCowHeader header;
- uint64_t tmp, offset;
- QCowCreateState s1, *s = &s1;
-
- memset(s, 0, sizeof(*s));
-
- fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
- if (fd < 0)
- return -1;
- memset(&header, 0, sizeof(header));
- header.magic = cpu_to_be32(QCOW_MAGIC);
- header.version = cpu_to_be32(QCOW_VERSION);
- header.size = cpu_to_be64(total_size * 512);
- header_size = sizeof(header);
- backing_filename_len = 0;
- if (backing_file) {
- header.backing_file_offset = cpu_to_be64(header_size);
- backing_filename_len = strlen(backing_file);
- header.backing_file_size = cpu_to_be32(backing_filename_len);
- header_size += backing_filename_len;
- }
- s->cluster_bits = 12; /* 4 KB clusters */
- s->cluster_size = 1 << s->cluster_bits;
- header.cluster_bits = cpu_to_be32(s->cluster_bits);
- header_size = (header_size + 7) & ~7;
- if (flags & BLOCK_FLAG_ENCRYPT) {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
- } else {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
- }
- l2_bits = s->cluster_bits - 3;
- shift = s->cluster_bits + l2_bits;
- l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift);
- offset = align_offset(header_size, s->cluster_size);
- s->l1_table_offset = offset;
- header.l1_table_offset = cpu_to_be64(s->l1_table_offset);
- header.l1_size = cpu_to_be32(l1_size);
- offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
-
- s->refcount_table = qemu_mallocz(s->cluster_size);
- if (!s->refcount_table)
- goto fail;
- s->refcount_block = qemu_mallocz(s->cluster_size);
- if (!s->refcount_block)
- goto fail;
-
- s->refcount_table_offset = offset;
- header.refcount_table_offset = cpu_to_be64(offset);
- header.refcount_table_clusters = cpu_to_be32(1);
- offset += s->cluster_size;
-
- s->refcount_table[0] = cpu_to_be64(offset);
- s->refcount_block_offset = offset;
- offset += s->cluster_size;
-
- /* update refcounts */
- create_refcount_update(s, 0, header_size);
- create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t));
- create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
- create_refcount_update(s, s->refcount_block_offset, s->cluster_size);
-
- /* write all the data */
- write(fd, &header, sizeof(header));
- if (backing_file) {
- write(fd, backing_file, backing_filename_len);
- }
- lseek(fd, s->l1_table_offset, SEEK_SET);
- tmp = 0;
- for(i = 0;i < l1_size; i++) {
- write(fd, &tmp, sizeof(tmp));
- }
- lseek(fd, s->refcount_table_offset, SEEK_SET);
- write(fd, s->refcount_table, s->cluster_size);
-
- lseek(fd, s->refcount_block_offset, SEEK_SET);
- write(fd, s->refcount_block, s->cluster_size);
-
- qemu_free(s->refcount_table);
- qemu_free(s->refcount_block);
- close(fd);
- return 0;
-fail:
- qemu_free(s->refcount_table);
- qemu_free(s->refcount_block);
- close(fd);
- return -ENOMEM;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
- tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(struct disk_driver *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BDRVQcowState *s = bs->private;
- z_stream strm;
- int ret, out_len;
- uint8_t *out_buf;
- uint64_t cluster_offset;
-
- if (nb_sectors == 0) {
- /* align end of file to a sector boundary to ease reading with
- sector based I/Os */
- cluster_offset = 512 * s->total_sectors;
- cluster_offset = (cluster_offset + 511) & ~511;
- ftruncate(s->fd, cluster_offset);
- return 0;
- }
-
- if (nb_sectors != s->cluster_sectors)
- return -EINVAL;
-
- out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
- if (!out_buf)
- return -ENOMEM;
-
- /* best compression, small window, no zlib header */
- memset(&strm, 0, sizeof(strm));
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
- Z_DEFLATED, -12,
- 9, Z_DEFAULT_STRATEGY);
- if (ret != 0) {
- qemu_free(out_buf);
- return -1;
- }
-
- strm.avail_in = s->cluster_size;
- strm.next_in = (uint8_t *)buf;
- strm.avail_out = s->cluster_size;
- strm.next_out = out_buf;
-
- ret = deflate(&strm, Z_FINISH);
- if (ret != Z_STREAM_END && ret != Z_OK) {
- qemu_free(out_buf);
- deflateEnd(&strm);
- return -1;
- }
- out_len = strm.next_out - out_buf;
-
- deflateEnd(&strm);
-
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
- /* could not compress: write normal cluster */
- qcow_write(bs, sector_num, buf, s->cluster_sectors);
- } else {
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
- out_len, 0, 0);
- cluster_offset &= s->cluster_offset_mask;
- if (bdrv_pwrite(s->fd, cluster_offset, out_buf, out_len) != out_len) {
- qemu_free(out_buf);
- return -1;
- }
- }
-
- qemu_free(out_buf);
- return 0;
}
static int qcow_submit(struct disk_driver *bs)
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block-qcow.c
--- a/tools/ioemu/block-qcow.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/block-qcow.c Fri Feb 29 09:19:58 2008 -0700
@@ -95,7 +95,7 @@ static int qcow_open(BlockDriverState *b
int len, i, shift, ret;
QCowHeader header;
- ret = bdrv_file_open(&s->hd, filename, flags);
+ ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE);
if (ret < 0)
return ret;
if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block-qcow2.c
--- a/tools/ioemu/block-qcow2.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/block-qcow2.c Fri Feb 29 09:19:58 2008 -0700
@@ -191,7 +191,7 @@ static int qcow_open(BlockDriverState *b
int len, i, shift, ret;
QCowHeader header;
- ret = bdrv_file_open(&s->hd, filename, flags);
+ ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE);
if (ret < 0)
return ret;
if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block-raw.c
--- a/tools/ioemu/block-raw.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/block-raw.c Fri Feb 29 09:19:58 2008 -0700
@@ -1489,5 +1489,7 @@ BlockDriver bdrv_host_device = {
.bdrv_pread = raw_pread,
.bdrv_pwrite = raw_pwrite,
.bdrv_getlength = raw_getlength,
+
+ .bdrv_flags = BLOCK_DRIVER_FLAG_EXTENDABLE
};
#endif /* _WIN32 */
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block-vmdk.c
--- a/tools/ioemu/block-vmdk.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/block-vmdk.c Fri Feb 29 09:19:58 2008 -0700
@@ -352,7 +352,7 @@ static int vmdk_open(BlockDriverState *b
uint32_t magic;
int l1_size, i, ret;
- ret = bdrv_file_open(&s->hd, filename, flags);
+ ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE);
if (ret < 0)
return ret;
if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block.c
--- a/tools/ioemu/block.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/block.c Fri Feb 29 09:19:58 2008 -0700
@@ -123,20 +123,23 @@ static int bdrv_rw_badreq_sectors(BlockD
static int bdrv_rw_badreq_sectors(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
- return
+ return (
nb_sectors < 0 ||
nb_sectors > bs->total_sectors ||
- sector_num > bs->total_sectors - nb_sectors;
+ sector_num > bs->total_sectors - nb_sectors
+ ) && !bs->extendable;
}
static int bdrv_rw_badreq_bytes(BlockDriverState *bs,
int64_t offset, int count)
{
int64_t size = bs->total_sectors << SECTOR_BITS;
- return
+ return (
count < 0 ||
count > size ||
- offset > size - count;
+ offset > size - count
+ ) && !bs->extendable;
+
}
void bdrv_register(BlockDriver *bdrv)
@@ -347,6 +350,12 @@ int bdrv_open2(BlockDriverState *bs, con
bs->is_temporary = 0;
bs->encrypted = 0;
+ if (flags & BDRV_O_EXTENDABLE) {
+ if (!(drv->bdrv_flags & BLOCK_DRIVER_FLAG_EXTENDABLE))
+ return -ENOSYS;
+ bs->extendable = 1;
+ }
+
if (flags & BDRV_O_SNAPSHOT) {
BlockDriverState *bs1;
int64_t total_size;
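bdrv_open2() now refuses BDRV_O_EXTENDABLE unless the format driver advertises
BLOCK_DRIVER_FLAG_EXTENDABLE, failing with -ENOSYS. A hedged sketch of a caller
that degrades gracefully, using the bdrv_new()/bdrv_open2() entry points from
this tree (the image path is illustrative):

    BlockDriverState *bs = bdrv_new("hda");
    /* Ask for out-of-range write support; fall back if unsupported. */
    int ret = bdrv_open2(bs, "/tmp/disk.img", BDRV_O_EXTENDABLE, NULL);
    if (ret == -ENOSYS)
        ret = bdrv_open2(bs, "/tmp/disk.img", 0, NULL);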
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block_int.h
--- a/tools/ioemu/block_int.h Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/block_int.h Fri Feb 29 09:19:58 2008 -0700
@@ -23,6 +23,8 @@
*/
#ifndef BLOCK_INT_H
#define BLOCK_INT_H
+
+#define BLOCK_DRIVER_FLAG_EXTENDABLE 0x0001u
struct BlockDriver {
const char *format_name;
@@ -76,6 +78,7 @@ struct BlockDriver {
int (*bdrv_eject)(BlockDriverState *bs, int eject_flag);
int (*bdrv_set_locked)(BlockDriverState *bs, int locked);
+ unsigned bdrv_flags;
BlockDriverAIOCB *free_aiocb;
struct BlockDriver *next;
};
@@ -87,6 +90,7 @@ struct BlockDriverState {
int removable; /* if true, the media can be removed */
int locked; /* if true, the media cannot temporarily be ejected */
int encrypted; /* if true, the media is encrypted */
+ int extendable;/* if true, we may write out of original range */
/* event callback when inserting/removing */
void (*change_cb)(void *opaque);
void *change_opaque;
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/hw/e1000.c
--- a/tools/ioemu/hw/e1000.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/hw/e1000.c Fri Feb 29 09:19:58 2008 -0700
@@ -48,7 +48,7 @@ static int debugflags = DBGBIT(TXERR) |
#endif
#define IOPORT_SIZE 0x40
-#define PNPMMIO_SIZE 0x60000
+#define PNPMMIO_SIZE 0x20000
/*
* HW models:
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/hw/xenfb.c
--- a/tools/ioemu/hw/xenfb.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/hw/xenfb.c Fri Feb 29 09:19:58 2008 -0700
@@ -18,6 +18,12 @@
#include <xs.h>
#include "xenfb.h"
+
+#ifdef CONFIG_STUBDOM
+#include <semaphore.h>
+#include <sched.h>
+#include <fbfront.h>
+#endif
#ifndef BTN_LEFT
#define BTN_LEFT 0x110 /* from <linux/input.h> */
@@ -592,7 +598,8 @@ static int xenfb_send_key(struct xenfb *
}
/* Send a relative mouse movement event */
-static int xenfb_send_motion(struct xenfb *xenfb, int rel_x, int rel_y, int rel_z)
+static int xenfb_send_motion(struct xenfb *xenfb,
+ int rel_x, int rel_y, int rel_z)
{
union xenkbd_in_event event;
@@ -606,7 +613,8 @@ static int xenfb_send_motion(struct xenf
}
/* Send an absolute mouse movement event */
-static int xenfb_send_position(struct xenfb *xenfb, int abs_x, int abs_y, int abs_z)
+static int xenfb_send_position(struct xenfb *xenfb,
+ int abs_x, int abs_y, int rel_z)
{
union xenkbd_in_event event;
@@ -614,7 +622,7 @@ static int xenfb_send_position(struct xe
event.type = XENKBD_TYPE_POS;
event.pos.abs_x = abs_x;
event.pos.abs_y = abs_y;
- event.pos.abs_z = abs_z;
+ event.pos.rel_z = rel_z;
return xenfb_kbd_event(xenfb, &event);
}
@@ -1124,12 +1132,10 @@ static void xenfb_guest_copy(struct xenf
dpy_update(xenfb->ds, x, y, w, h);
}
-/* QEMU display state changed, so refresh the framebuffer copy */
-/* XXX - can we optimize this, or the next func at all ? */
+/* Periodic update of display, no need for any in our case */
static void xenfb_update(void *opaque)
{
struct xenfb *xenfb = opaque;
- xenfb_guest_copy(xenfb, 0, 0, xenfb->width, xenfb->height);
}
/* QEMU display state changed, so refresh the framebuffer copy */
@@ -1169,6 +1175,204 @@ static int xenfb_register_console(struct
return 0;
}
+#ifdef CONFIG_STUBDOM
+static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0);
+static struct kbdfront_dev *kbd_dev;
+static char *kbd_path, *fb_path;
+
+static unsigned char linux2scancode[KEY_MAX + 1];
+
+#define WIDTH 1024
+#define HEIGHT 768
+#define DEPTH 32
+#define LINESIZE (1280 * (DEPTH / 8))
+#define MEMSIZE (LINESIZE * HEIGHT)
+
+int xenfb_connect_vkbd(const char *path)
+{
+ kbd_path = strdup(path);
+ return 0;
+}
+
+int xenfb_connect_vfb(const char *path)
+{
+ fb_path = strdup(path);
+ return 0;
+}
+
+static void xenfb_pv_update(DisplayState *s, int x, int y, int w, int h)
+{
+ struct fbfront_dev *fb_dev = s->opaque;
+ fbfront_update(fb_dev, x, y, w, h);
+}
+
+static void xenfb_pv_resize(DisplayState *s, int w, int h)
+{
+ struct fbfront_dev *fb_dev = s->opaque;
+ fprintf(stderr,"resize to %dx%d required\n", w, h);
+ s->width = w;
+ s->height = h;
+ /* TODO: send resize event if supported */
+ memset(s->data, 0, MEMSIZE);
+ fbfront_update(fb_dev, 0, 0, WIDTH, HEIGHT);
+}
+
+static void xenfb_pv_colourdepth(DisplayState *s, int depth)
+{
+ /* TODO: send redepth event if supported */
+ fprintf(stderr,"redepth to %d required\n", depth);
+}
+
+static void xenfb_kbd_handler(void *opaque)
+{
+#define KBD_NUM_BATCH 64
+ union xenkbd_in_event buf[KBD_NUM_BATCH];
+ int n, i;
+ DisplayState *s = opaque;
+ static int buttons;
+ static int x, y;
+
+ n = kbdfront_receive(kbd_dev, buf, KBD_NUM_BATCH);
+ for (i = 0; i < n; i++) {
+ switch (buf[i].type) {
+
+ case XENKBD_TYPE_MOTION:
+ fprintf(stderr, "FB backend sent us relative mouse motion
event!\n");
+ break;
+
+ case XENKBD_TYPE_POS:
+ {
+ int new_x = buf[i].pos.abs_x;
+ int new_y = buf[i].pos.abs_y;
+ if (new_x >= s->width)
+ new_x = s->width - 1;
+ if (new_y >= s->height)
+ new_y = s->height - 1;
+ if (kbd_mouse_is_absolute()) {
+ kbd_mouse_event(
+ new_x * 0x7FFF / (s->width - 1),
+ new_y * 0x7FFF / (s->height - 1),
+ buf[i].pos.rel_z,
+ buttons);
+ } else {
+ kbd_mouse_event(
+ new_x - x,
+ new_y - y,
+ buf[i].pos.rel_z,
+ buttons);
+ }
+ x = new_x;
+ y = new_y;
+ break;
+ }
+
+ case XENKBD_TYPE_KEY:
+ {
+ int keycode = buf[i].key.keycode;
+ int button = 0;
+
+ if (keycode == BTN_LEFT)
+ button = MOUSE_EVENT_LBUTTON;
+ else if (keycode == BTN_RIGHT)
+ button = MOUSE_EVENT_RBUTTON;
+ else if (keycode == BTN_MIDDLE)
+ button = MOUSE_EVENT_MBUTTON;
+
+ if (button) {
+ if (buf[i].key.pressed)
+ buttons |= button;
+ else
+ buttons &= ~button;
+ if (kbd_mouse_is_absolute())
+ kbd_mouse_event(
+ x * 0x7FFF / s->width,
+ y * 0x7FFF / s->height,
+ 0,
+ buttons);
+ else
+ kbd_mouse_event(0, 0, 0, buttons);
+ } else {
+ int scancode = linux2scancode[keycode];
+ if (!scancode) {
+ fprintf(stderr, "Can't convert keycode %x to
scancode\n", keycode);
+ break;
+ }
+ if (scancode & 0x80) {
+ kbd_put_keycode(0xe0);
+ scancode &= 0x7f;
+ }
+ if (!buf[i].key.pressed)
+ scancode |= 0x80;
+ kbd_put_keycode(scancode);
+ }
+ break;
+ }
+ }
+ }
+}
+
+static void xenfb_pv_refresh(DisplayState *ds)
+{
+ vga_hw_update();
+}
+
+static void kbdfront_thread(void *p)
+{
+ int scancode, keycode;
+ kbd_dev = init_kbdfront(p, 1);
+ if (!kbd_dev) {
+ fprintf(stderr,"can't open keyboard\n");
+ exit(1);
+ }
+ up(&kbd_sem);
+ for (scancode = 0; scancode < 128; scancode++) {
+ keycode = atkbd_set2_keycode[atkbd_unxlate_table[scancode]];
+ linux2scancode[keycode] = scancode;
+ keycode = atkbd_set2_keycode[atkbd_unxlate_table[scancode] | 0x80];
+ linux2scancode[keycode] = scancode | 0x80;
+ }
+}
+
+int xenfb_pv_display_init(DisplayState *ds)
+{
+ void *data;
+ struct fbfront_dev *fb_dev;
+ int kbd_fd;
+
+ if (!fb_path || !kbd_path)
+ return -1;
+
+ create_thread("kbdfront", kbdfront_thread, (void*) kbd_path);
+
+ data = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
+ fb_dev = init_fbfront(fb_path, data, WIDTH, HEIGHT, DEPTH, LINESIZE, MEMSIZE);
+ if (!fb_dev) {
+ fprintf(stderr,"can't open frame buffer\n");
+ exit(1);
+ }
+ free(fb_path);
+
+ down(&kbd_sem);
+ free(kbd_path);
+
+ kbd_fd = kbdfront_open(kbd_dev);
+ qemu_set_fd_handler(kbd_fd, xenfb_kbd_handler, NULL, ds);
+
+ ds->data = data;
+ ds->linesize = LINESIZE;
+ ds->depth = DEPTH;
+ ds->bgr = 0;
+ ds->width = WIDTH;
+ ds->height = HEIGHT;
+ ds->dpy_update = xenfb_pv_update;
+ ds->dpy_resize = xenfb_pv_resize;
+ ds->dpy_colourdepth = NULL; //xenfb_pv_colourdepth;
+ ds->dpy_refresh = xenfb_pv_refresh;
+ ds->opaque = fb_dev;
+ return 0;
+}
+#endif
+
/*
* Local variables:
* c-indent-level: 8
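In the stubdom path above, xenfb_pv_display_init() must not open the keyboard
fd before kbdfront_thread() has finished init_kbdfront(), so the two rendezvous
on kbd_sem. The handshake, reduced to a sketch using the mini-os thread and
semaphore primitives seen in the patch (names here are illustrative):

    static struct semaphore ready = __SEMAPHORE_INITIALIZER(ready, 0);

    static void worker(void *arg)
    {
        /* ... one-time initialisation (e.g. init_kbdfront) ... */
        up(&ready);                /* publish completion */
    }

    static void caller(void)
    {
        create_thread("worker", worker, NULL);
        down(&ready);              /* block until worker has initialised */
        /* the worker's results are now safe to use */
    }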
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/keymaps.c
--- a/tools/ioemu/keymaps.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/keymaps.c Fri Feb 29 09:19:58 2008 -0700
@@ -126,11 +126,11 @@ static kbd_layout_t *parse_keyboard_layo
if (rest && strstr(rest, "numlock")) {
add_to_key_range(&k->keypad_range, keycode);
add_to_key_range(&k->numlock_range, keysym);
- fprintf(stderr, "keypad keysym %04x keycode %d\n",
keysym, keycode);
+ //fprintf(stderr, "keypad keysym %04x keycode %d\n",
keysym, keycode);
}
if (rest && strstr(rest, "shift")) {
add_to_key_range(&k->shift_range, keysym);
- fprintf(stderr, "shift keysym %04x keycode %d\n",
keysym, keycode);
+ //fprintf(stderr, "shift keysym %04x keycode %d\n",
keysym, keycode);
}
/* if(keycode&0x80)
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/monitor.c
--- a/tools/ioemu/monitor.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/monitor.c Fri Feb 29 09:19:58 2008 -0700
@@ -2520,7 +2520,7 @@ static void monitor_handle_command1(void
static void monitor_start_input(void)
{
- readline_start("(HVMXen) ", 0, monitor_handle_command1, NULL);
+ readline_start("(qemu) ", 0, monitor_handle_command1, NULL);
}
static void term_event(void *opaque, int event)
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/vl.c Fri Feb 29 09:19:58 2008 -0700
@@ -7611,9 +7611,7 @@ int main(int argc, char **argv)
}
}
- /* Now send logs to our named config */
- sprintf(qemu_dm_logfilename, "/var/log/xen/qemu-dm-%d.log", domid);
- cpu_set_log_filename(qemu_dm_logfilename);
+ cpu_set_log(0);
#ifndef NO_DAEMONIZE
if (daemonize && !nographic && vnc_display == NULL && vncunused == 0) {
@@ -7831,6 +7829,10 @@ int main(int argc, char **argv)
init_ioports();
/* terminal init */
+#ifdef CONFIG_STUBDOM
+ if (xenfb_pv_display_init(ds) == 0) {
+ } else
+#endif
if (nographic) {
dumb_display_init(ds);
} else if (vnc_display != NULL || vncunused != 0) {
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/vl.h Fri Feb 29 09:19:58 2008 -0700
@@ -614,6 +614,8 @@ typedef struct QEMUSnapshotInfo {
use a disk image format on top of
it (default for
bdrv_file_open()) */
+#define BDRV_O_EXTENDABLE 0x0080 /* allow writes out of original size range;
+ only effective for some drivers */
void bdrv_init(void);
BlockDriver *bdrv_find_format(const char *format_name);
@@ -1525,6 +1527,11 @@ int xenstore_vm_write(int domid, char *k
int xenstore_vm_write(int domid, char *key, char *val);
char *xenstore_vm_read(int domid, char *key, unsigned int *len);
+/* xenfb.c */
+int xenfb_pv_display_init(DisplayState *ds);
+int xenfb_connect_vkbd(const char *path);
+int xenfb_connect_vfb(const char *path);
+
/* helper2.c */
extern long time_offset;
void timeoffset_get(void);
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/ioemu/xenstore.c Fri Feb 29 09:19:58 2008 -0700
@@ -238,6 +238,37 @@ void xenstore_parse_domain_config(int do
}
}
+#ifdef CONFIG_STUBDOM
+ if (pasprintf(&buf, "%s/device/vkbd", path) == -1)
+ goto out;
+
+ free(e);
+ e = xs_directory(xsh, XBT_NULL, buf, &num);
+
+ if (e) {
+ for (i = 0; i < num; i++) {
+ if (pasprintf(&buf, "%s/device/vkbd/%s", path, e[i]) == -1)
+ continue;
+ xenfb_connect_vkbd(buf);
+ }
+ }
+
+ if (pasprintf(&buf, "%s/device/vfb", path) == -1)
+ goto out;
+
+ free(e);
+ e = xs_directory(xsh, XBT_NULL, buf, &num);
+
+ if (e) {
+ for (i = 0; i < num; i++) {
+ if (pasprintf(&buf, "%s/device/vfb/%s", path, e[i]) == -1)
+ continue;
+ xenfb_connect_vfb(buf);
+ }
+ }
+#endif
+
+
/* Set a watch for log-dirty requests from the migration tools */
if (pasprintf(&buf, "/local/domain/0/device-model/%u/logdirty/next-active",
domid) != -1) {
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/python/xen/xend/XendAPI.py Fri Feb 29 09:19:58 2008 -0700
@@ -1761,9 +1761,10 @@ class XendAPI(object):
resource = other_config.get("resource", 0)
port = other_config.get("port", 0)
+ node = other_config.get("node", 0)
xendom.domain_migrate(xeninfo.getDomid(), destination_url,
- bool(live), resource, port)
+ bool(live), resource, port, node)
return xen_api_success_void()
def VM_save(self, _, vm_ref, dest, checkpoint):
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/python/xen/xend/XendCheckpoint.py Fri Feb 29 09:19:58 2008 -0700
@@ -22,6 +22,7 @@ from xen.xend.XendLogging import log
from xen.xend.XendLogging import log
from xen.xend.XendConfig import XendConfig
from xen.xend.XendConstants import *
+from xen.xend import XendNode
SIGNATURE = "LinuxGuestRecord"
QEMU_SIGNATURE = "QemuDeviceModelRecord"
@@ -56,10 +57,23 @@ def read_exact(fd, size, errmsg):
return buf
-def save(fd, dominfo, network, live, dst, checkpoint=False):
+def insert_after(list, pred, value):
+ for i,k in enumerate(list):
+ if type(k) == type([]):
+ if k[0] == pred:
+ list.insert (i+1, value)
+ return
+
+
+def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1):
write_exact(fd, SIGNATURE, "could not write guest state file: signature")
- config = sxp.to_string(dominfo.sxpr())
+ sxprep = dominfo.sxpr()
+
+ if node > -1:
+ insert_after(sxprep,'vcpus',['node', str(node)])
+
+ config = sxp.to_string(sxprep)
domain_name = dominfo.getName()
# Rename the domain temporarily, so that we don't get a name clash if this
@@ -191,6 +205,21 @@ def restore(xd, fd, dominfo = None, paus
dominfo.resume()
else:
dominfo = xd.restore_(vmconfig)
+
+ # repin domain vcpus if a target node number was specified
+ # this is done prior to memory allocation to aid in memory
+ # distribution for NUMA systems.
+ nodenr = -1
+ for i,l in enumerate(vmconfig):
+ if type(l) == type([]):
+ if l[0] == 'node':
+ nodenr = int(l[1])
+
+ if nodenr >= 0:
+ node_to_cpu = XendNode.instance().xc.physinfo()['node_to_cpu']
+ if nodenr < len(node_to_cpu):
+ for v in range(0, dominfo.info['VCPUs_max']):
+ xc.vcpu_setaffinity(dominfo.domid, v, node_to_cpu[nodenr])
store_port = dominfo.getStorePort()
console_port = dominfo.getConsolePort()
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/python/xen/xend/XendDomain.py Fri Feb 29 09:19:58 2008 -0700
@@ -865,7 +865,7 @@ class XendDomain:
raise XendInvalidDomain(domname)
if dominfo.getDomid() == DOM0_ID:
- raise XendError("Cannot save privileged domain %s" % domname)
+ raise XendError("Cannot suspend privileged domain %s" %
domname)
if dominfo._stateGet() != DOM_STATE_RUNNING:
raise VMBadState("Domain is not running",
@@ -910,7 +910,7 @@ class XendDomain:
raise XendInvalidDomain(domname)
if dominfo.getDomid() == DOM0_ID:
- raise XendError("Cannot save privileged domain %s" %
domname)
+ raise XendError("Cannot resume privileged domain %s" %
domname)
if dominfo._stateGet() != XEN_API_VM_POWER_STATE_SUSPENDED:
raise XendError("Cannot resume domain that is not
suspended.")
@@ -1258,7 +1258,7 @@ class XendDomain:
return val
- def domain_migrate(self, domid, dst, live=False, resource=0, port=0):
+ def domain_migrate(self, domid, dst, live=False, resource=0, port=0, node=-1):
"""Start domain migration.
@param domid: Domain ID or Name
@@ -1271,6 +1271,8 @@ class XendDomain:
@type live: bool
@keyword resource: not used??
@rtype: None
+ @keyword node: use node number for target
+ @rtype: int
@raise XendError: Failed to migrate
@raise XendInvalidDomain: Domain is not valid
"""
@@ -1299,7 +1301,7 @@ class XendDomain:
sock.send("receive\n")
sock.recv(80)
- XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst)
+ XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst, node=node)
sock.close()
def domain_save(self, domid, dst, checkpoint=False):
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/python/xen/xend/XendDomainInfo.py Fri Feb 29 09:19:58 2008 -0700
@@ -1406,9 +1406,6 @@ class XendDomainInfo:
def setWeight(self, cpu_weight):
self.info['vcpus_params']['weight'] = cpu_weight
- def setResume(self, state):
- self._resume = state
-
def getRestartCount(self):
return self._readVm('xend/restart_count')
@@ -1963,6 +1960,39 @@ class XendDomainInfo:
if self.info['cpus'] is not None and len(self.info['cpus']) > 0:
for v in range(0, self.info['VCPUs_max']):
xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
+ else:
+ info = xc.physinfo()
+ if info['nr_nodes'] > 1:
+ node_memory_list = info['node_to_memory']
+ needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024
+ candidate_node_list = []
+ for i in range(0, info['nr_nodes']):
+ if node_memory_list[i] >= needmem:
+ candidate_node_list.append(i)
+ if candidate_node_list is None or len(candidate_node_list) == 0:
+ index = node_memory_list.index( max(node_memory_list) )
+ cpumask = info['node_to_cpu'][index]
+ else:
+ nodeload = [0]
+ nodeload = nodeload * info['nr_nodes']
+ from xen.xend import XendDomain
+ doms = XendDomain.instance().list('all')
+ for dom in doms:
+ cpuinfo = dom.getVCPUInfo()
+ for vcpu in sxp.children(cpuinfo, 'vcpu'):
+ def vinfo(n, t):
+ return t(sxp.child_value(vcpu, n))
+ cpumap = vinfo('cpumap', list)
+ for i in candidate_node_list:
+ node_cpumask = info['node_to_cpu'][i]
+ for j in node_cpumask:
+ if j in cpumap:
+ nodeload[i] += 1
+ break
+ index = nodeload.index( min(nodeload) )
+ cpumask = info['node_to_cpu'][index]
+ for v in range(0, self.info['VCPUs_max']):
+ xc.vcpu_setaffinity(self.domid, v, cpumask)
# Use architecture- and image-specific calculations to determine
# the various headrooms necessary, given the raw configured
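The placement heuristic above first filters nodes by free memory, then counts
vcpus of existing domains pinned to each candidate node and picks the least
loaded one. The core selection step, restated as a C sketch (the data layout
is hypothetical):

    /* nodes: NUMA node count; ok[i]: node i has enough free memory;
     * load[i]: vcpus of existing domains already pinned to node i. */
    static int pick_node(int nodes, const int ok[], const int load[])
    {
        int best = -1;
        for (int i = 0; i < nodes; i++)
            if (ok[i] && (best < 0 || load[i] < load[best]))
                best = i;
        return best;               /* -1 if no node qualifies */
    }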
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/python/xen/xend/image.py Fri Feb 29 09:19:58 2008 -0700
@@ -296,7 +296,34 @@ class ImageHandler:
{ 'dom': self.vm.getDomid(), 'read': True, 'write':
True })
log.info("spawning device models: %s %s", self.device_model, args)
# keep track of pid and spawned options to kill it later
- self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
+
+ logfile = "/var/log/xen/qemu-dm-%s.log" % str(self.vm.info['name_label'])
+ if os.path.exists(logfile):
+ if os.path.exists(logfile + ".1"):
+ os.unlink(logfile + ".1")
+ os.rename(logfile, logfile + ".1")
+
+ null = os.open("/dev/null", os.O_RDONLY)
+ logfd = os.open(logfile, os.O_WRONLY|os.O_CREAT|os.O_TRUNC)
+
+ pid = os.fork()
+ if pid == 0: #child
+ try:
+ os.dup2(null, 0)
+ os.dup2(logfd, 1)
+ os.dup2(logfd, 2)
+ os.close(null)
+ os.close(logfd)
+ try:
+ os.execve(self.device_model, args, env)
+ except:
+ os._exit(127)
+ except:
+ os._exit(127)
+ else:
+ self.pid = pid
+ os.close(null)
+ os.close(logfd)
self.vm.storeDom("image/device-model-pid", self.pid)
log.info("device model pid: %d", self.pid)
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/python/xen/xm/main.py Fri Feb 29 09:19:58 2008 -0700
@@ -699,9 +699,6 @@ def xm_save(args):
err(opterr)
sys.exit(1)
- dom = params[0]
- savefile = params[1]
-
checkpoint = False
for (k, v) in options:
if k in ['-c', '--checkpoint']:
@@ -710,9 +707,9 @@ def xm_save(args):
if len(params) != 2:
err("Wrong number of parameters")
usage('save')
- sys.exit(1)
-
- savefile = os.path.abspath(savefile)
+
+ dom = params[0]
+ savefile = os.path.abspath(params[1])
if not os.access(os.path.dirname(savefile), os.W_OK):
err("xm save: Unable to create file %s" % savefile)
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/python/xen/xm/migrate.py Fri Feb 29 09:19:58 2008 -0700
@@ -43,6 +43,10 @@ gopts.opt('port', short='p', val='portnu
fn=set_int, default=0,
use="Use specified port for migration.")
+gopts.opt('node', short='n', val='nodenum',
+ fn=set_int, default=-1,
+ use="Use specified NUMA node on target.")
+
gopts.opt('resource', short='r', val='MBIT',
fn=set_int, default=0,
use="Set level of resource usage for migration.")
@@ -65,11 +69,13 @@ def main(argv):
vm_ref = get_single_vm(dom)
other_config = {
"port": opts.vals.port,
- "resource": opts.vals.resource
+ "resource": opts.vals.resource,
+ "node": opts.vals.node
}
server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live),
other_config)
else:
server.xend.domain.migrate(dom, dst, opts.vals.live,
opts.vals.resource,
- opts.vals.port)
+ opts.vals.port,
+ opts.vals.node)
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/xenstat/libxenstat/src/xenstat_solaris.c
--- a/tools/xenstat/libxenstat/src/xenstat_solaris.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/xenstat/libxenstat/src/xenstat_solaris.c Fri Feb 29 09:19:58 2008 -0700
@@ -113,49 +113,23 @@ static void xenstat_uninit_devs(xenstat_
priv->kc = NULL;
}
-static int parse_nic(const char *nic, char *module, int *instance)
-{
- const char *c;
-
- for (c = &nic[strlen(nic) - 1]; c != nic && isdigit(*c); c--)
- ;
-
- if (c == nic)
- return 0;
-
- c++;
-
- if (sscanf(c, "%d", instance) != 1)
- return 0;
-
- strncpy(module, nic, c - nic);
- module[c - nic] = '\0';
- return 1;
-}
-
static int update_dev_stats(priv_data_t *priv, stdevice_t *dev)
{
- char mod[256];
- const char *name;
- int inst;
kstat_t *ksp;
+ if (kstat_chain_update(priv->kc) == -1)
+ return 0;
+
if (dev->type == DEVICE_NIC) {
- if (!parse_nic(dev->name, mod, &inst))
- return 0;
- name = "mac";
+ ksp = kstat_lookup(priv->kc, "link", 0, (char *)dev->name);
} else {
- strcpy(mod, "xdb");
- inst = dev->instance;
- name = "req_statistics";
- }
-
- if (kstat_chain_update(priv->kc) == -1)
- return 0;
-
- ksp = kstat_lookup(priv->kc, mod, inst, (char *)name);
+ ksp = kstat_lookup(priv->kc, "xdb", dev->instance,
+ (char *)"req_statistics");
+ }
+
if (ksp == NULL)
return 0;
+
if (kstat_read(priv->kc, ksp, NULL) == -1)
return 0;
diff -r 0b20ac6ec64a -r 71a8366fb212 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Fri Feb 29 09:18:01 2008 -0700
+++ b/tools/xentrace/xentrace.c Fri Feb 29 09:19:58 2008 -0700
@@ -15,7 +15,6 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
-#include <sys/vfs.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
@@ -25,6 +24,7 @@
#include <getopt.h>
#include <assert.h>
#include <sys/poll.h>
+#include <sys/statvfs.h>
#include <xen/xen.h>
#include <xen/trace.h>
@@ -87,7 +87,7 @@ void write_buffer(unsigned int cpu, unsi
void write_buffer(unsigned int cpu, unsigned char *start, int size,
int total_size, int outfd)
{
- struct statfs stat;
+ struct statvfs stat;
size_t written = 0;
if ( opts.disk_rsvd != 0 )
@@ -95,13 +95,13 @@ void write_buffer(unsigned int cpu, unsi
unsigned long long freespace;
/* Check that filesystem has enough space. */
- if ( fstatfs (outfd, &stat) )
+ if ( fstatvfs (outfd, &stat) )
{
fprintf(stderr, "Statfs failed!\n");
goto fail;
}
- freespace = stat.f_bsize * (unsigned long long)stat.f_bfree;
+ freespace = stat.f_frsize * (unsigned long long)stat.f_bfree;
if ( total_size )
freespace -= total_size;
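The switch from fstatfs() to fstatvfs() also changes the block-size field:
POSIX statvfs reports free blocks in units of f_frsize, not f_bsize. A minimal
sketch of the corrected computation:

    #include <sys/statvfs.h>

    /* Free bytes on the filesystem containing fd; 0 on error. */
    static unsigned long long fs_free_bytes(int fd)
    {
        struct statvfs st;
        if (fstatvfs(fd, &st) != 0)
            return 0;
        return (unsigned long long)st.f_frsize * st.f_bfree;
    }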
diff -r 0b20ac6ec64a -r 71a8366fb212 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
--- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Fri Feb 29 09:18:01 2008 -0700
+++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Fri Feb 29 09:19:58 2008 -0700
@@ -71,7 +71,7 @@ static int bp_suspend(void)
return suspend_cancelled;
}
-int __xen_suspend(int fast_suspend)
+int __xen_suspend(int fast_suspend, void (*resume_notifier)(void))
{
int err, suspend_cancelled, nr_cpus;
struct ap_suspend_info info;
@@ -101,6 +101,7 @@ int __xen_suspend(int fast_suspend)
local_irq_disable();
suspend_cancelled = bp_suspend();
+ resume_notifier();
local_irq_enable();
smp_mb();
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/ia64/xen/machine_kexec.c
--- a/xen/arch/ia64/xen/machine_kexec.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/arch/ia64/xen/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700
@@ -24,6 +24,7 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/dom_fw_dom0.h>
+#include <asm-generic/sections.h>
#define kexec_flush_icache_page(page) \
do { \
@@ -144,6 +145,54 @@ void machine_reboot_kexec(xen_kexec_imag
machine_kexec(image);
}
+static int machine_kexec_get_xen(xen_kexec_range_t *range)
+{
+ range->start = ia64_tpa(_text);
+ range->size = (unsigned long)_end - (unsigned long)_text;
+ return 0;
+}
+
+#define ELF_PAGE_SHIFT 16
+#define ELF_PAGE_SIZE (__IA64_UL_CONST(1) << ELF_PAGE_SHIFT)
+#define ELF_PAGE_MASK (~(ELF_PAGE_SIZE - 1))
+
+static int machine_kexec_get_xenheap(xen_kexec_range_t *range)
+{
+ range->start = (ia64_tpa(_end) + (ELF_PAGE_SIZE - 1)) & ELF_PAGE_MASK;
+ range->size = (unsigned long)xenheap_phys_end -
+ (unsigned long)range->start;
+ return 0;
+}
+
+static int machine_kexec_get_boot_param(xen_kexec_range_t *range)
+{
+ range->start = __pa(ia64_boot_param);
+ range->size = sizeof(*ia64_boot_param);
+ return 0;
+}
+
+static int machine_kexec_get_efi_memmap(xen_kexec_range_t *range)
+{
+ range->start = ia64_boot_param->efi_memmap;
+ range->size = ia64_boot_param->efi_memmap_size;
+ return 0;
+}
+
+int machine_kexec_get(xen_kexec_range_t *range)
+{
+ switch (range->range) {
+ case KEXEC_RANGE_MA_XEN:
+ return machine_kexec_get_xen(range);
+ case KEXEC_RANGE_MA_XENHEAP:
+ return machine_kexec_get_xenheap(range);
+ case KEXEC_RANGE_MA_BOOT_PARAM:
+ return machine_kexec_get_boot_param(range);
+ case KEXEC_RANGE_MA_EFI_MEMMAP:
+ return machine_kexec_get_efi_memmap(range);
+ }
+ return -EINVAL;
+}
+
/*
* Local variables:
* mode: C
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/powerpc/machine_kexec.c
--- a/xen/arch/powerpc/machine_kexec.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/arch/powerpc/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700
@@ -24,6 +24,12 @@ void machine_kexec(xen_kexec_image_t *im
printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
}
+int machine_kexec_get(xen_kexec_range_t *range)
+{
+ printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+ return -1;
+}
+
/*
* Local variables:
* mode: C
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/machine_kexec.c
--- a/xen/arch/x86/machine_kexec.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/arch/x86/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700
@@ -23,6 +23,9 @@ typedef void (*relocate_new_kernel_t)(
unsigned long indirection_page,
unsigned long *page_list,
unsigned long start_address);
+
+extern int machine_kexec_get_xen(xen_kexec_range_t *range);
+
int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
{
@@ -135,6 +138,13 @@ void machine_kexec(xen_kexec_image_t *im
}
}
+int machine_kexec_get(xen_kexec_range_t *range)
+{
+ if (range->range != KEXEC_RANGE_MA_XEN)
+ return -EINVAL;
+ return machine_kexec_get_xen(range);
+}
+
/*
* Local variables:
* mode: C
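machine_kexec_get() fills in a caller-supplied xen_kexec_range_t describing a
machine-address range. A hedged sketch of the calling pattern (the field names
are from public/kexec.h; the printk is illustrative):

    xen_kexec_range_t range = { .range = KEXEC_RANGE_MA_XEN };
    if ( machine_kexec_get(&range) == 0 )
        printk("Xen range: start=%lx size=%lx\n",
               (unsigned long)range.start, (unsigned long)range.size);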
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/arch/x86/mm/shadow/multi.c Fri Feb 29 09:19:58 2008 -0700
@@ -55,12 +55,6 @@
* l3-and-l2h-only shadow mode for PAE PV guests that would allow them
* to share l2h pages again.
*
- * GUEST_WALK_TABLES TLB FLUSH COALESCE
- * guest_walk_tables can do up to three remote TLB flushes as it walks to
- * the first l1 of a new pagetable. Should coalesce the flushes to the end,
- * and if we do flush, re-do the walk. If anything has changed, then
- * pause all the other vcpus and do the walk *again*.
- *
* PSE disabled / PSE36
* We don't support any modes other than PSE enabled, PSE36 disabled.
* Neither of those would be hard to change, but we'd need to be able to
@@ -246,10 +240,95 @@ static uint32_t set_ad_bits(void *guest_
return 0;
}
+/* This validation is called with the shadow lock held, after write
+ * permission removal. The check is therefore atomic, and no inconsistent
+ * content can be observed before the lock is released.
+ *
+ * Returns 1 to indicate success and 0 for inconsistency.
+ */
+static inline uint32_t
+shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw)
+{
+ struct domain *d = v->domain;
+ guest_l1e_t *l1p;
+ guest_l2e_t *l2p;
+#if GUEST_PAGING_LEVELS >= 4
+ guest_l3e_t *l3p;
+ guest_l4e_t *l4p;
+#endif
+ int mismatch = 0;
+
+ ASSERT(shadow_locked_by_me(d));
+
+ if ( gw->version ==
+ atomic_read(&d->arch.paging.shadow.gtable_dirty_version) )
+ return 1;
+
+ /* We could cache the guest page mappings from the last
+ * guest table walk. However, since this check happens
+ * relatively infrequently, the small cost of
+ * remapping the guest pages here is better than caching the
+ * mappings in every guest table walk.
+ *
+ * Also, when an inconsistency is found, simply return to trigger
+ * another fault instead of re-validating the new path, to keep the
+ * logic simple.
+ */
+ perfc_incr(shadow_check_gwalk);
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+ l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable;
+ mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4);
+ l3p = sh_map_domain_page(gw->l3mfn);
+ mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3);
+ sh_unmap_domain_page(l3p);
+#else
+ mismatch |= (gw->l3e.l3 !=
+ v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3);
+#endif
+ l2p = sh_map_domain_page(gw->l2mfn);
+ mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
+ sh_unmap_domain_page(l2p);
+#else
+ l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable;
+ mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
+#endif
+ if ( !(guest_supports_superpages(v) &&
+ (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
+ {
+ l1p = sh_map_domain_page(gw->l1mfn);
+ mismatch |= (gw->l1e.l1 != l1p[guest_l1_table_offset(va)].l1);
+ sh_unmap_domain_page(l1p);
+ }
+
+ return !mismatch;
+}
+
+/* Remove write access permissions from a gwalk_t in a batch, and
+ * return OR-ed result for TLB flush hint
+ */
+static inline uint32_t
+gw_remove_write_accesses(struct vcpu *v, unsigned long va, walk_t *gw)
+{
+ int rc = 0;
+
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+ rc = sh_remove_write_access(v, gw->l3mfn, 3, va);
+#endif
+ rc |= sh_remove_write_access(v, gw->l2mfn, 2, va);
+#endif
+ if ( !(guest_supports_superpages(v) &&
+ (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
+ rc |= sh_remove_write_access(v, gw->l1mfn, 1, va);
+
+ return rc;
+}
+
/* Walk the guest pagetables, after the manner of a hardware walker.
*
* Inputs: a vcpu, a virtual address, a walk_t to fill, a
- * pointer to a pagefault code, and a flag "shadow_op".
+ * pointer to a pagefault code
*
* We walk the vcpu's guest pagetables, filling the walk_t with what we
* see and adding any Accessed and Dirty bits that are needed in the
@@ -257,10 +336,9 @@ static uint32_t set_ad_bits(void *guest_
* we go. For the purposes of reading pagetables we treat all non-RAM
* memory as containing zeroes.
*
- * If "shadow_op" is non-zero, we are serving a genuine guest memory access,
- * and must (a) be under the shadow lock, and (b) remove write access
- * from any guest PT pages we see, as we will be shadowing them soon
- * and will rely on the contents' not having changed.
+ * The walk is done in a lock-free style, with some sanity checks postponed
+ * until after the shadow lock is grabbed. Those delayed checks make sure
+ * that no inconsistent mapping is translated into the shadow page tables.
*
* Returns 0 for success, or the set of permission bits that we failed on
* if the walk did not complete.
@@ -268,8 +346,7 @@ static uint32_t set_ad_bits(void *guest_
* checked the old return code anyway.
*/
static uint32_t
-guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
- uint32_t pfec, int shadow_op)
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, uint32_t pfec)
{
struct domain *d = v->domain;
p2m_type_t p2mt;
@@ -282,11 +359,12 @@ guest_walk_tables(struct vcpu *v, unsign
uint32_t gflags, mflags, rc = 0;
int pse;
- ASSERT(!shadow_op || shadow_locked_by_me(d));
-
perfc_incr(shadow_guest_walk);
memset(gw, 0, sizeof(*gw));
gw->va = va;
+
+ gw->version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version);
+ rmb();
/* Mandatory bits that must be set in every entry. We invert NX, to
* calculate as if there were an "X" bit that allowed access.
@@ -312,9 +390,7 @@ guest_walk_tables(struct vcpu *v, unsign
goto out;
}
ASSERT(mfn_valid(gw->l3mfn));
- /* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
- flush_tlb_mask(d->domain_dirty_cpumask);
+
/* Get the l3e and check its flags*/
l3p = sh_map_domain_page(gw->l3mfn);
gw->l3e = l3p[guest_l3_table_offset(va)];
@@ -343,9 +419,7 @@ guest_walk_tables(struct vcpu *v, unsign
goto out;
}
ASSERT(mfn_valid(gw->l2mfn));
- /* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
- flush_tlb_mask(d->domain_dirty_cpumask);
+
/* Get the l2e */
l2p = sh_map_domain_page(gw->l2mfn);
gw->l2e = l2p[guest_l2_table_offset(va)];
@@ -403,10 +477,6 @@ guest_walk_tables(struct vcpu *v, unsign
goto out;
}
ASSERT(mfn_valid(gw->l1mfn));
- /* This mfn is a pagetable: make sure the guest can't write to it. */
- if ( shadow_op
- && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
- flush_tlb_mask(d->domain_dirty_cpumask);
l1p = sh_map_domain_page(gw->l1mfn);
gw->l1e = l1p[guest_l1_table_offset(va)];
gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
@@ -548,8 +618,7 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
// XXX -- this is expensive, but it's easy to cobble together...
// FIXME!
- shadow_lock(v->domain);
- if ( guest_walk_tables(v, addr, &gw, PFEC_page_present, 1) == 0
+ if ( guest_walk_tables(v, addr, &gw, PFEC_page_present) == 0
&& mfn_valid(gw.l1mfn) )
{
if ( gl1mfn )
@@ -558,8 +627,6 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
(guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
}
- shadow_unlock(v->domain);
-
return pl1e;
}
@@ -573,10 +640,8 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
// XXX -- this is expensive, but it's easy to cobble together...
// FIXME!
- shadow_lock(v->domain);
- (void) guest_walk_tables(v, addr, &gw, PFEC_page_present, 1);
+ (void) guest_walk_tables(v, addr, &gw, PFEC_page_present);
*(guest_l1e_t *)eff_l1e = gw.l1e;
- shadow_unlock(v->domain);
}
#endif /* CONFIG==SHADOW==GUEST */
@@ -2842,14 +2907,12 @@ static int sh_page_fault(struct vcpu *v,
return 0;
}
- shadow_lock(d);
-
- shadow_audit_tables(v);
-
- if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 )
+ if ( guest_walk_tables(v, va, &gw, regs->error_code) != 0 )
{
perfc_incr(shadow_fault_bail_real_fault);
- goto not_a_shadow_fault;
+ SHADOW_PRINTK("not a shadow fault\n");
+ reset_early_unshadow(v);
+ return 0;
}
/* It's possible that the guest has put pagetables in memory that it has
@@ -2859,11 +2922,8 @@ static int sh_page_fault(struct vcpu *v,
if ( unlikely(d->is_shutting_down) )
{
SHADOW_PRINTK("guest is shutting down\n");
- shadow_unlock(d);
return 0;
}
-
- sh_audit_gw(v, &gw);
/* What kind of access are we dealing with? */
ft = ((regs->error_code & PFEC_write_access)
@@ -2879,7 +2939,8 @@ static int sh_page_fault(struct vcpu *v,
perfc_incr(shadow_fault_bail_bad_gfn);
SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
gfn_x(gfn), mfn_x(gmfn));
- goto not_a_shadow_fault;
+ reset_early_unshadow(v);
+ return 0;
}
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
@@ -2887,6 +2948,28 @@ static int sh_page_fault(struct vcpu *v,
vtlb_insert(v, va >> PAGE_SHIFT, gfn_x(gfn),
regs->error_code | PFEC_page_present);
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
+
+ shadow_lock(d);
+
+ if ( gw_remove_write_accesses(v, va, &gw) )
+ {
+ /* Write permission removal is also a hint that other gwalks
+ * overlapping with this one may be inconsistent
+ */
+ perfc_incr(shadow_rm_write_flush_tlb);
+ atomic_inc(&d->arch.paging.shadow.gtable_dirty_version);
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ }
+
+ if ( !shadow_check_gwalk(v, va, &gw) )
+ {
+ perfc_incr(shadow_inconsistent_gwalk);
+ shadow_unlock(d);
+ return EXCRET_fault_fixed;
+ }
+
+ shadow_audit_tables(v);
+ sh_audit_gw(v, &gw);
/* Make sure there is enough free shadow memory to build a chain of
* shadow tables. (We never allocate a top-level shadow on this path,
@@ -3223,7 +3306,7 @@ sh_gva_to_gfn(struct vcpu *v, unsigned l
return vtlb_gfn;
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
- if ( guest_walk_tables(v, va, &gw, pfec[0], 0) != 0 )
+ if ( guest_walk_tables(v, va, &gw, pfec[0]) != 0 )
{
if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
pfec[0] &= ~PFEC_page_present;
@@ -4276,6 +4359,8 @@ static void emulate_unmap_dest(struct vc
}
else
sh_unmap_domain_page(addr);
+
+ atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
}
int
@@ -4430,29 +4515,13 @@ static char * sh_audit_flags(struct vcpu
return NULL;
}
-static inline mfn_t
-audit_gfn_to_mfn(struct vcpu *v, gfn_t gfn, mfn_t gmfn)
-/* Convert this gfn to an mfn in the manner appropriate for the
- * guest pagetable it's used in (gmfn) */
-{
- p2m_type_t p2mt;
- if ( !shadow_mode_translate(v->domain) )
- return _mfn(gfn_x(gfn));
-
- if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask)
- != PGT_writable_page )
- return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
- else
- return gfn_to_mfn(v->domain, gfn, &p2mt);
-}
-
-
int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
{
guest_l1e_t *gl1e, *gp;
shadow_l1e_t *sl1e;
mfn_t mfn, gmfn, gl1mfn;
gfn_t gfn;
+ p2m_type_t p2mt;
char *s;
int done = 0;
@@ -4491,7 +4560,7 @@ int sh_audit_l1_table(struct vcpu *v, mf
{
gfn = guest_l1e_get_gfn(*gl1e);
mfn = shadow_l1e_get_mfn(*sl1e);
- gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
+ gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
" --> %" PRI_mfn " != mfn %" PRI_mfn,
@@ -4532,6 +4601,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
shadow_l2e_t *sl2e;
mfn_t mfn, gmfn, gl2mfn;
gfn_t gfn;
+ p2m_type_t p2mt;
char *s;
int done = 0;
@@ -4550,7 +4620,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
mfn = shadow_l2e_get_mfn(*sl2e);
gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)
? get_fl1_shadow_status(v, gfn)
- : get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl2mfn),
+ : get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt),
SH_type_l1_shadow);
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
@@ -4558,7 +4628,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
" --> %" PRI_mfn " != mfn %" PRI_mfn,
gfn_x(gfn),
(guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
- : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)),
+ : mfn_x(gfn_to_mfn(v->domain, gfn, &p2mt)),
mfn_x(gmfn), mfn_x(mfn));
}
});
@@ -4573,6 +4643,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
shadow_l3e_t *sl3e;
mfn_t mfn, gmfn, gl3mfn;
gfn_t gfn;
+ p2m_type_t p2mt;
char *s;
int done = 0;
@@ -4589,7 +4660,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
{
gfn = guest_l3e_get_gfn(*gl3e);
mfn = shadow_l3e_get_mfn(*sl3e);
- gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl3mfn),
+ gmfn = get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt),
((GUEST_PAGING_LEVELS == 3 ||
is_pv_32on64_vcpu(v))
&& !shadow_mode_external(v->domain)
@@ -4612,6 +4683,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
shadow_l4e_t *sl4e;
mfn_t mfn, gmfn, gl4mfn;
gfn_t gfn;
+ p2m_type_t p2mt;
char *s;
int done = 0;
@@ -4628,7 +4700,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
{
gfn = guest_l4e_get_gfn(*gl4e);
mfn = shadow_l4e_get_mfn(*sl4e);
- gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl4mfn),
+ gmfn = get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt),
SH_type_l3_shadow);
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/arch/x86/mm/shadow/types.h Fri Feb 29 09:19:58 2008 -0700
@@ -435,6 +435,7 @@ struct shadow_walk_t
#endif
mfn_t l2mfn; /* MFN that the level 2 entry was in */
mfn_t l1mfn; /* MFN that the level 1 entry was in */
+ int version; /* Saved guest dirty version */
};
/* macros for dealing with the naming of the internal function names of the
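The new version field pairs with the gtable_dirty_version counter added to struct shadow_domain (see the xen/include/asm-x86/domain.h hunk below): the walker snapshots the counter before reading guest tables and re-checks it before acting on the walk, so a concurrent emulated guest-table write (which bumps the counter in emulate_unmap_dest() above) forces a rewalk. A rough sketch of the pattern, with stand-in names only:

    /* illustrative sketch, not the in-tree code */
    gw.version = atomic_read(
        &v->domain->arch.paging.shadow.gtable_dirty_version);
    rmb();
    walk_guest_tables(v, va, &gw);    /* hypothetical walker */

    /* ... later, before committing to the walk's result: */
    if ( gw.version !=
         atomic_read(&v->domain->arch.paging.shadow.gtable_dirty_version) )
        goto rewalk;                  /* a write was emulated under us */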
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/x86_32/Makefile
--- a/xen/arch/x86/x86_32/Makefile Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/arch/x86/x86_32/Makefile Fri Feb 29 09:19:58 2008 -0700
@@ -4,6 +4,7 @@ obj-y += mm.o
obj-y += mm.o
obj-y += seg_fixup.o
obj-y += traps.o
+obj-y += machine_kexec.o
obj-$(crash_debug) += gdbstub.o
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/x86_32/machine_kexec.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_32/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * machine_kexec.c
+ *
+ * Xen port written by:
+ * - Simon 'Horms' Horman <horms@xxxxxxxxxxxx>
+ * - Magnus Damm <magnus@xxxxxxxxxxxxx>
+ */
+
+#ifndef CONFIG_COMPAT
+
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <asm/page.h>
+#include <public/kexec.h>
+
+int machine_kexec_get_xen(xen_kexec_range_t *range)
+{
+ range->start = virt_to_maddr(_start);
+ range->size = (unsigned long)xenheap_phys_end -
+ (unsigned long)range->start;
+ return 0;
+}
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/arch/x86/x86_64/Makefile Fri Feb 29 09:19:58 2008 -0700
@@ -4,6 +4,7 @@ obj-y += gpr_switch.o
obj-y += gpr_switch.o
obj-y += mm.o
obj-y += traps.o
+obj-y += machine_kexec.o
obj-$(crash_debug) += gdbstub.o
obj-$(CONFIG_COMPAT) += compat.o
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/x86_64/machine_kexec.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700
@@ -0,0 +1,32 @@
+/******************************************************************************
+ * machine_kexec.c
+ *
+ * Xen port written by:
+ * - Simon 'Horms' Horman <horms@xxxxxxxxxxxx>
+ * - Magnus Damm <magnus@xxxxxxxxxxxxx>
+ */
+
+#ifndef CONFIG_COMPAT
+
+#include <xen/types.h>
+#include <asm/page.h>
+#include <public/kexec.h>
+
+int machine_kexec_get_xen(xen_kexec_range_t *range)
+{
+ range->start = xenheap_phys_start;
+ range->size = (unsigned long)xenheap_phys_end -
+ (unsigned long)range->start;
+ return 0;
+}
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/common/compat/kexec.c
--- a/xen/common/compat/kexec.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/common/compat/kexec.c Fri Feb 29 09:19:58 2008 -0700
@@ -8,11 +8,6 @@
#define ret_t int
#define do_kexec_op compat_kexec_op
-
-#undef kexec_get
-#define kexec_get(x) compat_kexec_get_##x
-#define xen_kexec_range compat_kexec_range
-#define xen_kexec_range_t compat_kexec_range_t
#define kexec_load_unload compat_kexec_load_unload
#define xen_kexec_load compat_kexec_load
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/common/kexec.c
--- a/xen/common/kexec.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/common/kexec.c Fri Feb 29 09:19:58 2008 -0700
@@ -20,6 +20,7 @@
#include <xen/spinlock.h>
#include <xen/version.h>
#include <xen/console.h>
+#include <xen/kexec.h>
#include <public/elfnote.h>
#include <xsm/xsm.h>
@@ -153,11 +154,7 @@ static int sizeof_note(const char *name,
ELFNOTE_ALIGN(descsz));
}
-#define kexec_get(x) kexec_get_##x
-
-#endif
-
-static int kexec_get(reserve)(xen_kexec_range_t *range)
+static int kexec_get_reserve(xen_kexec_range_t *range)
{
if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) {
range->start = kexec_crash_area.start;
@@ -168,18 +165,7 @@ static int kexec_get(reserve)(xen_kexec_
return 0;
}
-static int kexec_get(xen)(xen_kexec_range_t *range)
-{
-#ifdef CONFIG_X86_64
- range->start = xenheap_phys_start;
-#else
- range->start = virt_to_maddr(_start);
-#endif
-    range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start;
- return 0;
-}
-
-static int kexec_get(cpu)(xen_kexec_range_t *range)
+static int kexec_get_cpu(xen_kexec_range_t *range)
{
int nr = range->nr;
int nr_bytes = 0;
@@ -223,7 +209,27 @@ static int kexec_get(cpu)(xen_kexec_rang
return 0;
}
-static int kexec_get(range)(XEN_GUEST_HANDLE(void) uarg)
+static int kexec_get_range_internal(xen_kexec_range_t *range)
+{
+ int ret = -EINVAL;
+
+ switch ( range->range )
+ {
+ case KEXEC_RANGE_MA_CRASH:
+ ret = kexec_get_reserve(range);
+ break;
+ case KEXEC_RANGE_MA_CPU:
+ ret = kexec_get_cpu(range);
+ break;
+ default:
+ ret = machine_kexec_get(range);
+ break;
+ }
+
+ return ret;
+}
+
+static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg)
{
xen_kexec_range_t range;
int ret = -EINVAL;
@@ -231,24 +237,49 @@ static int kexec_get(range)(XEN_GUEST_HA
if ( unlikely(copy_from_guest(&range, uarg, 1)) )
return -EFAULT;
- switch ( range.range )
- {
- case KEXEC_RANGE_MA_CRASH:
- ret = kexec_get(reserve)(&range);
- break;
- case KEXEC_RANGE_MA_XEN:
- ret = kexec_get(xen)(&range);
- break;
- case KEXEC_RANGE_MA_CPU:
- ret = kexec_get(cpu)(&range);
- break;
- }
+ ret = kexec_get_range_internal(&range);
if ( ret == 0 && unlikely(copy_to_guest(uarg, &range, 1)) )
return -EFAULT;
return ret;
}
+
+#else /* COMPAT */
+
+#ifdef CONFIG_COMPAT
+static int kexec_get_range_compat(XEN_GUEST_HANDLE(void) uarg)
+{
+ xen_kexec_range_t range;
+ compat_kexec_range_t compat_range;
+ int ret = -EINVAL;
+
+ if ( unlikely(copy_from_guest(&compat_range, uarg, 1)) )
+ return -EFAULT;
+
+ range.range = compat_range.range;
+ range.nr = compat_range.nr;
+ range.size = compat_range.size;
+ range.start = compat_range.start;
+
+ ret = kexec_get_range_internal(&range);
+
+ if ( ret == 0 ) {
+        compat_range.range = range.range;
+        compat_range.nr = range.nr;
+        compat_range.size = range.size;
+        compat_range.start = range.start;
+
+ if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
+ return -EFAULT;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_COMPAT */
+
+#endif /* COMPAT */
+
#ifndef COMPAT
@@ -375,7 +406,11 @@ ret_t do_kexec_op(unsigned long op, XEN_
switch ( op )
{
case KEXEC_CMD_kexec_get_range:
- ret = kexec_get(range)(uarg);
+#ifndef COMPAT
+ ret = kexec_get_range(uarg);
+#else
+ ret = kexec_get_range_compat(uarg);
+#endif
break;
case KEXEC_CMD_kexec_load:
case KEXEC_CMD_kexec_unload:
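With the KEXEC_RANGE_MA_XEN handling moved out of common code, the default case above falls through to a per-architecture machine_kexec_get(). A sketch of the shape that dispatcher presumably takes on x86 (the real one lives in xen/arch/x86/machine_kexec.c, outside this hunk):

    int machine_kexec_get(xen_kexec_range_t *range)
    {
        if ( range->range != KEXEC_RANGE_MA_XEN )
            return -EINVAL;
        return machine_kexec_get_xen(range);  /* per-subarch, added above */
    }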
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/acpi/tables.c
--- a/xen/drivers/acpi/tables.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/drivers/acpi/tables.c Fri Feb 29 09:19:58 2008 -0700
@@ -60,6 +60,7 @@ static char *acpi_table_signatures[ACPI_
[ACPI_HPET] = "HPET",
[ACPI_MCFG] = "MCFG",
[ACPI_DMAR] = "DMAR",
+ [ACPI_IVRS] = "IVRS",
};
static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/Makefile
--- a/xen/drivers/passthrough/amd/Makefile Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/drivers/passthrough/amd/Makefile Fri Feb 29 09:19:58 2008 -0700
@@ -2,3 +2,4 @@ obj-y += iommu_init.o
obj-y += iommu_init.o
obj-y += iommu_map.o
obj-y += pci_amd_iommu.o
+obj-y += iommu_acpi.o
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/iommu_acpi.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c Fri Feb 29 09:19:58 2008 -0700
@@ -0,0 +1,874 @@
+/*
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
+ * Author: Leo Duran <leo.duran@xxxxxxx>
+ * Author: Wei Wang <wei.wang2@xxxxxxx> - adapted to xen
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <asm/amd-iommu.h>
+#include <asm/hvm/svm/amd-iommu-proto.h>
+#include <asm/hvm/svm/amd-iommu-acpi.h>
+
+extern unsigned long amd_iommu_page_entries;
+extern unsigned short ivrs_bdf_entries;
+extern struct ivrs_mappings *ivrs_mappings;
+
+static struct amd_iommu * __init find_iommu_from_bdf_cap(
+ u16 bdf, u8 cap_offset)
+{
+ struct amd_iommu *iommu;
+
+ for_each_amd_iommu( iommu )
+ if ( iommu->bdf == bdf && iommu->cap_offset == cap_offset )
+ return iommu;
+
+ return NULL;
+}
+
+static void __init reserve_iommu_exclusion_range(
+ struct amd_iommu *iommu, uint64_t base, uint64_t limit)
+{
+ /* need to extend exclusion range? */
+ if ( iommu->exclusion_enable )
+ {
+ if ( iommu->exclusion_base < base )
+ base = iommu->exclusion_base;
+ if ( iommu->exclusion_limit > limit )
+ limit = iommu->exclusion_limit;
+ }
+
+ iommu->exclusion_enable = IOMMU_CONTROL_ENABLED;
+ iommu->exclusion_base = base;
+ iommu->exclusion_limit = limit;
+}
+
+static void __init reserve_iommu_exclusion_range_all(struct amd_iommu *iommu,
+ unsigned long base, unsigned long limit)
+{
+ reserve_iommu_exclusion_range(iommu, base, limit);
+ iommu->exclusion_allow_all = IOMMU_CONTROL_ENABLED;
+}
+
+static void __init reserve_unity_map_for_device(u16 bdf, unsigned long base,
+ unsigned long length, u8 iw, u8 ir)
+{
+ unsigned long old_top, new_top;
+
+ /* need to extend unity-mapped range? */
+ if ( ivrs_mappings[bdf].unity_map_enable )
+ {
+ old_top = ivrs_mappings[bdf].addr_range_start +
+ ivrs_mappings[bdf].addr_range_length;
+ new_top = base + length;
+ if ( old_top > new_top )
+ new_top = old_top;
+ if ( ivrs_mappings[bdf].addr_range_start < base )
+ base = ivrs_mappings[bdf].addr_range_start;
+ length = new_top - base;
+ }
+
+    /* extend r/w permissions and keep the aggregate */
+ if ( iw )
+ ivrs_mappings[bdf].write_permission = IOMMU_CONTROL_ENABLED;
+ if ( ir )
+ ivrs_mappings[bdf].read_permission = IOMMU_CONTROL_ENABLED;
+ ivrs_mappings[bdf].unity_map_enable = IOMMU_CONTROL_ENABLED;
+ ivrs_mappings[bdf].addr_range_start = base;
+ ivrs_mappings[bdf].addr_range_length = length;
+}
+
+static int __init register_exclusion_range_for_all_devices(
+ unsigned long base, unsigned long limit, u8 iw, u8 ir)
+{
+ unsigned long range_top, iommu_top, length;
+ struct amd_iommu *iommu;
+ u16 bdf;
+
+    /* is any part of the exclusion range inside the IOMMU virtual address space? */
+ /* note: 'limit' parameter is assumed to be page-aligned */
+ range_top = limit + PAGE_SIZE;
+ iommu_top = max_page * PAGE_SIZE;
+ if ( base < iommu_top )
+ {
+ if (range_top > iommu_top)
+ range_top = iommu_top;
+ length = range_top - base;
+ /* reserve r/w unity-mapped page entries for devices */
+ /* note: these entries are part of the exclusion range */
+ for (bdf = 0; bdf < ivrs_bdf_entries; ++bdf)
+ reserve_unity_map_for_device(bdf, base, length, iw, ir);
+ /* push 'base' just outside of virtual address space */
+ base = iommu_top;
+ }
+ /* register IOMMU exclusion range settings */
+ if (limit >= iommu_top)
+ {
+ for_each_amd_iommu( iommu )
+ reserve_iommu_exclusion_range_all(iommu, base, limit);
+ }
+
+ return 0;
+}
+
+static int __init register_exclusion_range_for_device(u16 bdf,
+ unsigned long base, unsigned long limit, u8 iw, u8 ir)
+{
+ unsigned long range_top, iommu_top, length;
+ struct amd_iommu *iommu;
+ u16 bus, devfn, req;
+
+ bus = bdf >> 8;
+ devfn = bdf & 0xFF;
+ iommu = find_iommu_for_device(bus, devfn);
+ if ( !iommu )
+ {
+ dprintk(XENLOG_ERR, "IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
+ return -ENODEV;
+ }
+ req = ivrs_mappings[bdf].dte_requestor_id;
+
+ /* note: 'limit' parameter is assumed to be page-aligned */
+ range_top = limit + PAGE_SIZE;
+ iommu_top = max_page * PAGE_SIZE;
+ if ( base < iommu_top )
+ {
+ if (range_top > iommu_top)
+ range_top = iommu_top;
+ length = range_top - base;
+ /* reserve unity-mapped page entries for device */
+ /* note: these entries are part of the exclusion range */
+ reserve_unity_map_for_device(bdf, base, length, iw, ir);
+ reserve_unity_map_for_device(req, base, length, iw, ir);
+
+ /* push 'base' just outside of virtual address space */
+ base = iommu_top;
+ }
+
+ /* register IOMMU exclusion range settings for device */
+ if ( limit >= iommu_top )
+ {
+ reserve_iommu_exclusion_range(iommu, base, limit);
+ ivrs_mappings[bdf].dte_allow_exclusion = IOMMU_CONTROL_ENABLED;
+ ivrs_mappings[req].dte_allow_exclusion = IOMMU_CONTROL_ENABLED;
+ }
+
+ return 0;
+}
+
+static int __init register_exclusion_range_for_iommu_devices(
+ struct amd_iommu *iommu,
+ unsigned long base, unsigned long limit, u8 iw, u8 ir)
+{
+ unsigned long range_top, iommu_top, length;
+ u16 bus, devfn, bdf, req;
+
+    /* is any part of the exclusion range inside the IOMMU virtual address space? */
+ /* note: 'limit' parameter is assumed to be page-aligned */
+ range_top = limit + PAGE_SIZE;
+ iommu_top = max_page * PAGE_SIZE;
+ if ( base < iommu_top )
+ {
+ if (range_top > iommu_top)
+ range_top = iommu_top;
+ length = range_top - base;
+ /* reserve r/w unity-mapped page entries for devices */
+ /* note: these entries are part of the exclusion range */
+ for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf )
+ {
+ bus = bdf >> 8;
+ devfn = bdf & 0xFF;
+ if ( iommu == find_iommu_for_device(bus, devfn) )
+ {
+ reserve_unity_map_for_device(bdf, base, length, iw, ir);
+ req = ivrs_mappings[bdf].dte_requestor_id;
+ reserve_unity_map_for_device(req, base, length, iw, ir);
+ }
+ }
+
+ /* push 'base' just outside of virtual address space */
+ base = iommu_top;
+ }
+
+ /* register IOMMU exclusion range settings */
+ if (limit >= iommu_top)
+ reserve_iommu_exclusion_range_all(iommu, base, limit);
+ return 0;
+}
+
+static int __init parse_ivmd_device_select(
+ struct acpi_ivmd_block_header *ivmd_block,
+ unsigned long base, unsigned long limit, u8 iw, u8 ir)
+{
+ u16 bdf;
+
+ bdf = ivmd_block->header.dev_id;
+ if (bdf >= ivrs_bdf_entries)
+ {
+ dprintk(XENLOG_ERR, "IVMD Error: Invalid Dev_Id 0x%x\n", bdf);
+ return -ENODEV;
+ }
+
+ return register_exclusion_range_for_device(bdf, base, limit, iw, ir);
+}
+
+static int __init parse_ivmd_device_range(
+ struct acpi_ivmd_block_header *ivmd_block,
+ unsigned long base, unsigned long limit, u8 iw, u8 ir)
+{
+ u16 first_bdf, last_bdf, bdf;
+ int error;
+
+ first_bdf = ivmd_block->header.dev_id;
+ if (first_bdf >= ivrs_bdf_entries)
+ {
+ dprintk(XENLOG_ERR, "IVMD Error: "
+ "Invalid Range_First Dev_Id 0x%x\n", first_bdf);
+ return -ENODEV;
+ }
+
+ last_bdf = ivmd_block->last_dev_id;
+ if (last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf)
+ {
+ dprintk(XENLOG_ERR, "IVMD Error: "
+ "Invalid Range_Last Dev_Id 0x%x\n", last_bdf);
+ return -ENODEV;
+ }
+
+ dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n",
+ first_bdf, last_bdf);
+
+ for ( bdf = first_bdf, error = 0;
+ bdf <= last_bdf && !error; ++bdf )
+ {
+ error = register_exclusion_range_for_device(
+ bdf, base, limit, iw, ir);
+ }
+
+ return error;
+}
+
+static int __init parse_ivmd_device_iommu(
+ struct acpi_ivmd_block_header *ivmd_block,
+ unsigned long base, unsigned long limit, u8 iw, u8 ir)
+{
+ struct amd_iommu *iommu;
+
+ /* find target IOMMU */
+ iommu = find_iommu_from_bdf_cap(ivmd_block->header.dev_id,
+ ivmd_block->cap_offset);
+ if ( !iommu )
+ {
+ dprintk(XENLOG_ERR,
+ "IVMD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n",
+ ivmd_block->header.dev_id, ivmd_block->cap_offset);
+ return -ENODEV;
+ }
+
+ return register_exclusion_range_for_iommu_devices(
+ iommu, base, limit, iw, ir);
+}
+
+static int __init parse_ivmd_block(struct acpi_ivmd_block_header *ivmd_block)
+{
+ unsigned long start_addr, mem_length, base, limit;
+ u8 iw, ir;
+
+ if (ivmd_block->header.length <
+ sizeof(struct acpi_ivmd_block_header))
+ {
+ dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Length!\n");
+ return -ENODEV;
+ }
+
+ start_addr = (unsigned long)ivmd_block->start_addr;
+ mem_length = (unsigned long)ivmd_block->mem_length;
+ base = start_addr & PAGE_MASK;
+ limit = (start_addr + mem_length - 1) & PAGE_MASK;
+
+ dprintk(XENLOG_INFO, "IVMD Block: Type 0x%x\n",
+ ivmd_block->header.type);
+ dprintk(XENLOG_INFO, " Start_Addr_Phys 0x%lx\n", start_addr);
+ dprintk(XENLOG_INFO, " Mem_Length 0x%lx\n", mem_length);
+
+ if ( get_field_from_byte(ivmd_block->header.flags,
+ AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK,
+ AMD_IOMMU_ACPI_EXCLUSION_RANGE_SHIFT) )
+ iw = ir = IOMMU_CONTROL_ENABLED;
+ else if ( get_field_from_byte(ivmd_block->header.flags,
+ AMD_IOMMU_ACPI_UNITY_MAPPING_MASK,
+ AMD_IOMMU_ACPI_UNITY_MAPPING_SHIFT) )
+ {
+ iw = get_field_from_byte(ivmd_block->header.flags,
+ AMD_IOMMU_ACPI_IW_PERMISSION_MASK,
+ AMD_IOMMU_ACPI_IW_PERMISSION_SHIFT);
+ ir = get_field_from_byte(ivmd_block->header.flags,
+ AMD_IOMMU_ACPI_IR_PERMISSION_MASK,
+ AMD_IOMMU_ACPI_IR_PERMISSION_SHIFT);
+ }
+ else
+ {
+        dprintk(XENLOG_ERR, "IVMD Error: Invalid Flag Field!\n");
+ return -ENODEV;
+ }
+
+ switch( ivmd_block->header.type )
+ {
+ case AMD_IOMMU_ACPI_IVMD_ALL_TYPE:
+ return register_exclusion_range_for_all_devices(
+ base, limit, iw, ir);
+
+ case AMD_IOMMU_ACPI_IVMD_ONE_TYPE:
+ return parse_ivmd_device_select(ivmd_block,
+ base, limit, iw, ir);
+
+ case AMD_IOMMU_ACPI_IVMD_RANGE_TYPE:
+ return parse_ivmd_device_range(ivmd_block,
+ base, limit, iw, ir);
+
+ case AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE:
+ return parse_ivmd_device_iommu(ivmd_block,
+ base, limit, iw, ir);
+
+ default:
+ dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Type!\n");
+ return -ENODEV;
+ }
+}
+
+static u16 __init parse_ivhd_device_padding(u16 pad_length,
+ u16 header_length, u16 block_length)
+{
+ if ( header_length < (block_length + pad_length) )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
+ return 0;
+ }
+
+ return pad_length;
+}
+
+static u16 __init parse_ivhd_device_select(
+ union acpi_ivhd_device *ivhd_device)
+{
+ u16 bdf;
+
+ bdf = ivhd_device->header.dev_id;
+ if ( bdf >= ivrs_bdf_entries )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+ return 0;
+ }
+
+ /* override flags for device */
+ ivrs_mappings[bdf].dte_sys_mgt_enable =
+ get_field_from_byte(ivhd_device->header.flags,
+ AMD_IOMMU_ACPI_SYS_MGT_MASK,
+ AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+
+ return sizeof(struct acpi_ivhd_device_header);
+}
+
+static u16 __init parse_ivhd_device_range(
+ union acpi_ivhd_device *ivhd_device,
+ u16 header_length, u16 block_length)
+{
+ u16 dev_length, first_bdf, last_bdf, bdf;
+ u8 sys_mgt;
+
+ dev_length = sizeof(struct acpi_ivhd_device_range);
+ if ( header_length < (block_length + dev_length) )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
+ return 0;
+ }
+
+ if ( ivhd_device->range.trailer.type !=
+ AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END) {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Range: End_Type 0x%x\n",
+ ivhd_device->range.trailer.type);
+ return 0;
+ }
+
+ first_bdf = ivhd_device->header.dev_id;
+ if ( first_bdf >= ivrs_bdf_entries )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+ return 0;
+ }
+
+ last_bdf = ivhd_device->range.trailer.dev_id;
+ if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+ return 0;
+ }
+
+ dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
+ first_bdf, last_bdf);
+
+ /* override flags for range of devices */
+ sys_mgt = get_field_from_byte(ivhd_device->header.flags,
+ AMD_IOMMU_ACPI_SYS_MGT_MASK,
+ AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+ for ( bdf = first_bdf; bdf <= last_bdf; ++bdf )
+ ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+
+ return dev_length;
+}
+
+static u16 __init parse_ivhd_device_alias(
+ union acpi_ivhd_device *ivhd_device,
+ u16 header_length, u16 block_length)
+{
+ u16 dev_length, alias_id, bdf;
+
+ dev_length = sizeof(struct acpi_ivhd_device_alias);
+ if ( header_length < (block_length + dev_length) )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Device_Entry Length!\n");
+ return 0;
+ }
+
+ bdf = ivhd_device->header.dev_id;
+ if ( bdf >= ivrs_bdf_entries )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+ return 0;
+ }
+
+ alias_id = ivhd_device->alias.dev_id;
+ if ( alias_id >= ivrs_bdf_entries )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Alias Dev_Id 0x%x\n", alias_id);
+ return 0;
+ }
+
+ dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
+
+ /* override requestor_id and flags for device */
+ ivrs_mappings[bdf].dte_requestor_id = alias_id;
+ ivrs_mappings[bdf].dte_sys_mgt_enable =
+ get_field_from_byte(ivhd_device->header.flags,
+ AMD_IOMMU_ACPI_SYS_MGT_MASK,
+ AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+ ivrs_mappings[alias_id].dte_sys_mgt_enable =
+ ivrs_mappings[bdf].dte_sys_mgt_enable;
+
+ return dev_length;
+}
+
+static u16 __init parse_ivhd_device_alias_range(
+ union acpi_ivhd_device *ivhd_device,
+ u16 header_length, u16 block_length)
+{
+
+ u16 dev_length, first_bdf, last_bdf, alias_id, bdf;
+ u8 sys_mgt;
+
+ dev_length = sizeof(struct acpi_ivhd_device_alias_range);
+ if ( header_length < (block_length + dev_length) )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Device_Entry Length!\n");
+ return 0;
+ }
+
+ if ( ivhd_device->alias_range.trailer.type !=
+ AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Range: End_Type 0x%x\n",
+ ivhd_device->alias_range.trailer.type);
+ return 0;
+ }
+
+ first_bdf = ivhd_device->header.dev_id;
+ if ( first_bdf >= ivrs_bdf_entries )
+ {
+ dprintk(XENLOG_ERR,"IVHD Error: "
+ "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+ return 0;
+ }
+
+ last_bdf = ivhd_device->alias_range.trailer.dev_id;
+ if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+ return 0;
+ }
+
+ alias_id = ivhd_device->alias_range.alias.dev_id;
+ if ( alias_id >= ivrs_bdf_entries )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Alias Dev_Id 0x%x\n", alias_id);
+ return 0;
+ }
+
+ dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
+ first_bdf, last_bdf);
+ dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
+
+ /* override requestor_id and flags for range of devices */
+ sys_mgt = get_field_from_byte(ivhd_device->header.flags,
+ AMD_IOMMU_ACPI_SYS_MGT_MASK,
+ AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+ for ( bdf = first_bdf; bdf <= last_bdf; ++bdf )
+ {
+ ivrs_mappings[bdf].dte_requestor_id = alias_id;
+ ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+ }
+ ivrs_mappings[alias_id].dte_sys_mgt_enable = sys_mgt;
+
+ return dev_length;
+}
+
+static u16 __init parse_ivhd_device_extended(
+ union acpi_ivhd_device *ivhd_device,
+ u16 header_length, u16 block_length)
+{
+ u16 dev_length, bdf;
+
+ dev_length = sizeof(struct acpi_ivhd_device_extended);
+ if ( header_length < (block_length + dev_length) )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Device_Entry Length!\n");
+ return 0;
+ }
+
+ bdf = ivhd_device->header.dev_id;
+ if ( bdf >= ivrs_bdf_entries )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+ return 0;
+ }
+
+ /* override flags for device */
+ ivrs_mappings[bdf].dte_sys_mgt_enable =
+ get_field_from_byte(ivhd_device->header.flags,
+ AMD_IOMMU_ACPI_SYS_MGT_MASK,
+ AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+
+ return dev_length;
+}
+
+static u16 __init parse_ivhd_device_extended_range(
+ union acpi_ivhd_device *ivhd_device,
+ u16 header_length, u16 block_length)
+{
+ u16 dev_length, first_bdf, last_bdf, bdf;
+ u8 sys_mgt;
+
+ dev_length = sizeof(struct acpi_ivhd_device_extended_range);
+ if ( header_length < (block_length + dev_length) )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Device_Entry Length!\n");
+ return 0;
+ }
+
+ if ( ivhd_device->extended_range.trailer.type !=
+ AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Range: End_Type 0x%x\n",
+ ivhd_device->extended_range.trailer.type);
+ return 0;
+ }
+
+ first_bdf = ivhd_device->header.dev_id;
+ if ( first_bdf >= ivrs_bdf_entries )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+ return 0;
+ }
+
+ last_bdf = ivhd_device->extended_range.trailer.dev_id;
+ if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+ return 0;
+ }
+
+ dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
+ first_bdf, last_bdf);
+
+ /* override flags for range of devices */
+ sys_mgt = get_field_from_byte(ivhd_device->header.flags,
+ AMD_IOMMU_ACPI_SYS_MGT_MASK,
+ AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+ for ( bdf = first_bdf; bdf <= last_bdf; ++bdf )
+ ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+
+ return dev_length;
+}
+
+static int __init parse_ivhd_block(struct acpi_ivhd_block_header *ivhd_block)
+{
+ union acpi_ivhd_device *ivhd_device;
+ u16 block_length, dev_length;
+ struct amd_iommu *iommu;
+
+ if ( ivhd_block->header.length <
+ sizeof(struct acpi_ivhd_block_header) )
+ {
+ dprintk(XENLOG_ERR, "IVHD Error: Invalid Block Length!\n");
+ return -ENODEV;
+ }
+
+ iommu = find_iommu_from_bdf_cap(ivhd_block->header.dev_id,
+ ivhd_block->cap_offset);
+ if ( !iommu )
+ {
+ dprintk(XENLOG_ERR,
+ "IVHD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n",
+ ivhd_block->header.dev_id, ivhd_block->cap_offset);
+ return -ENODEV;
+ }
+
+ dprintk(XENLOG_INFO, "IVHD Block:\n");
+ dprintk(XENLOG_INFO, " Cap_Offset 0x%x\n",
+ ivhd_block->cap_offset);
+ dprintk(XENLOG_INFO, " MMIO_BAR_Phys 0x%lx\n",
+ (unsigned long)ivhd_block->mmio_base);
+ dprintk(XENLOG_INFO, " PCI_Segment 0x%x\n",
+ ivhd_block->pci_segment);
+ dprintk(XENLOG_INFO, " IOMMU_Info 0x%x\n",
+ ivhd_block->iommu_info);
+
+ /* override IOMMU support flags */
+ iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_COHERENT_MASK,
+ AMD_IOMMU_ACPI_COHERENT_SHIFT);
+ iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_IOTLB_SUP_MASK,
+ AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT);
+ iommu->isochronous = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_ISOC_MASK,
+ AMD_IOMMU_ACPI_ISOC_SHIFT);
+ iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_RES_PASS_PW_MASK,
+ AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT);
+ iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_PASS_PW_MASK,
+ AMD_IOMMU_ACPI_PASS_PW_SHIFT);
+ iommu->ht_tunnel_enable = get_field_from_byte(
+ ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_HT_TUN_ENB_MASK,
+ AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT);
+
+ /* parse Device Entries */
+ block_length = sizeof(struct acpi_ivhd_block_header);
+ while( ivhd_block->header.length >=
+ (block_length + sizeof(struct acpi_ivhd_device_header)) )
+ {
+ ivhd_device = (union acpi_ivhd_device *)
+ ((u8 *)ivhd_block + block_length);
+
+ dprintk(XENLOG_INFO, "IVHD Device Entry:\n");
+ dprintk(XENLOG_INFO, " Type 0x%x\n",
+ ivhd_device->header.type);
+ dprintk(XENLOG_INFO, " Dev_Id 0x%x\n",
+ ivhd_device->header.dev_id);
+ dprintk(XENLOG_INFO, " Flags 0x%x\n",
+ ivhd_device->header.flags);
+
+ switch( ivhd_device->header.type )
+ {
+ case AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD:
+ dev_length = parse_ivhd_device_padding(
+ sizeof(u32),
+ ivhd_block->header.length, block_length);
+ break;
+ case AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD:
+ dev_length = parse_ivhd_device_padding(
+ sizeof(u64),
+ ivhd_block->header.length, block_length);
+ break;
+ case AMD_IOMMU_ACPI_IVHD_DEV_SELECT:
+ dev_length = parse_ivhd_device_select(ivhd_device);
+ break;
+ case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START:
+ dev_length = parse_ivhd_device_range(ivhd_device,
+ ivhd_block->header.length, block_length);
+ break;
+ case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT:
+ dev_length = parse_ivhd_device_alias(
+ ivhd_device,
+ ivhd_block->header.length, block_length);
+ break;
+ case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE:
+ dev_length = parse_ivhd_device_alias_range(
+ ivhd_device,
+ ivhd_block->header.length, block_length);
+ break;
+ case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT:
+ dev_length = parse_ivhd_device_extended(
+ ivhd_device,
+ ivhd_block->header.length, block_length);
+ break;
+ case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE:
+ dev_length = parse_ivhd_device_extended_range(
+ ivhd_device,
+ ivhd_block->header.length, block_length);
+ break;
+ default:
+ dprintk(XENLOG_ERR, "IVHD Error: "
+ "Invalid Device Type!\n");
+ dev_length = 0;
+ break;
+ }
+
+ block_length += dev_length;
+ if ( !dev_length )
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int __init parse_ivrs_block(struct acpi_ivrs_block_header *ivrs_block)
+{
+ struct acpi_ivhd_block_header *ivhd_block;
+ struct acpi_ivmd_block_header *ivmd_block;
+
+ switch(ivrs_block->type)
+ {
+ case AMD_IOMMU_ACPI_IVHD_TYPE:
+ ivhd_block = (struct acpi_ivhd_block_header *)ivrs_block;
+ return parse_ivhd_block(ivhd_block);
+
+ case AMD_IOMMU_ACPI_IVMD_ALL_TYPE:
+ case AMD_IOMMU_ACPI_IVMD_ONE_TYPE:
+ case AMD_IOMMU_ACPI_IVMD_RANGE_TYPE:
+ case AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE:
+ ivmd_block = (struct acpi_ivmd_block_header *)ivrs_block;
+ return parse_ivmd_block(ivmd_block);
+
+ default:
+ dprintk(XENLOG_ERR, "IVRS Error: Invalid Block Type!\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+void __init dump_acpi_table_header(struct acpi_table_header *table)
+{
+ int i;
+
+ printk(XENLOG_INFO "AMD IOMMU: ACPI Table:\n");
+ printk(XENLOG_INFO " Signature ");
+ for ( i = 0; i < ACPI_NAME_SIZE; ++i )
+ printk("%c", table->signature[i]);
+ printk("\n");
+
+ printk(" Length 0x%x\n", table->length);
+ printk(" Revision 0x%x\n", table->revision);
+ printk(" CheckSum 0x%x\n", table->checksum);
+
+ printk(" OEM_Id ");
+ for ( i = 0; i < ACPI_OEM_ID_SIZE; ++i )
+ printk("%c", table->oem_id[i]);
+ printk("\n");
+
+ printk(" OEM_Table_Id ");
+ for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; ++i )
+ printk("%c", table->oem_table_id[i]);
+ printk("\n");
+
+ printk(" OEM_Revision 0x%x\n", table->oem_revision);
+
+ printk(" Creator_Id ");
+ for ( i = 0; i < ACPI_NAME_SIZE; ++i )
+ printk("%c", table->asl_compiler_id[i]);
+ printk("\n");
+
+ printk(" Creator_Revision 0x%x\n",
+ table->asl_compiler_revision);
+}
+
+int __init parse_ivrs_table(unsigned long phys_addr,
+ unsigned long size)
+{
+ struct acpi_ivrs_block_header *ivrs_block;
+ unsigned long length, i;
+ u8 checksum, *raw_table;
+ int error = 0;
+ struct acpi_table_header *table =
+ (struct acpi_table_header *) __acpi_map_table(phys_addr, size);
+
+ BUG_ON(!table);
+
+#if 0
+ dump_acpi_table_header(table);
+#endif
+
+ /* validate checksum: sum of entire table == 0 */
+ checksum = 0;
+ raw_table = (u8 *)table;
+ for ( i = 0; i < table->length; ++i )
+ checksum += raw_table[i];
+ if ( checksum )
+ {
+ dprintk(XENLOG_ERR, "IVRS Error: "
+ "Invalid Checksum 0x%x\n", checksum);
+ return -ENODEV;
+ }
+
+ /* parse IVRS blocks */
+ length = sizeof(struct acpi_ivrs_table_header);
+ while( error == 0 && table->length >
+ (length + sizeof(struct acpi_ivrs_block_header)) )
+ {
+ ivrs_block = (struct acpi_ivrs_block_header *)
+ ((u8 *)table + length);
+
+ dprintk(XENLOG_INFO, "IVRS Block:\n");
+ dprintk(XENLOG_INFO, " Type 0x%x\n", ivrs_block->type);
+ dprintk(XENLOG_INFO, " Flags 0x%x\n", ivrs_block->flags);
+ dprintk(XENLOG_INFO, " Length 0x%x\n", ivrs_block->length);
+ dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", ivrs_block->dev_id);
+
+ if (table->length >= (length + ivrs_block->length))
+ error = parse_ivrs_block(ivrs_block);
+ else
+ {
+ dprintk(XENLOG_ERR, "IVRS Error: "
+ "Table Length Exceeded: 0x%x -> 0x%lx\n",
+ table->length,
+ (length + ivrs_block->length));
+ return -ENODEV;
+ }
+ length += ivrs_block->length;
+ }
+
+ return error;
+}
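The checksum rule applied in parse_ivrs_table() is the standard ACPI one: the byte-wise sum of the entire table, including the checksum byte itself, must be zero modulo 256. As a self-contained sketch of the same check:

    static int acpi_table_checksum_ok(const u8 *raw, unsigned long len)
    {
        u8 sum = 0;
        unsigned long i;

        for ( i = 0; i < len; ++i )
            sum += raw[i];     /* u8 arithmetic wraps, i.e. sum mod 256 */
        return sum == 0;
    }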
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/iommu_detect.c
--- a/xen/drivers/passthrough/amd/iommu_detect.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/drivers/passthrough/amd/iommu_detect.c Fri Feb 29 09:19:58 2008 -0700
@@ -86,30 +86,24 @@ int __init get_iommu_capabilities(u8 bus
int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
struct amd_iommu *iommu)
{
- u32 cap_header, cap_range;
+ u32 cap_header, cap_range, misc_info;
u64 mmio_bar;
-#if HACK_BIOS_SETTINGS
- /* remove it when BIOS available */
- write_pci_config(bus, dev, func,
- cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET, 0x00000000);
- write_pci_config(bus, dev, func,
- cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET, 0x40000001);
- /* remove it when BIOS available */
-#endif
-
mmio_bar = (u64)read_pci_config(bus, dev, func,
- cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32;
+ cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32;
mmio_bar |= read_pci_config(bus, dev, func,
- cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET) &
- PCI_CAP_MMIO_BAR_LOW_MASK;
- iommu->mmio_base_phys = (unsigned long)mmio_bar;
-
- if ( (mmio_bar == 0) || ( (mmio_bar & 0x3FFF) != 0 ) ) {
+ cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET);
+ iommu->mmio_base_phys = mmio_bar & (u64)~0x3FFF;
+
+ if ( (mmio_bar & 0x1) == 0 || iommu->mmio_base_phys == 0 )
+ {
dprintk(XENLOG_ERR ,
"AMD IOMMU: Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
return -ENODEV;
}
+
+ iommu->bdf = (bus << 8) | PCI_DEVFN(dev, func);
+ iommu->cap_offset = cap_ptr;
cap_header = read_pci_config(bus, dev, func, cap_ptr);
iommu->revision = get_field_from_reg_u32(cap_header,
@@ -119,12 +113,15 @@ int __init get_iommu_capabilities(u8 bus
iommu->ht_tunnel_support = get_field_from_reg_u32(cap_header,
PCI_CAP_HT_TUNNEL_MASK,
PCI_CAP_HT_TUNNEL_SHIFT);
- iommu->not_present_cached = get_field_from_reg_u32(cap_header,
+ iommu->pte_not_present_cached = get_field_from_reg_u32(cap_header,
PCI_CAP_NP_CACHE_MASK,
PCI_CAP_NP_CACHE_SHIFT);
cap_range = read_pci_config(bus, dev, func,
cap_ptr + PCI_CAP_RANGE_OFFSET);
+ iommu->unit_id = get_field_from_reg_u32(cap_range,
+ PCI_CAP_UNIT_ID_MASK,
+ PCI_CAP_UNIT_ID_SHIFT);
iommu->root_bus = get_field_from_reg_u32(cap_range,
PCI_CAP_BUS_NUMBER_MASK,
PCI_CAP_BUS_NUMBER_SHIFT);
@@ -135,6 +132,11 @@ int __init get_iommu_capabilities(u8 bus
PCI_CAP_LAST_DEVICE_MASK,
PCI_CAP_LAST_DEVICE_SHIFT);
+ misc_info = read_pci_config(bus, dev, func,
+ cap_ptr + PCI_MISC_INFO_OFFSET);
+ iommu->msi_number = get_field_from_reg_u32(misc_info,
+ PCI_CAP_MSI_NUMBER_MASK,
+ PCI_CAP_MSI_NUMBER_SHIFT);
return 0;
}
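For reference, the new BAR decode applied to the values the removed HACK_BIOS_SETTINGS block used to program (low dword 0x40000001, high dword 0); bit 0 is the enable bit and the base must be 16KB-aligned, hence the ~0x3FFF mask:

    u64 mmio_bar = ((u64)0x00000000 << 32) | 0x40000001;
    /* enable bit:        mmio_bar & 0x1          == 1          */
    /* 16KB-aligned base: mmio_bar & ~(u64)0x3FFF == 0x40000000 */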
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 29 09:19:58 2008 -0700
@@ -137,8 +137,49 @@ static void __init set_iommu_command_buf
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
}
+static void __init register_iommu_exclusion_range(struct amd_iommu *iommu)
+{
+ u64 addr_lo, addr_hi;
+ u32 entry;
+
+ addr_lo = iommu->exclusion_limit & DMA_32BIT_MASK;
+ addr_hi = iommu->exclusion_limit >> 32;
+
+ set_field_in_reg_u32((u32)addr_hi, 0,
+ IOMMU_EXCLUSION_LIMIT_HIGH_MASK,
+ IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT, &entry);
+ writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_LIMIT_HIGH_OFFSET);
+
+ set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
+ IOMMU_EXCLUSION_LIMIT_LOW_MASK,
+ IOMMU_EXCLUSION_LIMIT_LOW_SHIFT, &entry);
+ writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_LIMIT_LOW_OFFSET);
+
+ addr_lo = iommu->exclusion_base & DMA_32BIT_MASK;
+ addr_hi = iommu->exclusion_base >> 32;
+
+ set_field_in_reg_u32((u32)addr_hi, 0,
+ IOMMU_EXCLUSION_BASE_HIGH_MASK,
+ IOMMU_EXCLUSION_BASE_HIGH_SHIFT, &entry);
+ writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_HIGH_OFFSET);
+
+ set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
+ IOMMU_EXCLUSION_BASE_LOW_MASK,
+ IOMMU_EXCLUSION_BASE_LOW_SHIFT, &entry);
+
+ set_field_in_reg_u32(iommu->exclusion_allow_all, entry,
+ IOMMU_EXCLUSION_ALLOW_ALL_MASK,
+ IOMMU_EXCLUSION_ALLOW_ALL_SHIFT, &entry);
+
+ set_field_in_reg_u32(iommu->exclusion_enable, entry,
+ IOMMU_EXCLUSION_RANGE_ENABLE_MASK,
+ IOMMU_EXCLUSION_RANGE_ENABLE_SHIFT, &entry);
+ writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_LOW_OFFSET);
+}
+
void __init enable_iommu(struct amd_iommu *iommu)
{
+ register_iommu_exclusion_range(iommu);
set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
printk("AMD IOMMU %d: Enabled\n", nr_amd_iommus);
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 29 09:19:58 2008 -0700
@@ -234,16 +234,19 @@ static void amd_iommu_set_page_directory
}
void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, u16 domain_id,
- u8 paging_mode)
+ u8 sys_mgt, u8 dev_ex, u8 paging_mode)
{
u64 addr_hi, addr_lo;
u32 entry;
- dte[6] = dte[5] = dte[4] = 0;
-
- set_field_in_reg_u32(IOMMU_DEV_TABLE_SYS_MGT_MSG_FORWARDED, 0,
+ dte[7] = dte[6] = dte[5] = dte[4] = 0;
+
+ set_field_in_reg_u32(sys_mgt, 0,
IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK,
IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_SHIFT, &entry);
+ set_field_in_reg_u32(dev_ex, entry,
+ IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK,
+ IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry);
dte[3] = entry;
set_field_in_reg_u32(domain_id, 0,
@@ -448,3 +451,34 @@ int amd_iommu_unmap_page(struct domain *
return 0;
}
+
+int amd_iommu_reserve_domain_unity_map(
+ struct domain *domain,
+ unsigned long phys_addr,
+ unsigned long size, int iw, int ir)
+{
+ unsigned long flags, npages, i;
+ void *pte;
+ struct hvm_iommu *hd = domain_hvm_iommu(domain);
+
+ npages = region_to_pages(phys_addr, size);
+
+ spin_lock_irqsave(&hd->mapping_lock, flags);
+ for ( i = 0; i < npages; ++i )
+ {
+ pte = get_pte_from_page_tables(hd->root_table,
+ hd->paging_mode, phys_addr>>PAGE_SHIFT);
+ if ( pte == 0 )
+ {
+ dprintk(XENLOG_ERR,
+ "AMD IOMMU: Invalid IO pagetable entry phys_addr = %lx\n",
phys_addr);
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ return -EFAULT;
+ }
+ set_page_table_entry_present((u32 *)pte,
+ phys_addr, iw, ir);
+ phys_addr += PAGE_SIZE;
+ }
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ return 0;
+}
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 29 09:19:58 2008 -0700
@@ -20,6 +20,7 @@
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
+#include <asm/hvm/svm/amd-iommu-acpi.h>
#include <xen/sched.h>
#include <asm/mm.h>
#include "../pci-direct.h"
@@ -30,6 +31,9 @@ static long amd_iommu_cmd_buffer_entries
static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES;
int nr_amd_iommus = 0;
+unsigned short ivrs_bdf_entries = 0;
+struct ivrs_mappings *ivrs_mappings = NULL;
+
/* will set if amd-iommu HW is found */
int amd_iommu_enabled = 0;
@@ -82,13 +86,12 @@ static void __init detect_cleanup(void)
deallocate_iommu_resources(iommu);
xfree(iommu);
}
-}
-
-static int requestor_id_from_bdf(int bdf)
-{
- /* HACK - HACK */
- /* account for possible 'aliasing' by parent device */
- return bdf;
+
+ if ( ivrs_mappings )
+ {
+ xfree(ivrs_mappings);
+ ivrs_mappings = NULL;
+ }
}
static int __init allocate_iommu_table_struct(struct table_struct *table,
@@ -179,10 +182,21 @@ static int __init amd_iommu_init(void)
{
struct amd_iommu *iommu;
unsigned long flags;
+ u16 bdf;
for_each_amd_iommu ( iommu )
{
spin_lock_irqsave(&iommu->lock, flags);
+
+ /* assign default IOMMU values */
+ iommu->coherent = IOMMU_CONTROL_ENABLED;
+ iommu->isochronous = IOMMU_CONTROL_ENABLED;
+ iommu->res_pass_pw = IOMMU_CONTROL_ENABLED;
+ iommu->pass_pw = IOMMU_CONTROL_ENABLED;
+ iommu->ht_tunnel_enable = iommu->ht_tunnel_support ?
+ IOMMU_CONTROL_ENABLED : IOMMU_CONTROL_DISABLED;
+ iommu->exclusion_enable = IOMMU_CONTROL_DISABLED;
+ iommu->exclusion_allow_all = IOMMU_CONTROL_DISABLED;
/* register IOMMU data strucures in MMIO space */
if ( map_iommu_mmio_region(iommu) != 0 )
@@ -190,10 +204,30 @@ static int __init amd_iommu_init(void)
register_iommu_dev_table_in_mmio_space(iommu);
register_iommu_cmd_buffer_in_mmio_space(iommu);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+ /* assign default values for device entries */
+ for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf )
+ {
+ ivrs_mappings[bdf].dte_requestor_id = bdf;
+ ivrs_mappings[bdf].dte_sys_mgt_enable =
+ IOMMU_DEV_TABLE_SYS_MGT_MSG_FORWARDED;
+ ivrs_mappings[bdf].dte_allow_exclusion =
+ IOMMU_CONTROL_DISABLED;
+ ivrs_mappings[bdf].unity_map_enable =
+ IOMMU_CONTROL_DISABLED;
+ }
+
+ if ( acpi_table_parse(ACPI_IVRS, parse_ivrs_table) != 0 )
+ dprintk(XENLOG_INFO, "AMD IOMMU: Did not find IVRS table!\n");
+
+ for_each_amd_iommu ( iommu )
+ {
+ spin_lock_irqsave(&iommu->lock, flags);
/* enable IOMMU translation services */
enable_iommu(iommu);
nr_amd_iommus++;
-
spin_unlock_irqrestore(&iommu->lock, flags);
}
@@ -229,31 +263,38 @@ struct amd_iommu *find_iommu_for_device(
}
void amd_iommu_setup_domain_device(
- struct domain *domain, struct amd_iommu *iommu, int requestor_id)
+ struct domain *domain, struct amd_iommu *iommu, int bdf)
{
void *dte;
u64 root_ptr;
unsigned long flags;
+ int req_id;
+ u8 sys_mgt, dev_ex;
struct hvm_iommu *hd = domain_hvm_iommu(domain);
- BUG_ON( !hd->root_table||!hd->paging_mode );
+ BUG_ON( !hd->root_table || !hd->paging_mode );
root_ptr = (u64)virt_to_maddr(hd->root_table);
+ /* get device-table entry */
+ req_id = ivrs_mappings[bdf].dte_requestor_id;
dte = iommu->dev_table.buffer +
- (requestor_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+ (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
if ( !amd_iommu_is_dte_page_translation_valid((u32 *)dte) )
{
spin_lock_irqsave(&iommu->lock, flags);
- amd_iommu_set_dev_table_entry(
- (u32 *)dte,
- root_ptr, hd->domain_id, hd->paging_mode);
- invalidate_dev_table_entry(iommu, requestor_id);
+ /* bind DTE to domain page-tables */
+ sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable;
+ dev_ex = ivrs_mappings[req_id].dte_allow_exclusion;
+ amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr,
+ req_id, sys_mgt, dev_ex, hd->paging_mode);
+
+ invalidate_dev_table_entry(iommu, req_id);
flush_command_buffer(iommu);
dprintk(XENLOG_INFO, "AMD IOMMU: Set DTE req_id:%x, "
"root_ptr:%"PRIx64", domain_id:%d, paging_mode:%d\n",
- requestor_id, root_ptr, hd->domain_id, hd->paging_mode);
+ req_id, root_ptr, hd->domain_id, hd->paging_mode);
spin_unlock_irqrestore(&iommu->lock, flags);
}
@@ -266,7 +307,7 @@ void __init amd_iommu_setup_dom0_devices
struct pci_dev *pdev;
int bus, dev, func;
u32 l;
- int req_id, bdf;
+ int bdf;
for ( bus = 0; bus < 256; bus++ )
{
@@ -286,11 +327,12 @@ void __init amd_iommu_setup_dom0_devices
list_add_tail(&pdev->list, &hd->pdev_list);
bdf = (bus << 8) | pdev->devfn;
- req_id = requestor_id_from_bdf(bdf);
- iommu = find_iommu_for_device(bus, pdev->devfn);
+ /* supported device? */
+ iommu = (bdf < ivrs_bdf_entries) ?
+ find_iommu_for_device(bus, pdev->devfn) : NULL;
if ( iommu )
- amd_iommu_setup_domain_device(dom0, iommu, req_id);
+ amd_iommu_setup_domain_device(dom0, iommu, bdf);
}
}
}
@@ -299,6 +341,8 @@ int amd_iommu_detect(void)
int amd_iommu_detect(void)
{
unsigned long i;
+ int last_bus;
+ struct amd_iommu *iommu;
if ( !enable_amd_iommu )
{
@@ -318,6 +362,28 @@ int amd_iommu_detect(void)
{
printk("AMD IOMMU: Not found!\n");
return 0;
+ }
+ else
+ {
+ /* allocate 'ivrs mappings' table */
+        /* note: the table has entries to accommodate all IOMMUs */
+ last_bus = 0;
+ for_each_amd_iommu (iommu)
+ if (iommu->last_downstream_bus > last_bus)
+ last_bus = iommu->last_downstream_bus;
+
+ ivrs_bdf_entries = (last_bus + 1) *
+ IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
+ ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
+
+ if ( !ivrs_mappings )
+ {
+ dprintk(XENLOG_ERR, "AMD IOMMU:"
+ " Error allocating IVRS DevMappings table\n");
+ goto error_out;
+ }
+ memset(ivrs_mappings, 0,
+ ivrs_bdf_entries * sizeof(struct ivrs_mappings));
}
if ( amd_iommu_init() != 0 )
@@ -407,23 +473,25 @@ int amd_iommu_domain_init(struct domain
}
static void amd_iommu_disable_domain_device(
- struct domain *domain, struct amd_iommu *iommu, u16 requestor_id)
+ struct domain *domain, struct amd_iommu *iommu, int bdf)
{
void *dte;
unsigned long flags;
-
+ int req_id;
+
+ req_id = ivrs_mappings[bdf].dte_requestor_id;
dte = iommu->dev_table.buffer +
- (requestor_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+ (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
if ( amd_iommu_is_dte_page_translation_valid((u32 *)dte) )
{
spin_lock_irqsave(&iommu->lock, flags);
memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE);
- invalidate_dev_table_entry(iommu, requestor_id);
+ invalidate_dev_table_entry(iommu, req_id);
flush_command_buffer(iommu);
dprintk(XENLOG_INFO , "AMD IOMMU: disable DTE 0x%x,"
" domain_id:%d, paging_mode:%d\n",
- requestor_id, domain_hvm_iommu(domain)->domain_id,
+ req_id, domain_hvm_iommu(domain)->domain_id,
domain_hvm_iommu(domain)->paging_mode);
spin_unlock_irqrestore(&iommu->lock, flags);
}
@@ -438,7 +506,7 @@ static int reassign_device( struct domai
struct hvm_iommu *target_hd = domain_hvm_iommu(target);
struct pci_dev *pdev;
struct amd_iommu *iommu;
- int req_id, bdf;
+ int bdf;
unsigned long flags;
for_each_pdev( source, pdev )
@@ -450,12 +518,13 @@ static int reassign_device( struct domai
pdev->devfn = devfn;
bdf = (bus << 8) | devfn;
- req_id = requestor_id_from_bdf(bdf);
- iommu = find_iommu_for_device(bus, devfn);
+ /* supported device? */
+ iommu = (bdf < ivrs_bdf_entries) ?
+ find_iommu_for_device(bus, pdev->devfn) : NULL;
if ( iommu )
{
- amd_iommu_disable_domain_device(source, iommu, req_id);
+ amd_iommu_disable_domain_device(source, iommu, bdf);
/* Move pci device from the source domain to target domain. */
spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
@@ -463,7 +532,7 @@ static int reassign_device( struct domai
spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
- amd_iommu_setup_domain_device(target, iommu, req_id);
+ amd_iommu_setup_domain_device(target, iommu, bdf);
gdprintk(XENLOG_INFO ,
"AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
@@ -484,6 +553,19 @@ static int reassign_device( struct domai
int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
{
+ int bdf = (bus << 8) | devfn;
+ int req_id;
+ req_id = ivrs_mappings[bdf].dte_requestor_id;
+
+ if (ivrs_mappings[req_id].unity_map_enable)
+ {
+ amd_iommu_reserve_domain_unity_map(d,
+ ivrs_mappings[req_id].addr_range_start,
+ ivrs_mappings[req_id].addr_range_length,
+ ivrs_mappings[req_id].write_permission,
+ ivrs_mappings[req_id].read_permission);
+ }
+
pdev_flr(bus, devfn);
return reassign_device(dom0, d, bus, devfn);
}
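The sizing in amd_iommu_detect() allows one ivrs_mappings entry per possible devfn on each bus, assuming IOMMU_DEV_TABLE_ENTRIES_PER_BUS is 256 (the full 8-bit devfn space). Worked example for a last downstream bus of 3:

    ivrs_bdf_entries = (3 + 1) * 256;          /* 1024 entries */
    /* bdf 0x0312 (bus 3, devfn 0x12) indexes entry 786 < 1024 */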
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/amd-iommu.h
--- a/xen/include/asm-x86/amd-iommu.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/asm-x86/amd-iommu.h Fri Feb 29 09:19:58 2008 -0700
@@ -43,14 +43,25 @@ struct amd_iommu {
struct list_head list;
spinlock_t lock; /* protect iommu */
- int iotlb_support;
- int ht_tunnel_support;
- int not_present_cached;
+ u16 bdf;
+ u8 cap_offset;
u8 revision;
+ u8 unit_id;
+ u8 msi_number;
u8 root_bus;
u8 first_devfn;
u8 last_devfn;
+
+ u8 pte_not_present_cached;
+ u8 ht_tunnel_support;
+ u8 iotlb_support;
+
+ u8 isochronous;
+ u8 coherent;
+ u8 res_pass_pw;
+ u8 pass_pw;
+ u8 ht_tunnel_enable;
int last_downstream_bus;
int downstream_bus_present[PCI_MAX_BUS_COUNT];
@@ -61,10 +72,23 @@ struct amd_iommu {
struct table_struct dev_table;
struct table_struct cmd_buffer;
u32 cmd_buffer_tail;
+ struct table_struct event_log;
+ u32 event_log_head;
- int exclusion_enabled;
- unsigned long exclusion_base;
- unsigned long exclusion_limit;
+ int exclusion_enable;
+ int exclusion_allow_all;
+ uint64_t exclusion_base;
+ uint64_t exclusion_limit;
};
+struct ivrs_mappings {
+ u16 dte_requestor_id;
+ u8 dte_sys_mgt_enable;
+ u8 dte_allow_exclusion;
+ u8 unity_map_enable;
+ u8 write_permission;
+ u8 read_permission;
+ unsigned long addr_range_start;
+ unsigned long addr_range_length;
+};
#endif /* _ASM_X86_64_AMD_IOMMU_H */
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/asm-x86/domain.h Fri Feb 29 09:19:58 2008 -0700
@@ -97,6 +97,11 @@ struct shadow_domain {
/* Fast MMIO path heuristic */
int has_fast_mmio_entries;
+
+ /* reflect guest table dirty status, incremented by write
+ * emulation and remove write permission
+ */
+ atomic_t gtable_dirty_version;
};
struct shadow_vcpu {
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h Fri Feb 29 09:19:58 2008 -0700
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
+ * Author: Leo Duran <leo.duran@xxxxxxx>
+ * Author: Wei Wang <wei.wang2@xxxxxxx> - adapted to xen
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_X86_64_AMD_IOMMU_ACPI_H
+#define _ASM_X86_64_AMD_IOMMU_ACPI_H
+
+#include <xen/acpi.h>
+
+/* I/O Virtualization Reporting Structure */
+#define AMD_IOMMU_ACPI_IVRS_SIG "IVRS"
+#define AMD_IOMMU_ACPI_IVHD_TYPE 0x10
+#define AMD_IOMMU_ACPI_IVMD_ALL_TYPE 0x20
+#define AMD_IOMMU_ACPI_IVMD_ONE_TYPE 0x21
+#define AMD_IOMMU_ACPI_IVMD_RANGE_TYPE 0x22
+#define AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE 0x23
+
+/* 4-byte Device Entries */
+#define AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD 0
+#define AMD_IOMMU_ACPI_IVHD_DEV_SELECT 2
+#define AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START 3
+#define AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END 4
+
+/* 8-byte Device Entries */
+#define AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD 64
+#define AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT 66
+#define AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE 67
+#define AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT 70
+#define AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE 71
+
+/* IVHD IOMMU Flags */
+#define AMD_IOMMU_ACPI_COHERENT_MASK 0x20
+#define AMD_IOMMU_ACPI_COHERENT_SHIFT 5
+#define AMD_IOMMU_ACPI_IOTLB_SUP_MASK 0x10
+#define AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT 4
+#define AMD_IOMMU_ACPI_ISOC_MASK 0x08
+#define AMD_IOMMU_ACPI_ISOC_SHIFT 3
+#define AMD_IOMMU_ACPI_RES_PASS_PW_MASK 0x04
+#define AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT 2
+#define AMD_IOMMU_ACPI_PASS_PW_MASK 0x02
+#define AMD_IOMMU_ACPI_PASS_PW_SHIFT 1
+#define AMD_IOMMU_ACPI_HT_TUN_ENB_MASK 0x01
+#define AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT 0
+
+/* IVHD Device Flags */
+#define AMD_IOMMU_ACPI_LINT1_PASS_MASK 0x80
+#define AMD_IOMMU_ACPI_LINT1_PASS_SHIFT 7
+#define AMD_IOMMU_ACPI_LINT0_PASS_MASK 0x40
+#define AMD_IOMMU_ACPI_LINT0_PASS_SHIFT 6
+#define AMD_IOMMU_ACPI_SYS_MGT_MASK 0x30
+#define AMD_IOMMU_ACPI_SYS_MGT_SHIFT 4
+#define AMD_IOMMU_ACPI_NMI_PASS_MASK 0x04
+#define AMD_IOMMU_ACPI_NMI_PASS_SHIFT 2
+#define AMD_IOMMU_ACPI_EINT_PASS_MASK 0x02
+#define AMD_IOMMU_ACPI_EINT_PASS_SHIFT 1
+#define AMD_IOMMU_ACPI_INIT_PASS_MASK 0x01
+#define AMD_IOMMU_ACPI_INIT_PASS_SHIFT 0
+
+/* IVHD Device Extended Flags */
+#define AMD_IOMMU_ACPI_ATS_DISABLED_MASK 0x80000000
+#define AMD_IOMMU_ACPI_ATS_DISABLED_SHIFT 31
+
+/* IVMD Device Flags */
+#define AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK 0x08
+#define AMD_IOMMU_ACPI_EXCLUSION_RANGE_SHIFT 3
+#define AMD_IOMMU_ACPI_IW_PERMISSION_MASK 0x04
+#define AMD_IOMMU_ACPI_IW_PERMISSION_SHIFT 2
+#define AMD_IOMMU_ACPI_IR_PERMISSION_MASK 0x02
+#define AMD_IOMMU_ACPI_IR_PERMISSION_SHIFT 1
+#define AMD_IOMMU_ACPI_UNITY_MAPPING_MASK 0x01
+#define AMD_IOMMU_ACPI_UNITY_MAPPING_SHIFT 0
+
+#define ACPI_OEM_ID_SIZE 6
+#define ACPI_OEM_TABLE_ID_SIZE 8
+
+#pragma pack(1)
+struct acpi_ivrs_table_header {
+ struct acpi_table_header acpi_header;
+ u32 io_info;
+ u8 reserved[8];
+};
+
+struct acpi_ivrs_block_header {
+ u8 type;
+ u8 flags;
+ u16 length;
+ u16 dev_id;
+};
+
+struct acpi_ivhd_block_header {
+ struct acpi_ivrs_block_header header;
+ u16 cap_offset;
+ u64 mmio_base;
+ u16 pci_segment;
+ u16 iommu_info;
+ u8 reserved[4];
+};
+
+struct acpi_ivhd_device_header {
+ u8 type;
+ u16 dev_id;
+ u8 flags;
+};
+
+struct acpi_ivhd_device_trailer {
+ u8 type;
+ u16 dev_id;
+ u8 reserved;
+};
+
+struct acpi_ivhd_device_range {
+ struct acpi_ivhd_device_header header;
+ struct acpi_ivhd_device_trailer trailer;
+};
+
+struct acpi_ivhd_device_alias {
+ struct acpi_ivhd_device_header header;
+ u8 reserved1;
+ u16 dev_id;
+ u8 reserved2;
+};
+
+struct acpi_ivhd_device_alias_range {
+ struct acpi_ivhd_device_alias alias;
+ struct acpi_ivhd_device_trailer trailer;
+};
+
+struct acpi_ivhd_device_extended {
+ struct acpi_ivhd_device_header header;
+ u32 ext_flags;
+};
+
+struct acpi_ivhd_device_extended_range {
+ struct acpi_ivhd_device_extended extended;
+ struct acpi_ivhd_device_trailer trailer;
+};
+
+union acpi_ivhd_device {
+ struct acpi_ivhd_device_header header;
+ struct acpi_ivhd_device_range range;
+ struct acpi_ivhd_device_alias alias;
+ struct acpi_ivhd_device_alias_range alias_range;
+ struct acpi_ivhd_device_extended extended;
+ struct acpi_ivhd_device_extended_range extended_range;
+};
+
+struct acpi_ivmd_block_header {
+ struct acpi_ivrs_block_header header;
+ union {
+ u16 last_dev_id;
+ u16 cap_offset;
+ u16 reserved1;
+ };
+ u64 reserved2;
+ u64 start_addr;
+ u64 mem_length;
+};
+#pragma pack()
+
+#endif /* _ASM_X86_64_AMD_IOMMU_ACPI_H */
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Fri Feb 29 09:19:58 2008 -0700
@@ -117,6 +117,12 @@
#define PCI_CAP_FIRST_DEVICE_SHIFT 16
#define PCI_CAP_LAST_DEVICE_MASK 0xFF000000
#define PCI_CAP_LAST_DEVICE_SHIFT 24
+
+#define PCI_CAP_UNIT_ID_MASK 0x0000001F
+#define PCI_CAP_UNIT_ID_SHIFT 0
+#define PCI_MISC_INFO_OFFSET 0x10
+#define PCI_CAP_MSI_NUMBER_MASK 0x0000001F
+#define PCI_CAP_MSI_NUMBER_SHIFT 0
/* Device Table */
#define IOMMU_DEV_TABLE_BASE_LOW_OFFSET 0x00
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Fri Feb 29 09:19:58 2008 -0700
@@ -21,6 +21,7 @@
#ifndef _ASM_X86_64_AMD_IOMMU_PROTO_H
#define _ASM_X86_64_AMD_IOMMU_PROTO_H
+#include <xen/sched.h>
#include <asm/amd-iommu.h>
#define for_each_amd_iommu(amd_iommu) \
@@ -54,10 +55,12 @@ int amd_iommu_map_page(struct domain *d,
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
int amd_iommu_unmap_page(struct domain *d, unsigned long gfn);
void *amd_iommu_get_vptr_from_page_table_entry(u32 *entry);
+int amd_iommu_reserve_domain_unity_map(struct domain *domain,
+ unsigned long phys_addr, unsigned long size, int iw, int ir);
/* device table functions */
-void amd_iommu_set_dev_table_entry(u32 *dte,
- u64 root_ptr, u16 domain_id, u8 paging_mode);
+void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr,
+ u16 domain_id, u8 sys_mgt, u8 dev_ex, u8 paging_mode);
int amd_iommu_is_dte_page_translation_valid(u32 *entry);
void invalidate_dev_table_entry(struct amd_iommu *iommu,
u16 devic_id);
@@ -69,10 +72,13 @@ void flush_command_buffer(struct amd_iom
/* iommu domain funtions */
int amd_iommu_domain_init(struct domain *domain);
void amd_iommu_setup_domain_device(struct domain *domain,
- struct amd_iommu *iommu, int requestor_id);
+ struct amd_iommu *iommu, int bdf);
/* find iommu for bdf */
struct amd_iommu *find_iommu_for_device(int bus, int devfn);
+
+/* amd-iommu-acpi functions */
+int __init parse_ivrs_table(unsigned long phys_addr, unsigned long size);
static inline u32 get_field_from_reg_u32(u32 reg_value, u32 mask, u32 shift)
{
@@ -91,4 +97,16 @@ static inline u32 set_field_in_reg_u32(u
return reg_value;
}
+static inline u8 get_field_from_byte(u8 value, u8 mask, u8 shift)
+{
+ u8 field;
+ field = (value & mask) >> shift;
+ return field;
+}
+
+static inline unsigned long region_to_pages(unsigned long addr, unsigned long size)
+{
+ return (PAGE_ALIGN(addr + size) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
#endif /* _ASM_X86_64_AMD_IOMMU_PROTO_H */
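
For illustration only: region_to_pages() counts every whole page touched by
[addr, addr + size), which is what the unity-map code needs when a region is
not page-aligned. The values below are made up.

    unsigned long addr = 0x12345800UL;   /* deliberately not page aligned */
    unsigned long size = 0x2000UL;       /* 8KB                           */
    unsigned long npages = region_to_pages(addr, size);
    /* Covers 0x12345000 .. 0x12348000, so npages == 3, not 2: the
     * partial leading and trailing pages are both counted. */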
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/asm-x86/perfc_defn.h Fri Feb 29 09:19:58 2008 -0700
@@ -88,6 +88,11 @@ PERFCOUNTER(shadow_unshadow_bf, "shad
PERFCOUNTER(shadow_unshadow_bf, "shadow unshadow brute-force")
PERFCOUNTER(shadow_get_page_fail, "shadow_get_page_from_l1e failed")
PERFCOUNTER(shadow_guest_walk, "shadow walks guest tables")
+PERFCOUNTER(shadow_check_gwalk, "shadow checks gwalk")
+PERFCOUNTER(shadow_inconsistent_gwalk, "shadow check inconsistent gwalk")
+PERFCOUNTER(shadow_rm_write_flush_tlb,
+ "shadow flush tlb by removing write perm")
+
PERFCOUNTER(shadow_invlpg, "shadow emulates invlpg")
PERFCOUNTER(shadow_invlpg_fault, "shadow invlpg faults")
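
For illustration only: the new counters are bumped with perfc_incr() at the
corresponding points in the shadow code; the helper and condition below are
hypothetical stand-ins, not the real multi.c logic.

    perfc_incr(shadow_check_gwalk);
    if ( !shadow_check_gwalk_consistent(v, &gw) )  /* hypothetical helper */
    {
        perfc_incr(shadow_inconsistent_gwalk);
        /* rewalk the guest tables ... */
    }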
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/public/io/kbdif.h
--- a/xen/include/public/io/kbdif.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/public/io/kbdif.h Fri Feb 29 09:19:58 2008 -0700
@@ -65,7 +65,7 @@ struct xenkbd_position
uint8_t type; /* XENKBD_TYPE_POS */
int32_t abs_x; /* absolute X position (in FB pixels) */
int32_t abs_y; /* absolute Y position (in FB pixels) */
- int32_t abs_z; /* absolute Z position (wheel) */
+ int32_t rel_z; /* relative Z motion (wheel) */
};
#define XENKBD_IN_EVENT_SIZE 40
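
For illustration only: since the Z axis is now reported as relative motion
rather than an absolute position, a frontend accumulates deltas. The state
variable and pointer-update helper here are assumptions.

    static int32_t wheel;                      /* assumed frontend state */

    static void handle_position(struct xenkbd_position *pos)
    {
        move_pointer(pos->abs_x, pos->abs_y);  /* hypothetical helper     */
        wheel += pos->rel_z;                   /* was: wheel = pos->abs_z */
    }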
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/public/kexec.h
--- a/xen/include/public/kexec.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/public/kexec.h Fri Feb 29 09:19:58 2008 -0700
@@ -126,9 +126,18 @@ typedef struct xen_kexec_load {
xen_kexec_image_t image;
} xen_kexec_load_t;
-#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */
-#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */
-#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */
+#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */
+#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */
+#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */
+#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap
+ * Note that although this is adjacent
+ * to Xen it exists in a separate EFI
+ * region on ia64, and thus needs to be
+ * inserted into iomem_machine separately */
+#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of
+ * the ia64_boot_param */
+#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size
+ * of the EFI Memory Map */
/*
* Find the address and size of certain memory areas
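
For illustration only: a caller queries one of the new ranges through the
kexec get-range operation. The do_kexec_op() wrapper below is a hypothetical
stand-in for whatever hypercall plumbing the caller has; it is not defined by
this patch.

    xen_kexec_range_t range = {
        .range = KEXEC_RANGE_MA_XENHEAP,
        .nr = 0,
    };

    if ( do_kexec_op(KEXEC_CMD_kexec_get_range, &range) == 0 )
        printk("xenheap: %#lx, %#lx bytes\n", range.start, range.size);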
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/xen/acpi.h
--- a/xen/include/xen/acpi.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/xen/acpi.h Fri Feb 29 09:19:58 2008 -0700
@@ -368,6 +368,7 @@ enum acpi_table_id {
ACPI_HPET,
ACPI_MCFG,
ACPI_DMAR,
+ ACPI_IVRS,
ACPI_TABLE_COUNT
};
diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/xen/kexec.h
--- a/xen/include/xen/kexec.h Fri Feb 29 09:18:01 2008 -0700
+++ b/xen/include/xen/kexec.h Fri Feb 29 09:19:58 2008 -0700
@@ -31,6 +31,7 @@ void kexec_crash_save_cpu(void);
void kexec_crash_save_cpu(void);
crash_xen_info_t *kexec_crash_save_info(void);
void machine_crash_shutdown(void);
+int machine_kexec_get(xen_kexec_range_t *range);
#endif /* __XEN_KEXEC_H__ */
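
For illustration only: the general shape an arch backend for the new hook might
take. The real implementations live in the per-arch machine_kexec.c files this
changeset touches; the dispatch below is an assumption based on the header
alone.

    /* Sketch only -- dispatch shape assumed, not taken from this patch. */
    extern char _start[], _end[];          /* linker symbols */

    int machine_kexec_get(xen_kexec_range_t *range)
    {
        switch ( range->range )
        {
        case KEXEC_RANGE_MA_XEN:
            range->start = virt_to_maddr(_start);
            range->size  = (unsigned long)_end - (unsigned long)_start;
            return 0;
        default:
            return -EINVAL;
        }
    }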
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog