# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1183400366 21600
# Node ID d146700adf714cdc13f924ab0de1dc895b6927f8
# Parent 443ce7edad0e8a3a640960890a72ce530887b38e
# Parent 182446677b6b56d58523050a6225a73d87a86ab7
merge with xen-unstable.hg
---
buildconfigs/mk.linux-2.6-xen | 5
tools/Makefile | 1
tools/console/daemon/io.c | 38 +++--
tools/examples/init.d/xendomains | 6
tools/examples/network-bridge | 12 +
tools/firmware/vmxassist/head.S | 76 -----------
tools/firmware/vmxassist/machine.h | 15 --
tools/firmware/vmxassist/setup.c | 58 ---------
tools/firmware/vmxassist/vm86.c | 75 +++++++----
tools/ioemu/target-i386-dm/exec-dm.c | 2
tools/libxc/xc_domain_restore.c | 11 -
tools/libxc/xc_misc.c | 28 ++++
tools/libxc/xenctrl.h | 4
tools/misc/xen-python-path | 9 +
tools/python/xen/util/auxbin.py | 9 +
tools/python/xen/xend/XendCheckpoint.py | 2
tools/python/xen/xend/XendConfig.py | 105 +++++++++++++---
tools/python/xen/xend/XendDomain.py | 4
tools/python/xen/xend/server/irqif.py | 2
tools/python/xen/xend/server/pciif.py | 3
tools/python/xen/xm/main.py | 14 +-
xen/acm/acm_core.c | 2
xen/arch/x86/Makefile | 1
xen/arch/x86/boot/edd.S | 24 +--
xen/arch/x86/boot/x86_32.S | 21 ++-
xen/arch/x86/boot/x86_64.S | 15 ++
xen/arch/x86/clear_page.S | 26 ++++
xen/arch/x86/domain.c | 9 -
xen/arch/x86/hvm/hvm.c | 16 --
xen/arch/x86/hvm/io.c | 1
xen/arch/x86/hvm/platform.c | 3
xen/arch/x86/hvm/svm/svm.c | 56 +++++++-
xen/arch/x86/hvm/svm/vmcb.c | 8 -
xen/arch/x86/hvm/vmx/vmcs.c | 5
xen/arch/x86/hvm/vmx/vmx.c | 154 ++++++++++++++----------
xen/arch/x86/io_apic.c | 4
xen/arch/x86/mm.c | 3
xen/arch/x86/platform_hypercall.c | 96 ++++++++++++++
xen/arch/x86/setup.c | 34 ++++-
xen/arch/x86/traps.c | 13 +-
xen/arch/x86/x86_32/entry.S | 173 +++++++++++++--------------
xen/arch/x86/x86_32/supervisor_mode_kernel.S | 27 ++--
xen/arch/x86/x86_32/traps.c | 11 -
xen/arch/x86/x86_64/Makefile | 2
xen/arch/x86/x86_64/compat/entry.S | 10 -
xen/arch/x86/x86_64/entry.S | 68 +++++-----
xen/arch/x86/x86_64/mm.c | 9 -
xen/arch/x86/x86_64/traps.c | 10 +
xen/arch/x86/x86_emulate.c | 1
xen/common/sysctl.c | 33 +++++
xen/include/asm-x86/edd.h | 18 ++
xen/include/asm-x86/hvm/hvm.h | 14 ++
xen/include/asm-x86/hvm/svm/emulate.h | 1
xen/include/asm-x86/hvm/svm/vmcb.h | 8 -
xen/include/asm-x86/hvm/trace.h | 1
xen/include/asm-x86/hvm/vmx/vmcs.h | 2
xen/include/asm-x86/hvm/vmx/vmx.h | 7 -
xen/include/asm-x86/page.h | 13 +-
xen/include/asm-x86/processor.h | 4
xen/include/asm-x86/x86_32/asm_defns.h | 76 +++++++----
xen/include/public/platform.h | 40 ++++++
xen/include/public/sysctl.h | 22 ++-
xen/include/public/trace.h | 1
63 files changed, 961 insertions(+), 560 deletions(-)
diff -r 443ce7edad0e -r d146700adf71 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen Mon Jul 02 10:31:03 2007 -0600
+++ b/buildconfigs/mk.linux-2.6-xen Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,11 @@ _build: build
_build: build
include buildconfigs/src.$(XEN_LINUX_SOURCE)
+
+# Default to allowing interface mismatch
+ifndef XEN_LINUX_ALLOW_INTERFACE_MISMATCH
+XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
+endif
# The real action starts here!
.PHONY: build
diff -r 443ce7edad0e -r d146700adf71 tools/Makefile
--- a/tools/Makefile Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/Makefile Mon Jul 02 12:19:26 2007 -0600
@@ -43,6 +43,7 @@ install: check
$(MAKE) ioemuinstall
$(INSTALL_DIR) $(DESTDIR)/var/xen/dump
$(INSTALL_DIR) $(DESTDIR)/var/log/xen
+ $(INSTALL_DIR) $(DESTDIR)/var/lib/xen
.PHONY: clean
clean: check_clean
diff -r 443ce7edad0e -r d146700adf71 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/console/daemon/io.c Mon Jul 02 12:19:26 2007 -0600
@@ -764,27 +764,31 @@ void handle_io(void)
/* XXX I wish we didn't have to busy wait for hypervisor logs
* but there's no obvious way to get event channel notifications
* for new HV log data as we can with guest */
- ret = select(max_fd + 1, &readfds, &writefds, 0, log_hv_fd != -1 ? &timeout : NULL);
-
+ ret = select(max_fd + 1, &readfds, &writefds, 0,
+ log_hv_fd != -1 ? &timeout : NULL);
+
+ if (log_reload) {
+ handle_log_reload();
+ log_reload = 0;
+ }
+
+ /* Abort if select failed, except for EINTR cases
+ which indicate a possible log reload */
if (ret == -1) {
- if (errno == EINTR) {
- if (log_reload) {
- handle_log_reload();
- log_reload = 0;
- }
+ if (errno == EINTR)
continue;
- }
dolog(LOG_ERR, "Failure in select: %d (%s)",
errno, strerror(errno));
break;
}
- /* Check for timeout */
- if (ret == 0) {
- if (log_hv_fd != -1)
- handle_hv_logs();
+ /* Always process HV logs even if not a timeout */
+ if (log_hv_fd != -1)
+ handle_hv_logs();
+
+ /* Must not check returned FDSET if it was a timeout */
+ if (ret == 0)
continue;
- }
if (FD_ISSET(xs_fileno(xs), &readfds))
handle_xs();
@@ -806,10 +810,14 @@ void handle_io(void)
}
}
- if (log_hv_fd != -1)
+ if (log_hv_fd != -1) {
close(log_hv_fd);
- if (xc_handle != -1)
+ log_hv_fd = -1;
+ }
+ if (xc_handle != -1) {
xc_interface_close(xc_handle);
+ xc_handle = -1;
+ }
}
/*
diff -r 443ce7edad0e -r d146700adf71 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/examples/init.d/xendomains Mon Jul 02 12:19:26 2007 -0600
@@ -221,11 +221,12 @@ start()
if [ "$XENDOMAINS_RESTORE" = "true" ] &&
contains_something "$XENDOMAINS_SAVE"
then
- mkdir -p $(dirname "$LOCKFILE")
+ XENDOMAINS_SAVED=`/bin/ls $XENDOMAINS_SAVE/* | grep -v 'lost+found'`
+ mkdir -p $(dirname "$LOCKFILE")
touch $LOCKFILE
echo -n "Restoring Xen domains:"
saved_domains=`ls $XENDOMAINS_SAVE`
- for dom in $XENDOMAINS_SAVE/*; do
+ for dom in $XENDOMAINS_SAVED; do
echo -n " ${dom##*/}"
xm restore $dom
if [ $? -ne 0 ]; then
@@ -259,6 +260,7 @@ start()
if [ $? -eq 0 ] || is_running $dom; then
echo -n "(skip)"
else
+ echo "(booting)"
xm create --quiet --defconfig $dom
if [ $? -ne 0 ]; then
rc_failed $?
diff -r 443ce7edad0e -r d146700adf71 tools/examples/network-bridge
--- a/tools/examples/network-bridge Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/examples/network-bridge Mon Jul 02 12:19:26 2007 -0600
@@ -172,9 +172,21 @@ show_status () {
echo '============================================================'
}
+is_network_root () {
+ local rootfs=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $3; }}' /etc/mtab)
+ local rootopts=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $4; }}' /etc/mtab)
+
+ [[ "$rootfs" =~ "^nfs" ]] || [[ "$rootopts" =~ "_netdev" ]] && return 0 || return 1
+}
+
op_start () {
if [ "${bridge}" = "null" ] ; then
return
+ fi
+
+ if is_network_root ; then
+ [ -x /usr/bin/logger ] && /usr/bin/logger "network-bridge: bridging not supported on network root; not starting"
+ return
fi
if link_exists "$pdev"; then
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/head.S
--- a/tools/firmware/vmxassist/head.S Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/head.S Mon Jul 02 12:19:26 2007 -0600
@@ -25,80 +25,12 @@
* switch happens to the environment below. The magic indicates
* that this is a valid context.
*/
-#ifdef TEST
- .byte 0x55, 0xaa
- .byte 0x80
- .code16
- jmp _start16
-#else
jmp _start
-#endif
.align 8
.long VMXASSIST_MAGIC
.long newctx /* new context */
.long oldctx /* old context */
-
-#ifdef TEST
-/*
- * We are running in 16-bit. Get into the protected mode as soon as
- * possible. We use our own (minimal) GDT to get started.
- *
- * ROM is a misnomer as this code isn't really rommable (although it
- * only requires a few changes) but it does live in a BIOS ROM segment.
- * This code allows me to debug vmxassists under (a modified version of)
- * Bochs and load it as a "optromimage1".
- */
- .code16
- .globl _start16
-_start16:
- cli
-
- /* load our own global descriptor table */
- data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR)
-
- /* go to protected mode */
- movl %cr0, %eax
- orl $(CR0_PE), %eax
- movl %eax, %cr0
- data32 ljmp $0x08, $1f
-
- .align 32
- .globl rom_gdt
-rom_gdt:
- .word 0, 0 /* 0x00: reserved */
- .byte 0, 0, 0, 0
-
- .word 0xFFFF, 0 /* 0x08: CS 32-bit */
- .byte 0, 0x9A, 0xCF, 0
-
- .word 0xFFFF, 0 /* 0x10: CS 32-bit */
- .byte 0, 0x92, 0xCF, 0
-rom_gdt_end:
-
- .align 4
- .globl rom_gdtr
-rom_gdtr:
- .word rom_gdt_end - rom_gdt - 1
- .long rom_gdt
-
- .code32
-1:
- /* welcome to the 32-bit world */
- movw $0x10, %ax
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %ss
- movw %ax, %fs
- movw %ax, %gs
-
- /* enable Bochs debug facilities */
- movw $0x8A00, %dx
- movw $0x8A00, %ax
- outw %ax, (%dx)
-
- jmp _start
-#endif /* TEST */
/*
* This is the real start. Control was transfered to this point
@@ -111,9 +43,6 @@ _start:
cli
/* save register parameters to C land */
-#ifdef TEST
- xorl %edx, %edx
-#endif
/* clear bss */
cld
@@ -145,11 +74,6 @@ halt:
halt:
push $halt_msg
call printf
-#ifdef TEST
- movw $0x8A00, %dx
- movw $0x8AE0, %ax
- outw %ax, (%dx)
-#endif
cli
jmp .
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/machine.h
--- a/tools/firmware/vmxassist/machine.h Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/machine.h Mon Jul 02 12:19:26 2007 -0600
@@ -55,13 +55,6 @@
#define PGMASK (~(PGSIZE - 1)) /* page mask */
#define LPGSIZE (1 << LOG_PDSIZE) /* large page size */
#define LPGMASK (~(LPGSIZE - 1)) /* large page mask */
-
-#ifdef TEST
-#define PTE_P (1 << 0) /* Present */
-#define PTE_RW (1 << 1) /* Read/Write */
-#define PTE_US (1 << 2) /* User/Supervisor */
-#define PTE_PS (1 << 7) /* Page Size */
-#endif
/* Programmable Interrupt Contoller (PIC) defines */
#define PIC_MASTER 0x20
@@ -195,14 +188,6 @@ set_cr4(unsigned value)
__asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value));
}
-#ifdef TEST
-static inline void
-breakpoint(void)
-{
- outw(0x8A00, 0x8AE0);
-}
-#endif /* TEST */
-
#endif /* __ASSEMBLY__ */
#endif /* __MACHINE_H__ */
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/setup.c
--- a/tools/firmware/vmxassist/setup.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/setup.c Mon Jul 02 12:19:26 2007 -0600
@@ -46,19 +46,6 @@ unsigned long long idt[NR_TRAPS] __attri
unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32)));
struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
-
-#ifdef TEST
-unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
-
-struct e820entry e820map[] = {
- { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM },
- { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED },
- { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
- { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
- { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
- { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
-};
-#endif /* TEST */
struct vmx_assist_context oldctx;
struct vmx_assist_context newctx;
@@ -84,38 +71,11 @@ banner(void)
(((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
memory_size += 0x400 << 10; /* + 1MB */
-#ifdef TEST
- /* Create an SMAP for our debug environment */
- e820map[4].size = memory_size - e820map[4].addr - PGSIZE;
- e820map[5].addr = memory_size - PGSIZE;
- e820map[6].addr = memory_size;
- e820map[7].addr += memory_size;
-
- *HVM_E820_NR = sizeof(e820map)/sizeof(e820map[0]);
- memcpy(HVM_E820, e820map, sizeof(e820map));
-#endif
-
printf("Memory size %ld MB\n", memory_size >> 20);
printf("E820 map:\n");
print_e820_map(HVM_E820, *HVM_E820_NR);
printf("\n");
}
-
-#ifdef TEST
-void
-setup_paging(void)
-{
- unsigned long i;
-
- if (((unsigned)pgd & ~PGMASK) != 0)
- panic("PGD not page aligned");
- set_cr4(get_cr4() | CR4_PSE);
- for (i = 0; i < NR_PGD; i++)
- pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
- set_cr3((unsigned) pgd);
- set_cr0(get_cr0() | (CR0_PE|CR0_PG));
-}
-#endif /* TEST */
void
setup_gdt(void)
@@ -211,11 +171,7 @@ enter_real_mode(struct regs *regs)
regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
if (booting_cpu == 0) {
regs->cs = 0xF000; /* ROM BIOS POST entry point */
-#ifdef TEST
- regs->eip = 0xFFE0;
-#else
regs->eip = 0xFFF0;
-#endif
} else {
regs->cs = booting_vector << 8; /* AP entry point */
regs->eip = 0;
@@ -242,9 +198,10 @@ enter_real_mode(struct regs *regs)
}
/* go from protected to real mode */
- regs->eflags |= EFLAGS_VM;
set_mode(regs, VM86_PROTECTED_TO_REAL);
emulate(regs);
+ if (mode != VM86_REAL)
+ panic("failed to emulate between clear PE and long jump.\n");
}
/*
@@ -269,13 +226,8 @@ setup_ctx(void)
* more natural to enable CR0.PE to cause a world switch to
* protected mode rather than disabling it.
*/
-#ifdef TEST
- c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
- c->cr3 = (unsigned long) pgd;
-#else
c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
c->cr3 = 0;
-#endif
c->cr4 = get_cr4();
c->idtr_limit = sizeof(idt)-1;
@@ -369,16 +321,10 @@ main(void)
if (booting_cpu == 0)
banner();
-#ifdef TEST
- setup_paging();
-#endif
-
setup_gdt();
setup_idt();
-#ifndef TEST
set_cr4(get_cr4() | CR4_VME);
-#endif
setup_ctx();
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/vm86.c Mon Jul 02 12:19:26 2007 -0600
@@ -561,11 +561,7 @@ lmsw(struct regs *regs, unsigned prefix,
unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax;
TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax));
-#ifndef TEST
oldctx.cr0 = cr0 | CR0_PE | CR0_NE;
-#else
- oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG;
-#endif
if (cr0 & CR0_PE)
set_mode(regs, VM86_REAL_TO_PROTECTED);
@@ -584,8 +580,13 @@ movr(struct regs *regs, unsigned prefix,
unsigned addr = operand(prefix, regs, modrm);
unsigned val, r = (modrm >> 3) & 7;
- if ((modrm & 0xC0) == 0xC0) /* no registers */
- return 0;
+ if ((modrm & 0xC0) == 0xC0) {
+ /*
+ * Emulate all guest instructions in protected to real mode.
+ */
+ if (mode != VM86_PROTECTED_TO_REAL)
+ return 0;
+ }
switch (opc) {
case 0x88: /* addr32 mov r8, r/m8 */
@@ -656,13 +657,8 @@ movcr(struct regs *regs, unsigned prefix
TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr));
switch (cr) {
case 0:
-#ifndef TEST
setreg32(regs, modrm,
oldctx.cr0 & ~(CR0_PE | CR0_NE));
-#else
- setreg32(regs, modrm,
- oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG));
-#endif
break;
case 2:
setreg32(regs, modrm, get_cr2());
@@ -680,9 +676,6 @@ movcr(struct regs *regs, unsigned prefix
switch (cr) {
case 0:
oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE);
-#ifdef TEST
- oldctx.cr0 |= CR0_PG;
-#endif
if (getreg32(regs, modrm) & CR0_PE)
set_mode(regs, VM86_REAL_TO_PROTECTED);
else
@@ -818,8 +811,13 @@ mov_to_seg(struct regs *regs, unsigned p
{
unsigned modrm = fetch8(regs);
- /* Only need to emulate segment loads in real->protected mode. */
- if (mode != VM86_REAL_TO_PROTECTED)
+ /*
+ * Emulate segment loads in:
+ * 1) real->protected mode.
+ * 2) protected->real mode.
+ */
+ if ((mode != VM86_REAL_TO_PROTECTED) &&
+ (mode != VM86_PROTECTED_TO_REAL))
return 0;
/* Register source only. */
@@ -829,6 +827,8 @@ mov_to_seg(struct regs *regs, unsigned p
switch ((modrm & 0x38) >> 3) {
case 0: /* es */
regs->ves = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.ves = 0;
oldctx.es_sel = regs->ves;
return 1;
@@ -837,21 +837,29 @@ mov_to_seg(struct regs *regs, unsigned p
case 2: /* ss */
regs->uss = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.uss = 0;
oldctx.ss_sel = regs->uss;
return 1;
case 3: /* ds */
regs->vds = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.vds = 0;
oldctx.ds_sel = regs->vds;
return 1;
case 4: /* fs */
regs->vfs = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.vfs = 0;
oldctx.fs_sel = regs->vfs;
return 1;
case 5: /* gs */
regs->vgs = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.vgs = 0;
oldctx.gs_sel = regs->vgs;
return 1;
@@ -1067,7 +1075,8 @@ set_mode(struct regs *regs, enum vm86_mo
}
mode = newmode;
- TRACE((regs, 0, states[mode]));
+ if (mode != VM86_PROTECTED)
+ TRACE((regs, 0, states[mode]));
}
static void
@@ -1086,7 +1095,7 @@ jmpl(struct regs *regs, int prefix)
if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */
set_mode(regs, VM86_PROTECTED);
- else if (mode == VM86_PROTECTED_TO_REAL)/* jump to real mode */
+ else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */
set_mode(regs, VM86_REAL);
else
panic("jmpl");
@@ -1280,6 +1289,12 @@ opcode(struct regs *regs)
unsigned eip = regs->eip;
unsigned opc, modrm, disp;
unsigned prefix = 0;
+
+ if (mode == VM86_PROTECTED_TO_REAL &&
+ oldctx.cs_arbytes.fields.default_ops_size) {
+ prefix |= DATA32;
+ prefix |= ADDR32;
+ }
for (;;) {
switch ((opc = fetch8(regs))) {
@@ -1391,17 +1406,29 @@ opcode(struct regs *regs)
continue;
case 0x66:
- TRACE((regs, regs->eip - eip, "data32"));
- prefix |= DATA32;
+ if (mode == VM86_PROTECTED_TO_REAL &&
+ oldctx.cs_arbytes.fields.default_ops_size) {
+ TRACE((regs, regs->eip - eip, "data16"));
+ prefix &= ~DATA32;
+ } else {
+ TRACE((regs, regs->eip - eip, "data32"));
+ prefix |= DATA32;
+ }
continue;
case 0x67:
- TRACE((regs, regs->eip - eip, "addr32"));
- prefix |= ADDR32;
+ if (mode == VM86_PROTECTED_TO_REAL &&
+ oldctx.cs_arbytes.fields.default_ops_size) {
+ TRACE((regs, regs->eip - eip, "addr16"));
+ prefix &= ~ADDR32;
+ } else {
+ TRACE((regs, regs->eip - eip, "addr32"));
+ prefix |= ADDR32;
+ }
continue;
- case 0x88: /* addr32 mov r8, r/m8 */
- case 0x8A: /* addr32 mov r/m8, r8 */
+ case 0x88: /* mov r8, r/m8 */
+ case 0x8A: /* mov r/m8, r8 */
if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
goto invalid;
if ((prefix & ADDR32) == 0)
diff -r 443ce7edad0e -r d146700adf71 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/ioemu/target-i386-dm/exec-dm.c Mon Jul 02 12:19:26 2007 -0600
@@ -448,7 +448,7 @@ static void memcpy_words(void *dst, void
{
asm (
" movl %%edx,%%ecx \n"
-#ifdef __x86_64
+#ifdef __x86_64__
" shrl $3,%%ecx \n"
" andl $7,%%edx \n"
" rep movsq \n"
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xc_domain_restore.c Mon Jul 02 12:19:26 2007 -0600
@@ -465,7 +465,7 @@ int xc_domain_restore(int xc_handle, int
if ( j == 0 )
break; /* our work here is done */
- if ( j > MAX_BATCH_SIZE )
+ if ( (j > MAX_BATCH_SIZE) || (j < 0) )
{
ERROR("Max batch size exceeded. Giving up.");
goto out;
@@ -903,13 +903,14 @@ int xc_domain_restore(int xc_handle, int
/* Get the list of PFNs that are not in the psuedo-phys map */
{
- unsigned int count;
+ unsigned int count = 0;
unsigned long *pfntab;
int nr_frees, rc;
- if ( !read_exact(io_fd, &count, sizeof(count)) )
- {
- ERROR("Error when reading pfn count");
+ if ( !read_exact(io_fd, &count, sizeof(count)) ||
+ (count > (1U << 28)) ) /* up to 1TB of address space */
+ {
+ ERROR("Error when reading pfn count (= %u)", count);
goto out;
}
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xc_misc.c Mon Jul 02 12:19:26 2007 -0600
@@ -101,13 +101,37 @@ int xc_perfc_control(int xc_handle,
rc = do_sysctl(xc_handle, &sysctl);
- if (nbr_desc)
+ if ( nbr_desc )
*nbr_desc = sysctl.u.perfc_op.nr_counters;
- if (nbr_val)
+ if ( nbr_val )
*nbr_val = sysctl.u.perfc_op.nr_vals;
return rc;
}
+
+int xc_getcpuinfo(int xc_handle, int max_cpus,
+ xc_cpuinfo_t *info, int *nr_cpus)
+{
+ int rc;
+ DECLARE_SYSCTL;
+
+ sysctl.cmd = XEN_SYSCTL_getcpuinfo;
+ sysctl.u.getcpuinfo.max_cpus = max_cpus;
+ set_xen_guest_handle(sysctl.u.getcpuinfo.info, info);
+
+ if ( (rc = lock_pages(info, max_cpus*sizeof(*info))) != 0 )
+ return rc;
+
+ rc = do_sysctl(xc_handle, &sysctl);
+
+ unlock_pages(info, max_cpus*sizeof(*info));
+
+ if ( nr_cpus )
+ *nr_cpus = sysctl.u.getcpuinfo.nr_cpus;
+
+ return rc;
+}
+
int xc_hvm_set_pci_intx_level(
int xc_handle, domid_t dom,
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xenctrl.h Mon Jul 02 12:19:26 2007 -0600
@@ -491,6 +491,10 @@ int xc_sched_id(int xc_handle,
int xc_sched_id(int xc_handle,
int *sched_id);
+typedef xen_sysctl_cpuinfo_t xc_cpuinfo_t;
+int xc_getcpuinfo(int xc_handle, int max_cpus,
+ xc_cpuinfo_t *info, int *nr_cpus);
+
int xc_domain_setmaxmem(int xc_handle,
uint32_t domid,
unsigned int max_memkb);
diff -r 443ce7edad0e -r d146700adf71 tools/misc/xen-python-path
--- a/tools/misc/xen-python-path Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/misc/xen-python-path Mon Jul 02 12:19:26 2007 -0600
@@ -28,8 +28,13 @@ import os.path
import os.path
import sys
-for p in ['python%s' % sys.version[:3], 'python']:
- for l in ['/usr/lib64', '/usr/lib']:
+usr = os.path.dirname(os.path.dirname(sys.argv[0]))
+list = [ os.path.join(usr,'lib64') ]
+list += [ os.path.join(usr,'lib') ]
+list += ['/usr/lib64', '/usr/lib']
+
+for l in list:
+ for p in ['python%s' % sys.version[:3], 'python']:
for k in ['', 'site-packages/']:
d = os.path.join(l, p, k)
if os.path.exists(os.path.join(d, AUXBIN)):
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/util/auxbin.py
--- a/tools/python/xen/util/auxbin.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/util/auxbin.py Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,7 @@ LIB_64_ARCHS = [ 'x86_64', 's390x', 'spa
import os
import os.path
+import sys
def execute(exe, args = None):
@@ -47,6 +48,14 @@ def path():
def libpath():
machine = os.uname()[4]
+ if sys.argv[0] != '-c':
+ prefix = os.path.dirname(os.path.dirname(sys.argv[0]))
+ path = os.path.join(prefix, os.path.basename(LIB_64))
+ if machine in LIB_64_ARCHS and os.path.exists(path):
+ return path
+ path = os.path.join(prefix, os.path.basename(LIB_32))
+ if os.path.exists(path):
+ return path
if machine in LIB_64_ARCHS and os.path.exists(LIB_64):
return LIB_64
else:
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py Mon Jul 02 12:19:26 2007 -0600
@@ -148,6 +148,8 @@ def save(fd, dominfo, network, live, dst
except:
log.exception("Failed to reset the migrating domain's name")
+ raise exn
+
def restore(xd, fd, dominfo = None, paused = False):
signature = read_exact(fd, len(SIGNATURE),
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendConfig.py Mon Jul 02 12:19:26 2007 -0600
@@ -28,6 +28,7 @@ from xen.xend.PrettyPrint import prettyp
from xen.xend.PrettyPrint import prettyprintstring
from xen.xend.XendConstants import DOM_STATE_HALTED
from xen.xend.server.netif import randomMAC
+from xen.util.blkif import blkdev_name_to_number
log = logging.getLogger("xend.XendConfig")
log.setLevel(logging.WARN)
@@ -934,6 +935,62 @@ class XendConfig(dict):
return sxpr
+ def _blkdev_name_to_number(self, dev):
+ if 'ioemu:' in dev:
+ _, dev = dev.split(':', 1)
+ try:
+ dev, _ = dev.split(':', 1)
+ except ValueError:
+ pass
+
+ try:
+ devid = int(dev)
+ except ValueError:
+ # devid is not a number but a string containing either device
+ # name (e.g. xvda) or device_type/device_id (e.g. vbd/51728)
+ dev2 = type(dev) is str and dev.split('/')[-1] or None
+ if dev2 == None:
+ log.debug("Could not check the device %s", dev)
+ return None
+ try:
+ devid = int(dev2)
+ except ValueError:
+ devid = blkdev_name_to_number(dev2)
+ if devid == None:
+ log.debug("The device %s is not device name", dev2)
+ return None
+ return devid
+
+ def device_duplicate_check(self, dev_type, dev_info, defined_config):
+ defined_devices_sxpr = self.all_devices_sxpr(target = defined_config)
+
+ if dev_type == 'vbd':
+ dev_uname = dev_info.get('uname')
+ blkdev_name = dev_info.get('dev')
+ devid = self._blkdev_name_to_number(blkdev_name)
+ if devid == None:
+ return
+
+ for o_dev_type, o_dev_info in defined_devices_sxpr:
+ if dev_type == o_dev_type:
+ if dev_uname == sxp.child_value(o_dev_info, 'uname'):
+ raise XendConfigError('The uname "%s" is already defined' %
+ dev_uname)
+ o_blkdev_name = sxp.child_value(o_dev_info, 'dev')
+ o_devid = self._blkdev_name_to_number(o_blkdev_name)
+ if o_devid != None and devid == o_devid:
+ raise XendConfigError('The device "%s" is already defined' %
+ blkdev_name)
+
+ elif dev_type == 'vif':
+ dev_mac = dev_info.get('mac')
+
+ for o_dev_type, o_dev_info in defined_devices_sxpr:
+ if dev_type == o_dev_type:
+ if dev_mac == sxp.child_value(o_dev_info, 'mac'):
+ raise XendConfigError('The mac "%s" is already defined' %
+ dev_mac)
+
def device_add(self, dev_type, cfg_sxp = None, cfg_xenapi = None,
target = None):
"""Add a device configuration in SXP format or XenAPI struct format.
@@ -997,6 +1054,8 @@ class XendConfig(dict):
if dev_type == 'vif':
if not dev_info.get('mac'):
dev_info['mac'] = randomMAC()
+
+ self.device_duplicate_check(dev_type, dev_info, target)
# create uuid if it doesn't exist
dev_uuid = dev_info.get('uuid', None)
@@ -1275,15 +1334,19 @@ class XendConfig(dict):
return False
- def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None):
+ def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None,
target = None):
"""Get Device SXPR by either giving the device UUID or (type, config).
@rtype: list of lists
@return: device config sxpr
"""
sxpr = []
- if dev_uuid != None and dev_uuid in self['devices']:
- dev_type, dev_info = self['devices'][dev_uuid]
+
+ if target == None:
+ target = self
+
+ if dev_uuid != None and dev_uuid in target['devices']:
+ dev_type, dev_info = target['devices'][dev_uuid]
if dev_type == None or dev_info == None:
raise XendConfigError("Required either UUID or device type and "
@@ -1300,8 +1363,12 @@ class XendConfig(dict):
return sxpr
- def ordered_device_refs(self):
+ def ordered_device_refs(self, target = None):
result = []
+
+ if target == None:
+ target = self
+
# vkbd devices *must* be before vfb devices, otherwise
# there is a race condition when setting up devices
# where the daemon spawned for the vfb may write stuff
@@ -1309,27 +1376,30 @@ class XendConfig(dict):
# setup permissions on the vkbd backend path. This race
# results in domain creation failing with 'device already
# connected' messages
- result.extend([u for u in self['devices'].keys() if self['devices'][u][0] == 'vkbd'])
-
- result.extend(self['console_refs'] +
- self['vbd_refs'] +
- self['vif_refs'] +
- self['vtpm_refs'])
-
- result.extend([u for u in self['devices'].keys() if u not in result])
+ result.extend([u for u in target['devices'].keys() if target['devices'][u][0] == 'vkbd'])
+
+ result.extend(target.get('console_refs', []) +
+ target.get('vbd_refs', []) +
+ target.get('vif_refs', []) +
+ target.get('vtpm_refs', []))
+
+ result.extend([u for u in target['devices'].keys() if u not in result])
return result
- def all_devices_sxpr(self):
+ def all_devices_sxpr(self, target = None):
"""Returns the SXPR for all devices in the current configuration."""
sxprs = []
pci_devs = []
- if 'devices' not in self:
+ if target == None:
+ target = self
+
+ if 'devices' not in target:
return sxprs
- ordered_refs = self.ordered_device_refs()
+ ordered_refs = self.ordered_device_refs(target = target)
for dev_uuid in ordered_refs:
- dev_type, dev_info = self['devices'][dev_uuid]
+ dev_type, dev_info = target['devices'][dev_uuid]
if dev_type == 'pci': # special case for pci devices
sxpr = [['uuid', dev_info['uuid']]]
for pci_dev_info in dev_info['devs']:
@@ -1340,7 +1410,8 @@ class XendConfig(dict):
sxprs.append((dev_type, sxpr))
else:
sxpr = self.device_sxpr(dev_type = dev_type,
- dev_info = dev_info)
+ dev_info = dev_info,
+ target = target)
sxprs.append((dev_type, sxpr))
return sxprs
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendDomain.py Mon Jul 02 12:19:26 2007 -0600
@@ -1262,8 +1262,10 @@ class XendDomain:
try:
XendCheckpoint.save(fd, dominfo, False, False, dst,
checkpoint=checkpoint)
- finally:
+ except Exception, e:
os.close(fd)
+ raise e
+ os.close(fd)
except OSError, ex:
raise XendError("can't write guest state file %s: %s" %
(dst, ex[1]))
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/irqif.py
--- a/tools/python/xen/xend/server/irqif.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/server/irqif.py Mon Jul 02 12:19:26 2007 -0600
@@ -61,7 +61,7 @@ class IRQController(DevController):
pirq = get_param('irq')
- rc = xc.domain_irq_permission(dom = self.getDomid(),
+ rc = xc.domain_irq_permission(domid = self.getDomid(),
pirq = pirq,
allow_access = True)
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/server/pciif.py Mon Jul 02 12:19:26 2007 -0600
@@ -185,3 +185,6 @@ class PciController(DevController):
def waitForBackend(self,devid):
return (0, "ok - no hotplug")
+
+ def migrate(self, config, network, dst, step, domName):
+ raise XendError('Migration not permitted with assigned PCI device.')
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xm/main.py Mon Jul 02 12:19:26 2007 -0600
@@ -2168,9 +2168,7 @@ def xm_network_attach(args):
server.xend.domain.device_create(dom, vif)
-def detach(args, command, deviceClass):
- arg_check(args, command, 2, 3)
-
+def detach(args, deviceClass):
dom = args[0]
dev = args[1]
try:
@@ -2204,16 +2202,17 @@ def xm_block_detach(args):
raise OptionError("Cannot find device '%s' in domain '%s'"
% (dev,dom))
else:
+ arg_check(args, 'block-detach', 2, 3)
try:
- detach(args, 'block-detach', 'vbd')
+ detach(args, 'vbd')
return
except:
pass
- detach(args, 'block-detach', 'tap')
+ detach(args, 'tap')
def xm_network_detach(args):
if serverType == SERVER_XEN_API:
- arg_check(args, "xm_block_detach", 2, 3)
+ arg_check(args, "xm_network_detach", 2, 3)
dom = args[0]
devid = args[1]
vif_refs = server.xenapi.VM.get_VIFs(get_single_vm(dom))
@@ -2227,7 +2226,8 @@ def xm_network_detach(args):
else:
print "Cannot find device '%s' in domain '%s'" % (devid,dom)
else:
- detach(args, 'network-detach', 'vif')
+ arg_check(args, 'network-detach', 2, 3)
+ detach(args, 'vif')
def xm_vnet_list(args):
diff -r 443ce7edad0e -r d146700adf71 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/acm/acm_core.c Mon Jul 02 12:19:26 2007 -0600
@@ -89,7 +89,7 @@ static void __init set_dom0_ssidref(cons
if (hi < ACM_MAX_NUM_TYPES && hi >= 1)
dom0_ste_ssidref = hi;
for (i = 0; i < sizeof(polname); i++) {
- polname[i] = c[7+i];
+ polname[i] = c[5+i];
if (polname[i] == '\0' || polname[i] == '\t' ||
polname[i] == '\n' || polname[i] == ' ' ||
polname[i] == ':') {
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/Makefile Mon Jul 02 12:19:26 2007 -0600
@@ -10,6 +10,7 @@ subdir-$(x86_64) += x86_64
obj-y += apic.o
obj-y += bitops.o
+obj-y += clear_page.o
obj-y += compat.o
obj-y += delay.o
obj-y += dmi_scan.o
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/edd.S
--- a/xen/arch/x86/boot/edd.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/edd.S Mon Jul 02 12:19:26 2007 -0600
@@ -24,7 +24,7 @@
/* Maximum number of EDD information structures at boot_edd_info. */
#define EDD_INFO_MAX 6
-/* Maximum number of MBR signatures at boot_edd_signature. */
+/* Maximum number of MBR signatures at boot_mbr_signature. */
#define EDD_MBR_SIG_MAX 16
/* Size of components of EDD information structure. */
@@ -40,10 +40,8 @@ get_edd:
# Read the first sector of each BIOS disk device and store the 4-byte signature
edd_mbr_sig_start:
movb $0x80, %dl # from device 80
- movw $bootsym(boot_edd_signature),%bx # store buffer ptr in bx
+ movw $bootsym(boot_mbr_signature),%bx # store buffer ptr in bx
edd_mbr_sig_read:
- movl $0xFFFFFFFF, %eax
- movl %eax, (%bx) # assume failure
pushw %bx
movb $0x02, %ah # 0x02 Read Sectors
movb $1, %al # read 1 sector
@@ -64,11 +62,12 @@ edd_mbr_sig_read:
cmpb $0, %ah # some BIOSes do not set CF
jne edd_mbr_sig_done # on failure, we're done.
movl bootsym(boot_edd_info)+EDD_MBR_SIG_OFFSET,%eax
- movl %eax, (%bx) # store signature from MBR
- incb bootsym(boot_edd_signature_nr) # note that we stored something
+ movb %dl, (%bx) # store BIOS drive number
+ movl %eax, 4(%bx) # store signature from MBR
+ incb bootsym(boot_mbr_signature_nr) # note that we stored something
incb %dl # increment to next device
- addw $4, %bx # increment sig buffer ptr
- cmpb $EDD_MBR_SIG_MAX,bootsym(boot_edd_signature_nr)
+ addw $8, %bx # increment sig buffer ptr
+ cmpb $EDD_MBR_SIG_MAX,bootsym(boot_mbr_signature_nr)
jb edd_mbr_sig_read
edd_mbr_sig_done:
@@ -150,12 +149,13 @@ opt_edd:
opt_edd:
.byte 0 # edd=on/off/skipmbr
-.globl boot_edd_info_nr, boot_edd_signature_nr
+.globl boot_edd_info, boot_edd_info_nr
+.globl boot_mbr_signature, boot_mbr_signature_nr
boot_edd_info_nr:
.byte 0
-boot_edd_signature_nr:
+boot_mbr_signature_nr:
.byte 0
-boot_edd_signature:
- .fill EDD_MBR_SIG_MAX*4,1,0
+boot_mbr_signature:
+ .fill EDD_MBR_SIG_MAX*8,1,0
boot_edd_info:
.fill 512,1,0 # big enough for a disc sector
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/x86_32.S Mon Jul 02 12:19:26 2007 -0600
@@ -36,15 +36,29 @@ 1: mov %eax,(%edi)
/* This is the default interrupt handler. */
int_msg:
- .asciz "Unknown interrupt\n"
+ .asciz "Unknown interrupt (cr2=%08x)\n"
+hex_msg:
+ .asciz " %08x"
ALIGN
ignore_int:
+ pusha
cld
mov $(__HYPERVISOR_DS),%eax
mov %eax,%ds
mov %eax,%es
+ mov %cr2,%eax
+ push %eax
pushl $int_msg
call printk
+ add $8,%esp
+ mov %esp,%ebp
+0: pushl (%ebp)
+ add $4,%ebp
+ pushl $hex_msg
+ call printk
+ add $8,%esp
+ test $0xffc,%ebp
+ jnz 0b
1: jmp 1b
ENTRY(stack_start)
@@ -65,11 +79,6 @@ gdt_descr:
gdt_descr:
.word LAST_RESERVED_GDT_BYTE
.long gdt_table - FIRST_RESERVED_GDT_BYTE
-
- .word 0
-nopaging_gdt_descr:
- .word LAST_RESERVED_GDT_BYTE
- .long sym_phys(gdt_table) - FIRST_RESERVED_GDT_BYTE
.align PAGE_SIZE, 0
/* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/x86_64.S Mon Jul 02 12:19:26 2007 -0600
@@ -56,12 +56,23 @@ 1: movq %rax,(%rdi)
/* This is the default interrupt handler. */
int_msg:
- .asciz "Unknown interrupt\n"
+ .asciz "Unknown interrupt (cr2=%016lx)\n"
+hex_msg:
+ .asciz " %016lx"
ignore_int:
- cld
+ SAVE_ALL
+ movq %cr2,%rsi
leaq int_msg(%rip),%rdi
xorl %eax,%eax
call printk
+ movq %rsp,%rbp
+0: movq (%rbp),%rsi
+ addq $8,%rbp
+ leaq hex_msg(%rip),%rdi
+ xorl %eax,%eax
+ call printk
+ testq $0xff8,%rbp
+ jnz 0b
1: jmp 1b
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/clear_page.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/clear_page.S Mon Jul 02 12:19:26 2007 -0600
@@ -0,0 +1,26 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+#ifdef __i386__
+#define ptr_reg %edx
+#else
+#define ptr_reg %rdi
+#endif
+
+ENTRY(clear_page_sse2)
+#ifdef __i386__
+ mov 4(%esp), ptr_reg
+#endif
+ mov $PAGE_SIZE/16, %ecx
+ xor %eax,%eax
+
+0: dec %ecx
+ movnti %eax, (ptr_reg)
+ movnti %eax, 4(ptr_reg)
+ movnti %eax, 8(ptr_reg)
+ movnti %eax, 12(ptr_reg)
+ lea 16(ptr_reg), ptr_reg
+ jnz 0b
+
+ sfence
+ ret
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/domain.c Mon Jul 02 12:19:26 2007 -0600
@@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v,
pg = alloc_domheap_page(NULL);
if ( !pg )
return -ENOMEM;
- d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+ d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
+ clear_page(d->arch.mm_arg_xlat_l3);
}
l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
@@ -444,7 +445,8 @@ int arch_domain_create(struct domain *d)
if ( (pg = alloc_domheap_page(NULL)) == NULL )
goto fail;
- d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
+ d->arch.mm_perdomain_l2 = page_to_virt(pg);
+ clear_page(d->arch.mm_perdomain_l2);
for ( i = 0; i < (1 << pdpt_order); i++ )
d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
@@ -452,7 +454,8 @@ int arch_domain_create(struct domain *d)
if ( (pg = alloc_domheap_page(NULL)) == NULL )
goto fail;
- d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
+ d->arch.mm_perdomain_l3 = page_to_virt(pg);
+ clear_page(d->arch.mm_perdomain_l3);
d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
__PAGE_HYPERVISOR);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c Mon Jul 02 12:19:26 2007 -0600
@@ -242,6 +242,11 @@ void hvm_domain_relinquish_resources(str
{
hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+
+ pit_deinit(d);
+ rtc_deinit(d);
+ pmtimer_deinit(d);
+ hpet_deinit(d);
}
void hvm_domain_destroy(struct domain *d)
@@ -421,22 +426,11 @@ int hvm_vcpu_initialise(struct vcpu *v)
void hvm_vcpu_destroy(struct vcpu *v)
{
- struct domain *d = v->domain;
-
vlapic_destroy(v);
hvm_funcs.vcpu_destroy(v);
/* Event channel is already freed by evtchn_destroy(). */
/*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
-
- if ( v->vcpu_id == 0 )
- {
- /* NB. All these really belong in hvm_domain_destroy(). */
- pit_deinit(d);
- rtc_deinit(d);
- pmtimer_deinit(d);
- hpet_deinit(d);
- }
}
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/io.c Mon Jul 02 12:19:26 2007 -0600
@@ -858,6 +858,7 @@ void hvm_io_assist(void)
}
/* Copy register changes back into current guest state. */
+ regs->eflags &= ~X86_EFLAGS_RF;
hvm_load_cpu_guest_regs(v, regs);
memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/platform.c Mon Jul 02 12:19:26 2007 -0600
@@ -1065,6 +1065,7 @@ void handle_mmio(unsigned long gpa)
}
regs->eip += inst_len; /* advance %eip */
+ regs->eflags &= ~X86_EFLAGS_RF;
switch ( mmio_op->instr ) {
case INSTR_MOV:
@@ -1122,6 +1123,7 @@ void handle_mmio(unsigned long gpa)
/* IO read --> memory write */
if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
regs->eip -= inst_len; /* do not advance %eip */
+ regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
hvm_inject_exception(TRAP_page_fault, errcode, addr);
return;
}
@@ -1150,6 +1152,7 @@ void handle_mmio(unsigned long gpa)
/* Failed on the page-spanning copy. Inject PF into
* the guest for the address where we failed */
regs->eip -= inst_len; /* do not advance %eip */
+ regs->eflags |= X86_EFLAGS_RF; /* RF was set by #PF */
/* Must set CR2 at the failing address */
addr += size - rv;
gdprintk(XENLOG_DEBUG, "Pagefault on non-io side of a "
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c Mon Jul 02 12:19:26 2007 -0600
@@ -391,7 +391,7 @@ int svm_vmcb_restore(struct vcpu *v, str
}
skip_cr3:
- vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
vmcb->idtr.limit = c->idtr_limit;
@@ -448,7 +448,8 @@ int svm_vmcb_restore(struct vcpu *v, str
/* update VMCB for nested paging restore */
if ( paging_mode_hap(v->domain) ) {
vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
vmcb->cr3 = c->cr3;
vmcb->np_enable = 1;
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
@@ -805,8 +806,10 @@ static void svm_ctxt_switch_from(struct
: : "a" (__pa(root_vmcb[cpu])) );
#ifdef __x86_64__
- /* Resume use of IST2 for NMIs now that the host TR is reinstated. */
- idt_tables[cpu][TRAP_nmi].a |= 2UL << 32;
+ /* Resume use of ISTs now that the host TR is reinstated. */
+ idt_tables[cpu][TRAP_double_fault].a |= 1UL << 32; /* IST1 */
+ idt_tables[cpu][TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_tables[cpu][TRAP_machine_check].a |= 3UL << 32; /* IST3 */
#endif
}
@@ -826,10 +829,12 @@ static void svm_ctxt_switch_to(struct vc
set_segment_register(ss, 0);
/*
- * Cannot use IST2 for NMIs while we are running with the guest TR. But
- * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET.
+ * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
+ * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
*/
- idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32);
+ idt_tables[cpu][TRAP_double_fault].a &= ~(3UL << 32);
+ idt_tables[cpu][TRAP_nmi].a &= ~(3UL << 32);
+ idt_tables[cpu][TRAP_machine_check].a &= ~(3UL << 32);
#endif
svm_restore_dr(v);
@@ -1823,9 +1828,19 @@ static int mov_to_cr(int gpreg, int cr,
break;
case 4: /* CR4 */
+ if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set reserved bit in CR4: %lx",
+ value);
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+ break;
+ }
+
if ( paging_mode_hap(v->domain) )
{
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
paging_update_paging_modes(v);
/* signal paging update to ASID handler */
svm_asid_g_update_paging (v);
@@ -1875,7 +1890,7 @@ static int mov_to_cr(int gpreg, int cr,
}
v->arch.hvm_svm.cpu_shadow_cr4 = value;
- vmcb->cr4 = value | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = value | HVM_CR4_HOST_MASK;
/*
* Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
@@ -2071,9 +2086,11 @@ static inline void svm_do_msr_access(
case MSR_IA32_TIME_STAMP_COUNTER:
msr_content = hvm_get_guest_time(v);
break;
+
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
break;
+
case MSR_EFER:
msr_content = v->arch.hvm_svm.cpu_shadow_efer;
break;
@@ -2093,6 +2110,10 @@ static inline void svm_do_msr_access(
* particularly meaningful, but at least avoids the guest crashing!
*/
msr_content = 0;
+ break;
+
+ case MSR_K8_VM_HSAVE_PA:
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
break;
default:
@@ -2128,9 +2149,15 @@ static inline void svm_do_msr_access(
hvm_set_guest_time(v, msr_content);
pt_reset(v);
break;
+
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
+
+ case MSR_K8_VM_HSAVE_PA:
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+ break;
+
default:
if ( !long_mode_do_msr_write(regs) )
wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
@@ -2265,12 +2292,13 @@ static int svm_reset_to_realmode(struct
vmcb->cr2 = 0;
vmcb->efer = EFER_SVME;
- vmcb->cr4 = SVM_CR4_HOST_MASK;
+ vmcb->cr4 = HVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = 0;
if ( paging_mode_hap(v->domain) ) {
vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
}
/* This will jump to ROMBIOS */
@@ -2411,6 +2439,12 @@ asmlinkage void svm_vmexit_handler(struc
break;
}
+ case VMEXIT_EXCEPTION_MC:
+ HVMTRACE_0D(MCE, v);
+ svm_store_cpu_guest_regs(v, regs, NULL);
+ do_machine_check(regs);
+ break;
+
case VMEXIT_VINTR:
vmcb->vintr.fields.irq = 0;
vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c Mon Jul 02 12:19:26 2007 -0600
@@ -224,7 +224,7 @@ static int construct_vmcb(struct vcpu *v
/* Guest CR4. */
arch_svm->cpu_shadow_cr4 =
read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
- vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK;
paging_update_paging_modes(v);
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
@@ -235,7 +235,9 @@ static int construct_vmcb(struct vcpu *v
vmcb->np_enable = 1; /* enable nested paging */
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
- vmcb->cr4 = arch_svm->cpu_shadow_cr4 = 0;
+ vmcb->cr4 = arch_svm->cpu_shadow_cr4 =
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
+ vmcb->exception_intercepts = HVM_TRAP_MASK;
/* No point in intercepting CR0/3/4 reads, because the hardware
* will return the guest versions anyway. */
@@ -249,7 +251,7 @@ static int construct_vmcb(struct vcpu *v
}
else
{
- vmcb->exception_intercepts = 1U << TRAP_page_fault;
+ vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_page_fault);
}
return 0;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Mon Jul 02 12:19:26 2007 -0600
@@ -43,6 +43,7 @@ u32 vmx_secondary_exec_control __read_mo
u32 vmx_secondary_exec_control __read_mostly;
u32 vmx_vmexit_control __read_mostly;
u32 vmx_vmentry_control __read_mostly;
+bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;
static u32 vmcs_revision_id __read_mostly;
@@ -133,6 +134,7 @@ void vmx_init_vmcs_config(void)
vmx_secondary_exec_control = _vmx_secondary_exec_control;
vmx_vmexit_control = _vmx_vmexit_control;
vmx_vmentry_control = _vmx_vmentry_control;
+ cpu_has_vmx_ins_outs_instr_info = !!(vmx_msr_high & (1U<<22));
}
else
{
@@ -142,6 +144,7 @@ void vmx_init_vmcs_config(void)
BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control);
BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
+ BUG_ON(cpu_has_vmx_ins_outs_instr_info != !!(vmx_msr_high & (1U<<22)));
}
/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
@@ -421,7 +424,7 @@ static void construct_vmcs(struct vcpu *
__vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
#endif
- __vmwrite(EXCEPTION_BITMAP, 1U << TRAP_page_fault);
+ __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
/* Guest CR0. */
cr0 = read_cr0();
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Jul 02 12:19:26 2007 -0600
@@ -560,6 +560,9 @@ int vmx_vmcs_restore(struct vcpu *v, str
__vmwrite(GUEST_RSP, c->rsp);
__vmwrite(GUEST_RFLAGS, c->rflags);
+ v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG
+ | X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
+ __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
__vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
@@ -577,33 +580,17 @@ int vmx_vmcs_restore(struct vcpu *v, str
goto skip_cr3;
}
- if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
- /*
- * This is simple TLB flush, implying the guest has
- * removed some translation or changed page attributes.
- * We simply invalidate the shadow.
- */
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
- goto bad_cr3;
- }
- } else {
- /*
- * If different, make a shadow. Check if the PDBR is valid
- * first.
- */
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
- /* current!=vcpu as not called by arch_vmx_do_launch */
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
- goto bad_cr3;
- }
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- if (old_base_mfn)
- put_page(mfn_to_page(old_base_mfn));
- v->arch.hvm_vmx.cpu_cr3 = c->cr3;
- }
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
+ /* current!=vcpu as not called by arch_vmx_do_launch */
+ mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+ if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
+ goto bad_cr3;
+ }
+ old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ if (old_base_mfn)
+ put_page(mfn_to_page(old_base_mfn));
+ v->arch.hvm_vmx.cpu_cr3 = c->cr3;
skip_cr3:
#if defined(__x86_64__)
@@ -615,7 +602,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
}
#endif
- __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+ __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
@@ -1315,16 +1302,20 @@ static int __get_instruction_length(void
static void inline __update_guest_eip(unsigned long inst_len)
{
- unsigned long current_eip, intr_shadow;
-
- current_eip = __vmread(GUEST_RIP);
- __vmwrite(GUEST_RIP, current_eip + inst_len);
-
- intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
- if ( intr_shadow & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
- {
- intr_shadow &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
- __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
+ unsigned long x;
+
+ x = __vmread(GUEST_RIP);
+ __vmwrite(GUEST_RIP, x + inst_len);
+
+ x = __vmread(GUEST_RFLAGS);
+ if ( x & X86_EFLAGS_RF )
+ __vmwrite(GUEST_RFLAGS, x & ~X86_EFLAGS_RF);
+
+ x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+ if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
+ {
+ x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
}
}
@@ -1475,16 +1466,34 @@ static void vmx_do_invlpg(unsigned long
paging_invlpg(v, va);
}
-/*
- * get segment for string pio according to guest instruction
- */
-static void vmx_str_pio_get_segment(int long_mode, unsigned long eip,
- int inst_len, enum x86_segment *seg)
+/* Get segment for OUTS according to guest instruction. */
+static enum x86_segment vmx_outs_get_segment(
+ int long_mode, unsigned long eip, int inst_len)
{
unsigned char inst[MAX_INST_LEN];
+ enum x86_segment seg = x86_seg_ds;
int i;
extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
+ if ( likely(cpu_has_vmx_ins_outs_instr_info) )
+ {
+ unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO);
+
+ /* Get segment register according to bits 17:15. */
+ switch ( (instr_info >> 15) & 7 )
+ {
+ case 0: seg = x86_seg_es; break;
+ case 1: seg = x86_seg_cs; break;
+ case 2: seg = x86_seg_ss; break;
+ case 3: seg = x86_seg_ds; break;
+ case 4: seg = x86_seg_fs; break;
+ case 5: seg = x86_seg_gs; break;
+ default: BUG();
+ }
+
+ goto out;
+ }
+
if ( !long_mode )
eip += __vmread(GUEST_CS_BASE);
@@ -1493,7 +1502,7 @@ static void vmx_str_pio_get_segment(int
{
gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
domain_crash(current->domain);
- return;
+ goto out;
}
for ( i = 0; i < inst_len; i++ )
@@ -1510,25 +1519,28 @@ static void vmx_str_pio_get_segment(int
#endif
continue;
case 0x2e: /* CS */
- *seg = x86_seg_cs;
+ seg = x86_seg_cs;
continue;
case 0x36: /* SS */
- *seg = x86_seg_ss;
+ seg = x86_seg_ss;
continue;
case 0x26: /* ES */
- *seg = x86_seg_es;
+ seg = x86_seg_es;
continue;
case 0x64: /* FS */
- *seg = x86_seg_fs;
+ seg = x86_seg_fs;
continue;
case 0x65: /* GS */
- *seg = x86_seg_gs;
+ seg = x86_seg_gs;
continue;
case 0x3e: /* DS */
- *seg = x86_seg_ds;
+ seg = x86_seg_ds;
continue;
}
}
+
+ out:
+ return seg;
}
static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
@@ -1541,7 +1553,7 @@ static int vmx_str_pio_check_descriptor(
*base = 0;
*limit = 0;
if ( seg != x86_seg_es )
- vmx_str_pio_get_segment(long_mode, eip, inst_len, &seg);
+ seg = vmx_outs_get_segment(long_mode, eip, inst_len);
switch ( seg )
{
@@ -1587,7 +1599,7 @@ static int vmx_str_pio_check_descriptor(
}
*ar_bytes = __vmread(ar_field);
- return !(*ar_bytes & 0x10000);
+ return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE);
}
@@ -1896,7 +1908,7 @@ static void vmx_world_save(struct vcpu *
c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
c->esp = __vmread(GUEST_RSP);
- c->eflags = __vmread(GUEST_RFLAGS);
+ c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF;
c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
c->cr3 = v->arch.hvm_vmx.cpu_cr3;
@@ -1997,7 +2009,7 @@ static int vmx_world_restore(struct vcpu
else
HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
- __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+ __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
@@ -2272,7 +2284,6 @@ static int vmx_set_cr0(unsigned long val
"Enabling CR0.PE at %%eip 0x%lx", eip);
if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
{
- eip = __vmread(GUEST_RIP);
HVM_DBG_LOG(DBG_LEVEL_1,
"Restoring to %%eip 0x%lx", eip);
return 0; /* do not update eip! */
@@ -2397,6 +2408,15 @@ static int mov_to_cr(int gp, int cr, str
case 4: /* CR4 */
old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
+ if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set reserved bit in CR4: %lx",
+ value);
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ break;
+ }
+
if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
{
if ( vmx_pgbit_test(v) )
@@ -2437,7 +2457,7 @@ static int mov_to_cr(int gp, int cr, str
}
}
- __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
+ __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
v->arch.hvm_vmx.cpu_shadow_cr4 = value;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
@@ -2581,7 +2601,7 @@ static inline int vmx_do_msr_read(struct
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
break;
- case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+ case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
goto gp_fault;
default:
if ( long_mode_do_msr_read(regs) )
@@ -2707,7 +2727,7 @@ static inline int vmx_do_msr_write(struc
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
- case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+ case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
goto gp_fault;
default:
if ( !long_mode_do_msr_write(regs) )
@@ -2823,7 +2843,8 @@ static void vmx_reflect_exception(struct
}
}
-static void vmx_failed_vmentry(unsigned int exit_reason)
+static void vmx_failed_vmentry(unsigned int exit_reason,
+ struct cpu_user_regs *regs)
{
unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
unsigned long exit_qualification;
@@ -2840,6 +2861,9 @@ static void vmx_failed_vmentry(unsigned
break;
case EXIT_REASON_MACHINE_CHECK:
printk("caused by machine check.\n");
+ HVMTRACE_0D(MCE, current);
+ vmx_store_cpu_guest_regs(current, regs, NULL);
+ do_machine_check(regs);
break;
default:
printk("reason not known yet!");
@@ -2869,7 +2893,7 @@ asmlinkage void vmx_vmexit_handler(struc
local_irq_enable();
if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
- return vmx_failed_vmentry(exit_reason);
+ return vmx_failed_vmentry(exit_reason, regs);
switch ( exit_reason )
{
@@ -2920,11 +2944,19 @@ asmlinkage void vmx_vmexit_handler(struc
vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
break;
case TRAP_nmi:
- HVMTRACE_0D(NMI, v);
if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
+ {
+ HVMTRACE_0D(NMI, v);
+ vmx_store_cpu_guest_regs(v, regs, NULL);
do_nmi(regs); /* Real NMI, vector 2: normal processing. */
+ }
else
vmx_reflect_exception(v);
+ break;
+ case TRAP_machine_check:
+ HVMTRACE_0D(MCE, v);
+ vmx_store_cpu_guest_regs(v, regs, NULL);
+ do_machine_check(regs);
break;
default:
goto exit_and_crash;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/io_apic.c Mon Jul 02 12:19:26 2007 -0600
@@ -371,7 +371,7 @@ static int pin_2_irq(int idx, int apic,
* so mask in all cases should simply be TARGET_CPUS
*/
#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
+void /*__init*/ setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
@@ -849,7 +849,7 @@ static inline void UNEXPECTED_IO_APIC(vo
{
}
-void __init __print_IO_APIC(void)
+void /*__init*/ __print_IO_APIC(void)
{
int apic, i;
union IO_APIC_reg_00 reg_00;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/mm.c Mon Jul 02 12:19:26 2007 -0600
@@ -3240,6 +3240,7 @@ static int ptwr_emulated_update(
struct ptwr_emulate_ctxt *ptwr_ctxt)
{
unsigned long mfn;
+ unsigned long unaligned_addr = addr;
struct page_info *page;
l1_pgentry_t pte, ol1e, nl1e, *pl1e;
struct vcpu *v = current;
@@ -3294,7 +3295,7 @@ static int ptwr_emulated_update(
if ( unlikely(!get_page_from_l1e(nl1e, d)) )
{
if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
- (bytes == 4) && (addr & 4) && !do_cmpxchg &&
+ (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
{
/*
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/platform_hypercall.c Mon Jul 02 12:19:26 2007 -0600
@@ -20,12 +20,20 @@
#include <xen/guest_access.h>
#include <asm/current.h>
#include <public/platform.h>
+#include <asm/edd.h>
#include <asm/mtrr.h>
#include "cpu/mtrr/mtrr.h"
+
+extern uint16_t boot_edid_caps;
+extern uint8_t boot_edid_info[];
#ifndef COMPAT
typedef long ret_t;
DEFINE_SPINLOCK(xenpf_lock);
+# undef copy_from_compat
+# define copy_from_compat copy_from_guest
+# undef copy_to_compat
+# define copy_to_compat copy_to_guest
#else
extern spinlock_t xenpf_lock;
#endif
@@ -150,6 +158,94 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
}
}
break;
+
+ case XENPF_firmware_info:
+ switch ( op->u.firmware_info.type )
+ {
+ case XEN_FW_DISK_INFO: {
+ const struct edd_info *info;
+ u16 length;
+
+ ret = -ESRCH;
+ if ( op->u.firmware_info.index >= bootsym(boot_edd_info_nr) )
+ break;
+
+ info = bootsym(boot_edd_info) + op->u.firmware_info.index;
+
+ /* Transfer the EDD info block. */
+ ret = -EFAULT;
+ if ( copy_from_compat(&length, op->u.firmware_info.u.
+ disk_info.edd_params, 1) )
+ break;
+ if ( length > info->edd_device_params.length )
+ length = info->edd_device_params.length;
+ if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
+ (u8 *)&info->edd_device_params,
+ length) )
+ break;
+ if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
+ &length, 1) )
+ break;
+
+ /* Transfer miscellaneous other information values. */
+#define C(x) op->u.firmware_info.u.disk_info.x = info->x
+ C(device);
+ C(version);
+ C(interface_support);
+ C(legacy_max_cylinder);
+ C(legacy_max_head);
+ C(legacy_sectors_per_track);
+#undef C
+
+ ret = (copy_field_to_guest(u_xenpf_op, op,
+ u.firmware_info.u.disk_info)
+ ? -EFAULT : 0);
+ break;
+ }
+ case XEN_FW_DISK_MBR_SIGNATURE: {
+ const struct mbr_signature *sig;
+
+ ret = -ESRCH;
+ if ( op->u.firmware_info.index >= bootsym(boot_mbr_signature_nr) )
+ break;
+
+ sig = bootsym(boot_mbr_signature) + op->u.firmware_info.index;
+
+ op->u.firmware_info.u.disk_mbr_signature.device = sig->device;
+ op->u.firmware_info.u.disk_mbr_signature.mbr_signature =
+ sig->signature;
+
+ ret = (copy_field_to_guest(u_xenpf_op, op,
+ u.firmware_info.u.disk_mbr_signature)
+ ? -EFAULT : 0);
+ break;
+ }
+ case XEN_FW_VBEDDC_INFO:
+ ret = -ESRCH;
+ if ( op->u.firmware_info.index != 0 )
+ break;
+ if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
+ break;
+
+ op->u.firmware_info.u.vbeddc_info.capabilities =
+ bootsym(boot_edid_caps);
+ op->u.firmware_info.u.vbeddc_info.edid_transfer_time =
+ bootsym(boot_edid_caps) >> 8;
+
+ ret = 0;
+ if ( copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
+ u.vbeddc_info.capabilities) ||
+ copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
+ u.vbeddc_info.edid_transfer_time) ||
+ copy_to_compat(op->u.firmware_info.u.vbeddc_info.edid,
+ bootsym(boot_edid_info), 128) )
+ ret = -EFAULT;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;
default:
ret = -ENOSYS;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/setup.c Mon Jul 02 12:19:26 2007 -0600
@@ -405,7 +405,7 @@ void __init __start_xen(unsigned long mb
void __init __start_xen(unsigned long mbi_p)
{
char *memmap_type = NULL;
- char __cmdline[] = "", *cmdline = __cmdline;
+ char __cmdline[] = "", *cmdline = __cmdline, *kextra;
unsigned long _initrd_start = 0, _initrd_len = 0;
unsigned int initrdidx = 1;
char *_policy_start = NULL;
@@ -426,6 +426,17 @@ void __init __start_xen(unsigned long mb
/* Parse the command-line options. */
if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
cmdline = __va(mbi->cmdline);
+ if ( (kextra = strstr(cmdline, " -- ")) != NULL )
+ {
+ /*
+ * Options after ' -- ' separator belong to dom0.
+ * 1. Orphan dom0's options from Xen's command line.
+ * 2. Skip all but final leading space from dom0's options.
+ */
+ *kextra = '\0';
+ kextra += 3;
+ while ( kextra[1] == ' ' ) kextra++;
+ }
cmdline_parse(cmdline);
parse_video_info();
@@ -494,7 +505,7 @@ void __init __start_xen(unsigned long mb
printk("Disc information:\n");
printk(" Found %d MBR signatures\n",
- bootsym(boot_edd_signature_nr));
+ bootsym(boot_mbr_signature_nr));
printk(" Found %d EDD information structures\n",
bootsym(boot_edd_info_nr));
@@ -1009,17 +1020,26 @@ void __init __start_xen(unsigned long mb
/* Grab the DOM0 command line. */
cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
- if ( cmdline != NULL )
+ if ( (cmdline != NULL) || (kextra != NULL) )
{
static char dom0_cmdline[MAX_GUEST_CMDLINE];
- /* Skip past the image name and copy to a local buffer. */
- while ( *cmdline == ' ' ) cmdline++;
- if ( (cmdline = strchr(cmdline, ' ')) != NULL )
+ dom0_cmdline[0] = '\0';
+
+ if ( cmdline != NULL )
{
+ /* Skip past the image name and copy to a local buffer. */
while ( *cmdline == ' ' ) cmdline++;
- safe_strcpy(dom0_cmdline, cmdline);
+ if ( (cmdline = strchr(cmdline, ' ')) != NULL )
+ {
+ while ( *cmdline == ' ' ) cmdline++;
+ safe_strcpy(dom0_cmdline, cmdline);
+ }
}
+
+ if ( kextra != NULL )
+ /* kextra always includes exactly one leading space. */
+ safe_strcat(dom0_cmdline, kextra);
/* Append any extra parameters. */
if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
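[Editor's note, not part of the patch: a minimal user-space sketch of the " -- " separator handling added to __start_xen() above. The sample command line and program are illustrative only; everything before the separator stays on Xen's command line, everything after it (with exactly one leading space) is appended to dom0's.]

#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Made-up example boot command line. */
    char cmdline[] = "console=com1 com1=115200 -- console=hvc0 quiet";
    char *kextra = strstr(cmdline, " -- ");

    if (kextra != NULL) {
        *kextra = '\0';          /* Xen's options end here.            */
        kextra += 3;             /* Keep exactly one leading space...  */
        while (kextra[1] == ' ') /* ...and drop any extra ones.        */
            kextra++;
    }

    printf("Xen options : \"%s\"\n", cmdline);
    printf("dom0 options: \"%s\"\n", kextra ? kextra : "");
    return 0;
}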
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/traps.c Mon Jul 02 12:19:26 2007 -0600
@@ -86,6 +86,7 @@ asmlinkage int do_ ## _name(struct cpu_u
asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
asmlinkage void nmi(void);
+asmlinkage void machine_check(void);
DECLARE_TRAP_HANDLER(divide_error);
DECLARE_TRAP_HANDLER(debug);
DECLARE_TRAP_HANDLER(int3);
@@ -103,7 +104,6 @@ DECLARE_TRAP_HANDLER(simd_coprocessor_er
DECLARE_TRAP_HANDLER(simd_coprocessor_error);
DECLARE_TRAP_HANDLER(alignment_check);
DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
-DECLARE_TRAP_HANDLER(machine_check);
long do_set_debugreg(int reg, unsigned long value);
unsigned long do_get_debugreg(int reg);
@@ -631,6 +631,7 @@ static int emulate_forced_invalid_op(str
regs->ecx = c;
regs->edx = d;
regs->eip = eip;
+ regs->eflags &= ~X86_EFLAGS_RF;
return EXCRET_fault_fixed;
}
@@ -730,10 +731,11 @@ asmlinkage int do_int3(struct cpu_user_r
return do_guest_trap(TRAP_int3, regs, 0);
}
-asmlinkage int do_machine_check(struct cpu_user_regs *regs)
-{
- fatal_trap(TRAP_machine_check, regs);
- return 0;
+asmlinkage void do_machine_check(struct cpu_user_regs *regs)
+{
+ extern fastcall void (*machine_check_vector)(
+ struct cpu_user_regs *, long error_code);
+ machine_check_vector(regs, regs->error_code);
}
void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1787,6 +1789,7 @@ static int emulate_privileged_op(struct
done:
regs->eip = eip;
+ regs->eflags &= ~X86_EFLAGS_RF;
return EXCRET_fault_fixed;
fail:
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/entry.S Mon Jul 02 12:19:26 2007 -0600
@@ -72,48 +72,36 @@
andl $~3,reg; \
movl (reg),reg;
-
ALIGN
restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
testl $X86_EFLAGS_VM,UREGS_eflags(%esp)
- jnz restore_all_vm86
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ leal 4(%esp),%esp
+ jnz .Lrestore_iret_guest
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
- testl $2,UREGS_cs(%esp)
- jnz 1f
+ testb $2,UREGS_cs-UREGS_eip(%esp)
+ jnz .Lrestore_sregs_guest
call restore_ring0_guest
- jmp restore_all_vm86
-1:
+ jmp .Lrestore_iret_guest
#endif
-.Lft1: mov UREGS_ds(%esp),%ds
-.Lft2: mov UREGS_es(%esp),%es
-.Lft3: mov UREGS_fs(%esp),%fs
-.Lft4: mov UREGS_gs(%esp),%gs
-restore_all_vm86:
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- addl $4,%esp
+.Lrestore_sregs_guest:
+.Lft1: mov UREGS_ds-UREGS_eip(%esp),%ds
+.Lft2: mov UREGS_es-UREGS_eip(%esp),%es
+.Lft3: mov UREGS_fs-UREGS_eip(%esp),%fs
+.Lft4: mov UREGS_gs-UREGS_eip(%esp),%gs
+.Lrestore_iret_guest:
.Lft5: iret
.section .fixup,"ax"
-.Lfx5: subl $28,%esp
- pushl 28(%esp) # error_code/entry_vector
- movl %eax,UREGS_eax+4(%esp)
- movl %ebp,UREGS_ebp+4(%esp)
- movl %edi,UREGS_edi+4(%esp)
- movl %esi,UREGS_esi+4(%esp)
- movl %edx,UREGS_edx+4(%esp)
- movl %ecx,UREGS_ecx+4(%esp)
- movl %ebx,UREGS_ebx+4(%esp)
-.Lfx1: SET_XEN_SEGMENTS(a)
- movl %eax,%fs
- movl %eax,%gs
- sti
- popl %esi
+.Lfx1: sti
+ SAVE_ALL_GPRS
+ mov UREGS_error_code(%esp),%esi
pushfl # EFLAGS
movl $__HYPERVISOR_CS,%eax
pushl %eax # CS
@@ -147,7 +135,7 @@ 1: call create_bounce_frame
.long .Lft2,.Lfx1
.long .Lft3,.Lfx1
.long .Lft4,.Lfx1
- .long .Lft5,.Lfx5
+ .long .Lft5,.Lfx1
.previous
.section __ex_table,"a"
.long .Ldf1,failsafe_callback
@@ -169,8 +157,8 @@ ENTRY(hypercall)
ENTRY(hypercall)
subl $4,%esp
FIXUP_RING0_GUEST_STACK
- SAVE_ALL(b)
- sti
+ SAVE_ALL(1f,1f)
+1: sti
GET_CURRENT(%ebx)
cmpl $NR_hypercalls,%eax
jae bad_hypercall
@@ -420,9 +408,14 @@ ENTRY(divide_error)
ALIGN
handle_exception:
FIXUP_RING0_GUEST_STACK
- SAVE_ALL_NOSEGREGS(a)
- SET_XEN_SEGMENTS(a)
- testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
+ SAVE_ALL(1f,2f)
+ .text 1
+ /* Exception within Xen: make sure we have valid %ds,%es. */
+1: mov %ecx,%ds
+ mov %ecx,%es
+ jmp 2f
+ .previous
+2: testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
jz exception_with_ints_disabled
sti # re-enable interrupts
1: xorl %eax,%eax
@@ -533,71 +526,81 @@ ENTRY(page_fault)
movw $TRAP_page_fault,2(%esp)
jmp handle_exception
-ENTRY(machine_check)
- pushl $TRAP_machine_check<<16
- jmp handle_exception
-
ENTRY(spurious_interrupt_bug)
pushl $TRAP_spurious_int<<16
jmp handle_exception
ENTRY(early_page_fault)
- SAVE_ALL_NOSEGREGS(a)
- movl %esp,%edx
- pushl %edx
+ SAVE_ALL(1f,1f)
+1: movl %esp,%eax
+ pushl %eax
call do_early_page_fault
addl $4,%esp
jmp restore_all_xen
-ENTRY(nmi)
+handle_nmi_mce:
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
- # NMI entry protocol is incompatible with guest kernel in ring 0.
+ # NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
+ addl $4,%esp
iret
#else
# Save state but do not trash the segment registers!
- # We may otherwise be unable to reload them or copy them to ring 1.
- pushl %eax
- SAVE_ALL_NOSEGREGS(a)
-
- # We can only process the NMI if:
- # A. We are the outermost Xen activation (in which case we have
- # the selectors safely saved on our stack)
- # B. DS and ES contain sane Xen values.
- # In all other cases we bail without touching DS-GS, as we have
- # interrupted an enclosing Xen activation in tricky prologue or
- # epilogue code.
- movl UREGS_eflags(%esp),%eax
- movb UREGS_cs(%esp),%al
- testl $(3|X86_EFLAGS_VM),%eax
- jnz continue_nmi
- movl %ds,%eax
- cmpw $(__HYPERVISOR_DS),%ax
- jne defer_nmi
- movl %es,%eax
- cmpw $(__HYPERVISOR_DS),%ax
- jne defer_nmi
-
-continue_nmi:
- SET_XEN_SEGMENTS(d)
+ SAVE_ALL(.Lnmi_mce_xen,.Lnmi_mce_common)
+.Lnmi_mce_common:
+ xorl %eax,%eax
+ movw UREGS_entry_vector(%esp),%ax
movl %esp,%edx
pushl %edx
- call do_nmi
+ call *exception_table(,%eax,4)
addl $4,%esp
+ /*
+ * NB. We may return to Xen context with polluted %ds/%es. But in such
+ * cases we have put guest DS/ES on the guest stack frame, which will
+ * be detected by SAVE_ALL(), or we have rolled back restore_guest.
+ */
jmp ret_from_intr
-
-defer_nmi:
- movl $FIXMAP_apic_base,%eax
- # apic_wait_icr_idle()
-1: movl %ss:APIC_ICR(%eax),%ebx
- testl $APIC_ICR_BUSY,%ebx
- jnz 1b
- # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi)
- movl $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_PHYSICAL | \
- TRAP_deferred_nmi),%ss:APIC_ICR(%eax)
- jmp restore_all_xen
+.Lnmi_mce_xen:
+ /* Check the outer (guest) context for %ds/%es state validity. */
+ GET_GUEST_REGS(%ebx)
+ testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx)
+ mov %ds,%eax
+ mov %es,%edx
+ jnz .Lnmi_mce_vm86
+ /* We may have interrupted Xen while messing with %ds/%es... */
+ cmpw %ax,%cx
+ mov %ecx,%ds /* Ensure %ds is valid */
+ cmove UREGS_ds(%ebx),%eax /* Grab guest DS if it wasn't in %ds */
+ cmpw %dx,%cx
+ movl %eax,UREGS_ds(%ebx) /* Ensure guest frame contains guest DS */
+ cmove UREGS_es(%ebx),%edx /* Grab guest ES if it wasn't in %es */
+ mov %ecx,%es /* Ensure %es is valid */
+ movl $.Lrestore_sregs_guest,%ecx
+ movl %edx,UREGS_es(%ebx) /* Ensure guest frame contains guest ES */
+ cmpl %ecx,UREGS_eip(%esp)
+ jbe .Lnmi_mce_common
+ cmpl $.Lrestore_iret_guest,UREGS_eip(%esp)
+ ja .Lnmi_mce_common
+ /* Roll outer context restore_guest back to restoring %ds/%es. */
+ movl %ecx,UREGS_eip(%esp)
+ jmp .Lnmi_mce_common
+.Lnmi_mce_vm86:
+ /* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */
+ mov %ecx,%ds
+ mov %ecx,%es
+ jmp .Lnmi_mce_common
#endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */
+ENTRY(nmi)
+ pushl $TRAP_nmi<<16
+ jmp handle_nmi_mce
+
+ENTRY(machine_check)
+ pushl $TRAP_machine_check<<16
+ jmp handle_nmi_mce
+
ENTRY(setup_vm86_frame)
+ mov %ecx,%ds
+ mov %ecx,%es
# Copies the entire stack frame forwards by 16 bytes.
.macro copy_vm86_words count=18
.if \count
@@ -615,7 +618,7 @@ ENTRY(exception_table)
ENTRY(exception_table)
.long do_divide_error
.long do_debug
- .long 0 # nmi
+ .long do_nmi
.long do_int3
.long do_overflow
.long do_bounds
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S Mon Jul 02 12:19:26 2007 -0600
@@ -20,40 +20,45 @@
#include <asm/asm_defns.h>
#include <public/xen.h>
+#define guestreg(field) ((field)-UREGS_eip+36)
+
# Upon entry the stack should be the Xen stack and contain:
- # %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, SAVE_ALL, RETURN
+ # %ss, %esp, EFLAGS, %cs|1, %eip, RETURN
# On exit the stack should be %ss:%esp (i.e. the guest stack)
# and contain:
- # EFLAGS, %cs, %eip, ERROR, SAVE_ALL, RETURN
+ # EFLAGS, %cs, %eip, RETURN
ALIGN
ENTRY(restore_ring0_guest)
+ pusha
+
# Point %gs:%esi to guest stack.
-RRG0: movw UREGS_ss+4(%esp),%gs
- movl UREGS_esp+4(%esp),%esi
+RRG0: movw guestreg(UREGS_ss)(%esp),%gs
+ movl guestreg(UREGS_esp)(%esp),%esi
- # Copy EFLAGS...EBX, RETURN from Xen stack to guest stack.
- movl $(UREGS_kernel_sizeof>>2)+1,%ecx
+ # Copy EFLAGS, %cs, %eip, RETURN, PUSHA from Xen stack to guest stack.
+ movl $12,%ecx /* 12 32-bit values */
1: subl $4,%esi
movl -4(%esp,%ecx,4),%eax
RRG1: movl %eax,%gs:(%esi)
loop 1b
-RRG2: andl $~3,%gs:UREGS_cs+4(%esi)
+RRG2: andl $~3,%gs:guestreg(UREGS_cs)(%esi)
movl %gs,%eax
# We need to do this because these registers are not present
# on the guest stack so they cannot be restored by the code in
# restore_all_guest.
-RRG3: mov UREGS_ds+4(%esp),%ds
-RRG4: mov UREGS_es+4(%esp),%es
-RRG5: mov UREGS_fs+4(%esp),%fs
-RRG6: mov UREGS_gs+4(%esp),%gs
+RRG3: mov guestreg(UREGS_ds)(%esp),%ds
+RRG4: mov guestreg(UREGS_es)(%esp),%es
+RRG5: mov guestreg(UREGS_fs)(%esp),%fs
+RRG6: mov guestreg(UREGS_gs)(%esp),%gs
RRG7: movl %eax,%ss
movl %esi,%esp
+ popa
ret
.section __ex_table,"a"
.long RRG0,domain_crash_synchronous
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c Mon Jul 02 12:19:26 2007 -0600
@@ -232,15 +232,6 @@ unsigned long do_iret(void)
return 0;
}
-#include <asm/asm_defns.h>
-BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
-fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
-{
- asmlinkage void do_nmi(struct cpu_user_regs *);
- ack_APIC_irq();
- do_nmi(regs);
-}
-
void __init percpu_traps_init(void)
{
struct tss_struct *tss = &doublefault_tss;
@@ -251,8 +242,6 @@ void __init percpu_traps_init(void)
/* The hypercall entry vector is only accessible from ring 1. */
_set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
-
- set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);
/*
* Make a separate task for double faults. This will get us debug output if
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/Makefile Mon Jul 02 12:19:26 2007 -0600
@@ -1,12 +1,12 @@ subdir-y += compat
subdir-y += compat
obj-y += entry.o
-obj-y += compat_kexec.o
obj-y += gpr_switch.o
obj-y += mm.o
obj-y += traps.o
obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_COMPAT) += compat_kexec.o
obj-$(CONFIG_COMPAT) += domain.o
obj-$(CONFIG_COMPAT) += physdev.o
obj-$(CONFIG_COMPAT) += platform_hypercall.o
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/compat/entry.S
--- a/xen/arch/x86/x86_64/compat/entry.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/compat/entry.S Mon Jul 02 12:19:26 2007 -0600
@@ -143,12 +143,12 @@ compat_restore_all_guest:
.Lft0: iretq
.section .fixup,"ax"
-.Lfx0: popq -15*8-8(%rsp) # error_code/entry_vector
- SAVE_ALL # 15*8 bytes pushed
- movq -8(%rsp),%rsi # error_code/entry_vector
- sti # after stack abuse (-1024(%rsp))
+.Lfx0: sti
+ SAVE_ALL
+ movq UREGS_error_code(%rsp),%rsi
+ movq %rsp,%rax
+ andq $~0xf,%rsp
pushq $__HYPERVISOR_DS # SS
- leaq 8(%rsp),%rax
pushq %rax # RSP
pushfq # RFLAGS
pushq $__HYPERVISOR_CS # CS
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/entry.S Mon Jul 02 12:19:26 2007 -0600
@@ -57,23 +57,23 @@ 1: sysretl
/* No special register assumptions. */
iret_exit_to_guest:
addq $8,%rsp
-.Lft1: iretq
+.Lft0: iretq
.section .fixup,"ax"
-.Lfx1: popq -15*8-8(%rsp) # error_code/entry_vector
- SAVE_ALL # 15*8 bytes pushed
- movq -8(%rsp),%rsi # error_code/entry_vector
- sti # after stack abuse (-1024(%rsp))
+.Lfx0: sti
+ SAVE_ALL
+ movq UREGS_error_code(%rsp),%rsi
+ movq %rsp,%rax
+ andq $~0xf,%rsp
pushq $__HYPERVISOR_DS # SS
- leaq 8(%rsp),%rax
pushq %rax # RSP
- pushf # RFLAGS
+ pushfq # RFLAGS
pushq $__HYPERVISOR_CS # CS
- leaq .Ldf1(%rip),%rax
+ leaq .Ldf0(%rip),%rax
pushq %rax # RIP
pushq %rsi # error_code/entry_vector
jmp handle_exception
-.Ldf1: GET_CURRENT(%rbx)
+.Ldf0: GET_CURRENT(%rbx)
jmp test_all_events
failsafe_callback:
GET_CURRENT(%rbx)
@@ -88,10 +88,10 @@ 1: call create_bounce_frame
jmp test_all_events
.previous
.section __pre_ex_table,"a"
- .quad .Lft1,.Lfx1
+ .quad .Lft0,.Lfx0
.previous
.section __ex_table,"a"
- .quad .Ldf1,failsafe_callback
+ .quad .Ldf0,failsafe_callback
.previous
ALIGN
@@ -505,11 +505,6 @@ ENTRY(page_fault)
movl $TRAP_page_fault,4(%rsp)
jmp handle_exception
-ENTRY(machine_check)
- pushq $0
- movl $TRAP_machine_check,4(%rsp)
- jmp handle_exception
-
ENTRY(spurious_interrupt_bug)
pushq $0
movl $TRAP_spurious_int,4(%rsp)
@@ -527,31 +522,38 @@ ENTRY(early_page_fault)
call do_early_page_fault
jmp restore_all_xen
+handle_ist_exception:
+ SAVE_ALL
+ testb $3,UREGS_cs(%rsp)
+ jz 1f
+ /* Interrupted guest context. Copy the context to stack bottom. */
+ GET_GUEST_REGS(%rdi)
+ movq %rsp,%rsi
+ movl $UREGS_kernel_sizeof/8,%ecx
+ movq %rdi,%rsp
+ rep movsq
+1: movq %rsp,%rdi
+ movl UREGS_entry_vector(%rsp),%eax
+ leaq exception_table(%rip),%rdx
+ callq *(%rdx,%rax,8)
+ jmp ret_from_intr
+
ENTRY(nmi)
pushq $0
- SAVE_ALL
- testb $3,UREGS_cs(%rsp)
- jz nmi_in_hypervisor_mode
- /* Interrupted guest context. Copy the context to stack bottom. */
- GET_GUEST_REGS(%rbx)
- movl $UREGS_kernel_sizeof/8,%ecx
-1: popq %rax
- movq %rax,(%rbx)
- addq $8,%rbx
- loop 1b
- subq $UREGS_kernel_sizeof,%rbx
- movq %rbx,%rsp
-nmi_in_hypervisor_mode:
- movq %rsp,%rdi
- call do_nmi
- jmp ret_from_intr
+ movl $TRAP_nmi,4(%rsp)
+ jmp handle_ist_exception
+
+ENTRY(machine_check)
+ pushq $0
+ movl $TRAP_machine_check,4(%rsp)
+ jmp handle_ist_exception
.data
ENTRY(exception_table)
.quad do_divide_error
.quad do_debug
- .quad 0 # nmi
+ .quad do_nmi
.quad do_int3
.quad do_overflow
.quad do_bounds
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/mm.c Mon Jul 02 12:19:26 2007 -0600
@@ -106,7 +106,8 @@ void __init paging_init(void)
/* Create user-accessible L2 directory to map the MPT for guests. */
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
- l3_ro_mpt = clear_page(page_to_virt(l2_pg));
+ l3_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l3_ro_mpt);
l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
@@ -132,7 +133,8 @@ void __init paging_init(void)
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
- l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+ l2_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l2_ro_mpt);
l3e_write(&l3_ro_mpt[l3_table_offset(va)],
l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
l2_ro_mpt += l2_table_offset(va);
@@ -152,7 +154,8 @@ void __init paging_init(void)
l3_ro_mpt =
l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
- compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+ compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l2_ro_mpt);
l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c Mon Jul 02 12:19:26 2007 -0600
@@ -294,8 +294,9 @@ void __init percpu_traps_init(void)
{
/* Specify dedicated interrupt stacks for NMIs and double faults. */
set_intr_gate(TRAP_double_fault, &double_fault);
- idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
- idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
+ idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_table[TRAP_machine_check].a |= 3UL << 32; /* IST3 */
/*
* The 32-on-64 hypercall entry vector is only accessible from ring 1.
@@ -310,7 +311,10 @@ void __init percpu_traps_init(void)
stack_bottom = (char *)get_stack_bottom();
stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
- /* Double-fault handler has its own per-CPU 2kB stack. */
+ /* Machine Check handler has its own per-CPU 1kB stack. */
+ init_tss[cpu].ist[2] = (unsigned long)&stack[1024];
+
+ /* Double-fault handler has its own per-CPU 1kB stack. */
init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
/* NMI handler has its own per-CPU 1kB stack. */
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_emulate.c Mon Jul 02 12:19:26 2007 -0600
@@ -1630,6 +1630,7 @@ x86_emulate(
}
/* Commit shadow register state. */
+ _regs.eflags &= ~EF_RF;
*ctxt->regs = _regs;
done:
diff -r 443ce7edad0e -r d146700adf71 xen/common/sysctl.c
--- a/xen/common/sysctl.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/common/sysctl.c Mon Jul 02 12:19:26 2007 -0600
@@ -136,6 +136,39 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
}
break;
+ case XEN_SYSCTL_getcpuinfo:
+ {
+ uint32_t i, nr_cpus;
+ struct xen_sysctl_cpuinfo cpuinfo;
+ struct vcpu *v;
+
+ nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
+
+ for ( i = 0; i < nr_cpus; i++ )
+ {
+ /* Assume no holes in idle-vcpu map. */
+ if ( (v = idle_vcpu[i]) == NULL )
+ break;
+
+ cpuinfo.idletime = v->runstate.time[RUNSTATE_running];
+ if ( v->is_running )
+ cpuinfo.idletime += NOW() - v->runstate.state_entry_time;
+
+ if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
+ {
+ ret = -EFAULT;
+ break;
+ }
+ }
+
+ op->u.getcpuinfo.nr_cpus = i;
+ ret = 0;
+
+ if ( copy_to_guest(u_sysctl, op, 1) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = arch_do_sysctl(op, u_sysctl);
break;
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/edd.h
--- a/xen/include/asm-x86/edd.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/edd.h Mon Jul 02 12:19:26 2007 -0600
@@ -32,12 +32,22 @@ struct edd_info {
u16 legacy_max_cylinder; /* %cl[7:6]:%ch: maximum cylinder number */
u8 legacy_max_head; /* %dh: maximum head number */
u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */
- /* Int13, Fn41: Get Device Parameters */
- u8 edd_device_params[74]; /* as filled into %ds:%si */
+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+ struct {
+ u16 length;
+ u8 data[72];
+ } edd_device_params;
} __attribute__ ((packed));
-extern u32 boot_edd_signature[];
-extern u8 boot_edd_signature_nr;
+struct mbr_signature {
+ u8 device;
+ u8 pad[3];
+ u32 signature;
+} __attribute__ ((packed));
+
+/* These all reside in the boot trampoline. Access via bootsym(). */
+extern struct mbr_signature boot_mbr_signature[];
+extern u8 boot_mbr_signature_nr;
extern struct edd_info boot_edd_info[];
extern u8 boot_edd_info_nr;
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h Mon Jul 02 12:19:26 2007 -0600
@@ -302,4 +302,18 @@ static inline int hvm_event_injection_fa
return hvm_funcs.event_injection_faulted(v);
}
+/* These bits in CR4 are owned by the host. */
+#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
+ (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
+
+/* These bits in CR4 cannot be set by the guest. */
+#define HVM_CR4_GUEST_RESERVED_BITS \
+ ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+ X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
+ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+
+/* These exceptions must always be intercepted. */
+#define HVM_TRAP_MASK (1U << TRAP_machine_check)
+
#endif /* __ASM_X86_HVM_HVM_H__ */
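[Editor's note, not part of the patch: the hunk above centralises the CR4 bits a guest may control. As a self-contained illustration (only), the sketch below restates the standard architectural CR4 bit positions and shows the kind of reserved-bit screen that the VMX mov-to-CR4 path near the top of this diff performs before injecting #GP. It is a simplified stand-in, not the in-tree code.]

#include <stdint.h>
#include <stdio.h>

/* Standard architectural CR4 bit positions, restated for self-containment. */
#define X86_CR4_VME        (1u << 0)
#define X86_CR4_PVI        (1u << 1)
#define X86_CR4_TSD        (1u << 2)
#define X86_CR4_DE         (1u << 3)
#define X86_CR4_PSE        (1u << 4)
#define X86_CR4_PAE        (1u << 5)
#define X86_CR4_MCE        (1u << 6)
#define X86_CR4_PGE        (1u << 7)
#define X86_CR4_PCE        (1u << 8)
#define X86_CR4_OSFXSR     (1u << 9)
#define X86_CR4_OSXMMEXCPT (1u << 10)

/* Same shape as HVM_CR4_GUEST_RESERVED_BITS in the hunk above. */
#define GUEST_RESERVED_CR4_BITS                                   \
    ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |      \
      X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |     \
      X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)

/* Returns nonzero if a guest MOV-to-CR4 of 'value' would hit reserved bits. */
static int cr4_write_faults(uint32_t value)
{
    return (value & GUEST_RESERVED_CR4_BITS) != 0;
}

int main(void)
{
    printf("CR4=0x0020 -> %s\n", cr4_write_faults(0x0020) ? "#GP" : "ok"); /* PAE  */
    printf("CR4=0x2000 -> %s\n", cr4_write_faults(0x2000) ? "#GP" : "ok"); /* VMXE */
    return 0;
}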
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/emulate.h Mon Jul 02 12:19:26 2007 -0600
@@ -138,6 +138,7 @@ static void inline __update_guest_eip(
{
ASSERT(inst_len > 0);
vmcb->rip += inst_len;
+ vmcb->rflags &= ~X86_EFLAGS_RF;
}
#endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h Mon Jul 02 12:19:26 2007 -0600
@@ -464,14 +464,6 @@ void svm_destroy_vmcb(struct vcpu *v);
void setup_vmcb_dump(void);
-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
-#else
-#define SVM_CR4_HOST_MASK 0
-#endif
-
-
#endif /* ASM_X86_HVM_SVM_VMCS_H__ */
/*
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/trace.h
--- a/xen/include/asm-x86/hvm/trace.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/trace.h Mon Jul 02 12:19:26 2007 -0600
@@ -21,6 +21,7 @@
#define DO_TRC_HVM_CPUID 1
#define DO_TRC_HVM_INTR 1
#define DO_TRC_HVM_NMI 1
+#define DO_TRC_HVM_MCE 1
#define DO_TRC_HVM_SMI 1
#define DO_TRC_HVM_VMMCALL 1
#define DO_TRC_HVM_HLT 1
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jul 02 12:19:26 2007 -0600
@@ -130,6 +130,8 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
extern u32 vmx_secondary_exec_control;
+
+extern bool_t cpu_has_vmx_ins_outs_instr_info;
#define cpu_has_vmx_virtualize_apic_accesses \
(vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 12:19:26 2007 -0600
@@ -143,13 +143,6 @@ void vmx_vlapic_msr_changed(struct vcpu
#define X86_SEG_AR_GRANULARITY (1u << 15) /* 15, granularity */
#define X86_SEG_AR_SEG_UNUSABLE (1u << 16) /* 16, segment unusable */
-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
-#else
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
-#endif
-
#define VMCALL_OPCODE ".byte 0x0f,0x01,0xc1\n"
#define VMCLEAR_OPCODE ".byte 0x66,0x0f,0xc7\n" /* reg/opcode: /6 */
#define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/page.h Mon Jul 02 12:19:26 2007 -0600
@@ -192,8 +192,9 @@ static inline l4_pgentry_t l4e_from_padd
#define pgentry_ptr_to_slot(_p) \
(((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))
+#ifndef __ASSEMBLY__
+
/* Page-table type. */
-#ifndef __ASSEMBLY__
#if CONFIG_PAGING_LEVELS == 2
/* x86_32 default */
typedef struct { u32 pfn; } pagetable_t;
@@ -214,9 +215,11 @@ typedef struct { u64 pfn; } pagetable_t;
#define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
#define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
#define pagetable_null() pagetable_from_pfn(0)
-#endif
-
-#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+
+void clear_page_sse2(void *);
+#define clear_page(_p) (cpu_has_xmm2 ? \
+ clear_page_sse2((void *)(_p)) : \
+ (void)memset((void *)(_p), 0, PAGE_SIZE))
#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
#define mfn_valid(mfn) ((mfn) < max_page)
@@ -244,6 +247,8 @@ typedef struct { u64 pfn; } pagetable_t;
/* Convert between frame number and address formats. */
#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+
+#endif /* !defined(__ASSEMBLY__) */
/* High table entries are reserved by the hypervisor. */
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
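[Editor's note, not part of the patch: clear_page() now dispatches to clear_page_sse2() when SSE2 is available; the new routine itself lives in xen/arch/x86/clear_page.S (see the diffstat) and is written in assembly. The C sketch below only illustrates the non-temporal-store idea such a routine typically relies on, using SSE2 intrinsics in user space; it is an assumption-laden illustration, not the in-tree implementation.]

#include <emmintrin.h>   /* SSE2 intrinsics */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096

/* Clear a page with non-temporal stores so the zeroes bypass the cache. */
static void clear_page_nt(void *page)
{
    __m128i zero = _mm_setzero_si128();
    char *p = page;
    size_t i;

    for (i = 0; i < PAGE_SIZE; i += 16)
        _mm_stream_si128((__m128i *)(p + i), zero);
    _mm_sfence();   /* Make the non-temporal stores globally visible. */
}

int main(void)
{
    void *page = aligned_alloc(16, PAGE_SIZE);
    if (page == NULL)
        return 1;
    clear_page_nt(page);
    printf("first byte after clear: %d\n", ((unsigned char *)page)[0]);
    free(page);
    return 0;
}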
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/processor.h Mon Jul 02 12:19:26 2007 -0600
@@ -104,7 +104,6 @@
#define TRAP_alignment_check 17
#define TRAP_machine_check 18
#define TRAP_simd_error 19
-#define TRAP_deferred_nmi 31
/* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
/* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. */
@@ -567,7 +566,8 @@ extern void mtrr_ap_init(void);
extern void mtrr_ap_init(void);
extern void mtrr_bp_init(void);
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
+asmlinkage void do_machine_check(struct cpu_user_regs *regs);
int cpuid_hypervisor_leaves(
uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/x86_32/asm_defns.h Mon Jul 02 12:19:26 2007 -0600
@@ -26,7 +26,7 @@ 1: addl $4,%esp;
#define ASSERT_INTERRUPTS_ENABLED ASSERT_INTERRUPT_STATUS(nz)
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
-#define __SAVE_ALL_PRE \
+#define SAVE_ALL_GPRS \
cld; \
pushl %eax; \
pushl %ebp; \
@@ -35,32 +35,48 @@ 1: addl $4,%esp;
pushl %esi; \
pushl %edx; \
pushl %ecx; \
- pushl %ebx; \
+ pushl %ebx
+
+/*
+ * Saves all register state into an exception/interrupt stack frame.
+ * Returns to the caller at <xen_lbl> if the interrupted context is within
+ * Xen; at <vm86_lbl> if the interrupted context is vm86; or falls through
+ * if the interrupted context is an ordinary guest protected-mode context.
+ * In all cases %ecx contains __HYPERVISOR_DS. %ds/%es are guaranteed to
+ * contain __HYPERVISOR_DS unless control passes to <xen_lbl>, in which case
+ * the caller is responsible for validity of %ds/%es.
+ */
+#define SAVE_ALL(xen_lbl, vm86_lbl) \
+ SAVE_ALL_GPRS; \
testl $(X86_EFLAGS_VM),UREGS_eflags(%esp); \
- jz 2f; \
- call setup_vm86_frame; \
- jmp 3f; \
- 2:testb $3,UREGS_cs(%esp); \
- jz 1f; \
- mov %ds,UREGS_ds(%esp); \
- mov %es,UREGS_es(%esp); \
- mov %fs,UREGS_fs(%esp); \
- mov %gs,UREGS_gs(%esp); \
- 3:
-
-#define SAVE_ALL_NOSEGREGS(_reg) \
- __SAVE_ALL_PRE \
- 1:
-
-#define SET_XEN_SEGMENTS(_reg) \
- movl $(__HYPERVISOR_DS),%e ## _reg ## x; \
- mov %e ## _reg ## x,%ds; \
- mov %e ## _reg ## x,%es;
-
-#define SAVE_ALL(_reg) \
- __SAVE_ALL_PRE \
- SET_XEN_SEGMENTS(_reg) \
- 1:
+ mov %ds,%edi; \
+ mov %es,%esi; \
+ mov $(__HYPERVISOR_DS),%ecx; \
+ jnz 86f; \
+ .text 1; \
+ 86: call setup_vm86_frame; \
+ jmp vm86_lbl; \
+ .previous; \
+ testb $3,UREGS_cs(%esp); \
+ jz xen_lbl; \
+ /* \
+ * We are the outermost Xen context, but our \
+ * life is complicated by NMIs and MCEs. These \
+ * could occur in our critical section and \
+ * pollute %ds and %es. We have to detect that \
+ * this has occurred and avoid saving Xen DS/ES \
+ * values to the guest stack frame. \
+ */ \
+ cmpw %cx,%di; \
+ mov %ecx,%ds; \
+ mov %fs,UREGS_fs(%esp); \
+ cmove UREGS_ds(%esp),%edi; \
+ cmpw %cx,%si; \
+ mov %edi,UREGS_ds(%esp); \
+ cmove UREGS_es(%esp),%esi; \
+ mov %ecx,%es; \
+ mov %gs,UREGS_gs(%esp); \
+ mov %esi,UREGS_es(%esp)
#ifdef PERF_COUNTERS
#define PERFC_INCR(_name,_idx,_cur) \
@@ -97,8 +113,8 @@ __asm__(
STR(x) ":\n\t" \
"pushl $"#v"<<16\n\t" \
STR(FIXUP_RING0_GUEST_STACK) \
- STR(SAVE_ALL(a)) \
- "movl %esp,%eax\n\t" \
+ STR(SAVE_ALL(1f,1f)) "\n\t" \
+ "1:movl %esp,%eax\n\t" \
"pushl %eax\n\t" \
"call "STR(smp_##x)"\n\t" \
"addl $4,%esp\n\t" \
@@ -109,8 +125,8 @@ __asm__(
"\n" __ALIGN_STR"\n" \
"common_interrupt:\n\t" \
STR(FIXUP_RING0_GUEST_STACK) \
- STR(SAVE_ALL(a)) \
- "movl %esp,%eax\n\t" \
+ STR(SAVE_ALL(1f,1f)) "\n\t" \
+ "1:movl %esp,%eax\n\t" \
"pushl %eax\n\t" \
"call " STR(do_IRQ) "\n\t" \
"addl $4,%esp\n\t" \
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/platform.h
--- a/xen/include/public/platform.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/platform.h Mon Jul 02 12:19:26 2007 -0600
@@ -114,6 +114,45 @@ typedef struct xenpf_platform_quirk xenp
typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
+#define XENPF_firmware_info 50
+#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+ /* IN variables. */
+ uint32_t type;
+ uint32_t index;
+ /* OUT variables. */
+ union {
+ struct {
+ /* Int13, Fn48: Check Extensions Present. */
+ uint8_t device; /* %dl: bios device number */
+ uint8_t version; /* %ah: major version */
+ uint16_t interface_support; /* %cx: support bitmap */
+ /* Int13, Fn08: Legacy Get Device Parameters. */
+ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */
+ uint8_t legacy_max_head; /* %dh: max head # */
+ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */
+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+ /* NB. First uint16_t of buffer must be set to buffer size. */
+ XEN_GUEST_HANDLE(void) edd_params;
+ } disk_info; /* XEN_FW_DISK_INFO */
+ struct {
+ uint8_t device; /* bios device number */
+ uint32_t mbr_signature; /* offset 0x1b8 in mbr */
+ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+ struct {
+ /* Int10, AX=4F15: Get EDID info. */
+ uint8_t capabilities;
+ uint8_t edid_transfer_time;
+ /* must refer to 128-byte buffer */
+ XEN_GUEST_HANDLE(uint8_t) edid;
+ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+ } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
struct xen_platform_op {
uint32_t cmd;
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -124,6 +163,7 @@ struct xen_platform_op {
struct xenpf_read_memtype read_memtype;
struct xenpf_microcode_update microcode;
struct xenpf_platform_quirk platform_quirk;
+ struct xenpf_firmware_info firmware_info;
uint8_t pad[128];
} u;
};
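[Editor's note, not part of the patch: for XEN_FW_DISK_INFO the caller must pass an edd_params buffer whose first uint16_t announces the buffer size (see the NB above); the handler in platform_hypercall.c then clamps its copy to that length. The sketch below shows only the caller-side buffer preparation, with a simplified stand-in for struct xenpf_firmware_info; issuing the actual platform op through the privileged toolstack interface is deliberately omitted.]

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XEN_FW_DISK_INFO 1

/* Simplified stand-in for the request structure; illustration only. */
struct fake_disk_info_request {
    uint32_t type;
    uint32_t index;
    void    *edd_params;     /* stand-in for the guest handle */
};

int main(void)
{
    /* 74-byte EDD parameter block: 2-byte length + 72 bytes of data. */
    uint8_t edd_buf[74];
    uint16_t cap = sizeof(edd_buf);
    struct fake_disk_info_request req;

    memset(edd_buf, 0, sizeof(edd_buf));
    /* First uint16_t must announce how much the buffer can hold. */
    memcpy(edd_buf, &cap, sizeof(cap));

    req.type       = XEN_FW_DISK_INFO;
    req.index      = 0;           /* first BIOS disk with EDD info */
    req.edd_params = edd_buf;

    printf("prepared request for disk index %u, buffer %u bytes\n",
           (unsigned)req.index, (unsigned)cap);
    return 0;
}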
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/sysctl.h Mon Jul 02 12:19:26 2007 -0600
@@ -140,9 +140,7 @@ typedef struct xen_sysctl_getdomaininfol
typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
-/*
- * Inject debug keys into Xen.
- */
+/* Inject debug keys into Xen. */
#define XEN_SYSCTL_debug_keys 7
struct xen_sysctl_debug_keys {
/* IN variables. */
@@ -151,6 +149,23 @@ struct xen_sysctl_debug_keys {
};
typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+#define XEN_SYSCTL_getcpuinfo 8
+struct xen_sysctl_cpuinfo {
+ uint64_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t);
+struct xen_sysctl_getcpuinfo {
+ /* IN variables. */
+ uint32_t max_cpus;
+ XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+ /* OUT variables. */
+ uint32_t nr_cpus;
+};
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t);
struct xen_sysctl {
uint32_t cmd;
@@ -163,6 +178,7 @@ struct xen_sysctl {
struct xen_sysctl_perfc_op perfc_op;
struct xen_sysctl_getdomaininfolist getdomaininfolist;
struct xen_sysctl_debug_keys debug_keys;
+ struct xen_sysctl_getcpuinfo getcpuinfo;
uint8_t pad[128];
} u;
};
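[Editor's note, not part of the patch: XEN_SYSCTL_getcpuinfo reports, per physical CPU, the cumulative idle time in nanoseconds, as accumulated by the do_sysctl() hunk earlier in this diff. The sketch below shows how a toolstack might turn two successive samples into an idle percentage; the sample values and the one-second interval are made up, and the sysctl itself (issued via the toolstack) is not shown.]

#include <stdint.h>
#include <stdio.h>

struct cpu_sample {
    uint64_t idletime;   /* ns since boot, mirrors xen_sysctl_cpuinfo.idletime */
};

int main(void)
{
    struct cpu_sample before = { .idletime = 5000000000ull };
    struct cpu_sample after  = { .idletime = 5800000000ull };
    uint64_t interval_ns = 1000000000ull;   /* samples taken 1s apart */

    /* Idle fraction over the interval is the growth in cumulative idle time. */
    double idle_pct = 100.0 *
        (double)(after.idletime - before.idletime) / (double)interval_ns;

    printf("CPU idle over the last second: %.1f%%\n", idle_pct);
    return 0;
}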
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/trace.h
--- a/xen/include/public/trace.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/trace.h Mon Jul 02 12:19:26 2007 -0600
@@ -88,6 +88,7 @@
#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12)
#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13)
#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15)
/* This structure represents a single trace buffer record. */
struct t_rec {