[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 27 Jul 2007 02:54:17 -0700
Delivery-date: Fri, 27 Jul 2007 02:52:28 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1183400366 21600
# Node ID d146700adf714cdc13f924ab0de1dc895b6927f8
# Parent  443ce7edad0e8a3a640960890a72ce530887b38e
# Parent  182446677b6b56d58523050a6225a73d87a86ab7
merge with xen-unstable.hg
---
 buildconfigs/mk.linux-2.6-xen                |    5 
 tools/Makefile                               |    1 
 tools/console/daemon/io.c                    |   38 +++--
 tools/examples/init.d/xendomains             |    6 
 tools/examples/network-bridge                |   12 +
 tools/firmware/vmxassist/head.S              |   76 -----------
 tools/firmware/vmxassist/machine.h           |   15 --
 tools/firmware/vmxassist/setup.c             |   58 ---------
 tools/firmware/vmxassist/vm86.c              |   75 +++++++----
 tools/ioemu/target-i386-dm/exec-dm.c         |    2 
 tools/libxc/xc_domain_restore.c              |   11 -
 tools/libxc/xc_misc.c                        |   28 ++++
 tools/libxc/xenctrl.h                        |    4 
 tools/misc/xen-python-path                   |    9 +
 tools/python/xen/util/auxbin.py              |    9 +
 tools/python/xen/xend/XendCheckpoint.py      |    2 
 tools/python/xen/xend/XendConfig.py          |  105 +++++++++++++---
 tools/python/xen/xend/XendDomain.py          |    4 
 tools/python/xen/xend/server/irqif.py        |    2 
 tools/python/xen/xend/server/pciif.py        |    3 
 tools/python/xen/xm/main.py                  |   14 +-
 xen/acm/acm_core.c                           |    2 
 xen/arch/x86/Makefile                        |    1 
 xen/arch/x86/boot/edd.S                      |   24 +--
 xen/arch/x86/boot/x86_32.S                   |   21 ++-
 xen/arch/x86/boot/x86_64.S                   |   15 ++
 xen/arch/x86/clear_page.S                    |   26 ++++
 xen/arch/x86/domain.c                        |    9 -
 xen/arch/x86/hvm/hvm.c                       |   16 --
 xen/arch/x86/hvm/io.c                        |    1 
 xen/arch/x86/hvm/platform.c                  |    3 
 xen/arch/x86/hvm/svm/svm.c                   |   56 +++++++-
 xen/arch/x86/hvm/svm/vmcb.c                  |    8 -
 xen/arch/x86/hvm/vmx/vmcs.c                  |    5 
 xen/arch/x86/hvm/vmx/vmx.c                   |  154 ++++++++++++++----------
 xen/arch/x86/io_apic.c                       |    4 
 xen/arch/x86/mm.c                            |    3 
 xen/arch/x86/platform_hypercall.c            |   96 ++++++++++++++
 xen/arch/x86/setup.c                         |   34 ++++-
 xen/arch/x86/traps.c                         |   13 +-
 xen/arch/x86/x86_32/entry.S                  |  173 +++++++++++++--------------
 xen/arch/x86/x86_32/supervisor_mode_kernel.S |   27 ++--
 xen/arch/x86/x86_32/traps.c                  |   11 -
 xen/arch/x86/x86_64/Makefile                 |    2 
 xen/arch/x86/x86_64/compat/entry.S           |   10 -
 xen/arch/x86/x86_64/entry.S                  |   68 +++++-----
 xen/arch/x86/x86_64/mm.c                     |    9 -
 xen/arch/x86/x86_64/traps.c                  |   10 +
 xen/arch/x86/x86_emulate.c                   |    1 
 xen/common/sysctl.c                          |   33 +++++
 xen/include/asm-x86/edd.h                    |   18 ++
 xen/include/asm-x86/hvm/hvm.h                |   14 ++
 xen/include/asm-x86/hvm/svm/emulate.h        |    1 
 xen/include/asm-x86/hvm/svm/vmcb.h           |    8 -
 xen/include/asm-x86/hvm/trace.h              |    1 
 xen/include/asm-x86/hvm/vmx/vmcs.h           |    2 
 xen/include/asm-x86/hvm/vmx/vmx.h            |    7 -
 xen/include/asm-x86/page.h                   |   13 +-
 xen/include/asm-x86/processor.h              |    4 
 xen/include/asm-x86/x86_32/asm_defns.h       |   76 +++++++----
 xen/include/public/platform.h                |   40 ++++++
 xen/include/public/sysctl.h                  |   22 ++-
 xen/include/public/trace.h                   |    1 
 63 files changed, 961 insertions(+), 560 deletions(-)

diff -r 443ce7edad0e -r d146700adf71 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen     Mon Jul 02 10:31:03 2007 -0600
+++ b/buildconfigs/mk.linux-2.6-xen     Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,11 @@ _build: build
 _build: build
 
 include buildconfigs/src.$(XEN_LINUX_SOURCE)
+
+# Default to allowing interface mismatch
+ifndef XEN_LINUX_ALLOW_INTERFACE_MISMATCH
+XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
+endif
 
 # The real action starts here!
 .PHONY: build
diff -r 443ce7edad0e -r d146700adf71 tools/Makefile
--- a/tools/Makefile    Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/Makefile    Mon Jul 02 12:19:26 2007 -0600
@@ -43,6 +43,7 @@ install: check
        $(MAKE) ioemuinstall
        $(INSTALL_DIR) $(DESTDIR)/var/xen/dump
        $(INSTALL_DIR) $(DESTDIR)/var/log/xen
+       $(INSTALL_DIR) $(DESTDIR)/var/lib/xen
 
 .PHONY: clean
 clean: check_clean
diff -r 443ce7edad0e -r d146700adf71 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/console/daemon/io.c Mon Jul 02 12:19:26 2007 -0600
@@ -764,27 +764,31 @@ void handle_io(void)
                /* XXX I wish we didn't have to busy wait for hypervisor logs
                 * but there's no obvious way to get event channel notifications
                 * for new HV log data as we can with guest */
-               ret = select(max_fd + 1, &readfds, &writefds, 0, log_hv_fd != -1 ? &timeout : NULL);
-
+               ret = select(max_fd + 1, &readfds, &writefds, 0,
+                            log_hv_fd != -1 ? &timeout : NULL);
+
+               if (log_reload) {
+                       handle_log_reload();
+                       log_reload = 0;
+               }
+
+               /* Abort if select failed, except for EINTR cases
+                  which indicate a possible log reload */
                if (ret == -1) {
-                       if (errno == EINTR) {
-                               if (log_reload) {
-                                       handle_log_reload();
-                                       log_reload = 0;
-                               }
+                       if (errno == EINTR)
                                continue;
-                       }
                        dolog(LOG_ERR, "Failure in select: %d (%s)",
                              errno, strerror(errno));
                        break;
                }
 
-               /* Check for timeout */
-               if (ret == 0) {
-                       if (log_hv_fd != -1)
-                               handle_hv_logs();
+               /* Always process HV logs even if not a timeout */
+               if (log_hv_fd != -1)
+                       handle_hv_logs();
+
+               /* Must not check returned FDSET if it was a timeout */
+               if (ret == 0)
                        continue;
-               }
 
                if (FD_ISSET(xs_fileno(xs), &readfds))
                        handle_xs();
@@ -806,10 +810,14 @@ void handle_io(void)
                }
        }
 
-       if (log_hv_fd != -1)
+       if (log_hv_fd != -1) {
                close(log_hv_fd);
-       if (xc_handle != -1)
+               log_hv_fd = -1;
+       }
+       if (xc_handle != -1) {
                xc_interface_close(xc_handle);
+               xc_handle = -1;
+       }
 }
 
 /*
diff -r 443ce7edad0e -r d146700adf71 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains  Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/examples/init.d/xendomains  Mon Jul 02 12:19:26 2007 -0600
@@ -221,11 +221,12 @@ start()
     if [ "$XENDOMAINS_RESTORE" = "true" ] &&
        contains_something "$XENDOMAINS_SAVE"
     then
-        mkdir -p $(dirname "$LOCKFILE")
+       XENDOMAINS_SAVED=`/bin/ls $XENDOMAINS_SAVE/* | grep -v 'lost+found'`
+       mkdir -p $(dirname "$LOCKFILE")
        touch $LOCKFILE
        echo -n "Restoring Xen domains:"
        saved_domains=`ls $XENDOMAINS_SAVE`
-       for dom in $XENDOMAINS_SAVE/*; do
+       for dom in $XENDOMAINS_SAVED; do
            echo -n " ${dom##*/}"
            xm restore $dom
            if [ $? -ne 0 ]; then
@@ -259,6 +260,7 @@ start()
            if [ $? -eq 0 ] || is_running $dom; then
                echo -n "(skip)"
            else
+               echo "(booting)"
                xm create --quiet --defconfig $dom
                if [ $? -ne 0 ]; then
                    rc_failed $?
diff -r 443ce7edad0e -r d146700adf71 tools/examples/network-bridge
--- a/tools/examples/network-bridge     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/examples/network-bridge     Mon Jul 02 12:19:26 2007 -0600
@@ -172,9 +172,21 @@ show_status () {
     echo '============================================================'
 }
 
+is_network_root () {
+    local rootfs=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $3; }}' /etc/mtab)
+    local rootopts=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $4; }}' /etc/mtab)
+
+    [[ "$rootfs" =~ "^nfs" ]] || [[ "$rootopts" =~ "_netdev" ]] && return 0 || return 1
+}
+
 op_start () {
     if [ "${bridge}" = "null" ] ; then
        return
+    fi
+
+    if is_network_root ; then
+        [ -x /usr/bin/logger ] && /usr/bin/logger "network-bridge: bridging not supported on network root; not starting"
+        return
     fi
 
     if link_exists "$pdev"; then
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/head.S
--- a/tools/firmware/vmxassist/head.S   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/head.S   Mon Jul 02 12:19:26 2007 -0600
@@ -25,80 +25,12 @@
  * switch happens to the environment below. The magic indicates
  * that this is a valid context.
  */
-#ifdef TEST
-       .byte 0x55, 0xaa
-       .byte 0x80
-       .code16
-       jmp     _start16
-#else
        jmp     _start
-#endif
 
        .align  8
        .long   VMXASSIST_MAGIC
        .long   newctx                  /* new context */
        .long   oldctx                  /* old context */
-
-#ifdef TEST
-/*
- * We are running in 16-bit. Get into the protected mode as soon as
- * possible. We use our own (minimal) GDT to get started.
- *
- * ROM is a misnomer as this code isn't really rommable (although it
- * only requires a few changes) but it does live in a BIOS ROM segment.
- * This code allows me to debug vmxassists under (a modified version of)
- * Bochs and load it as a "optromimage1".
- */
-       .code16
-       .globl  _start16
-_start16:
-        cli
-
-        /* load our own global descriptor table */
-        data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR)
-
-        /* go to protected mode */
-        movl    %cr0, %eax
-        orl     $(CR0_PE), %eax
-        movl    %eax, %cr0
-        data32  ljmp $0x08, $1f
-
-        .align  32
-        .globl  rom_gdt
-rom_gdt:
-        .word   0, 0            /* 0x00: reserved */
-        .byte   0, 0, 0, 0
-
-        .word   0xFFFF, 0       /* 0x08: CS 32-bit */
-        .byte   0, 0x9A, 0xCF, 0
-
-        .word   0xFFFF, 0       /* 0x10: CS 32-bit */
-        .byte   0, 0x92, 0xCF, 0
-rom_gdt_end:
-
-        .align  4
-        .globl  rom_gdtr
-rom_gdtr:
-        .word   rom_gdt_end - rom_gdt - 1
-        .long   rom_gdt
-
-        .code32
-1:
-        /* welcome to the 32-bit world */
-        movw    $0x10, %ax
-        movw    %ax, %ds
-        movw    %ax, %es
-        movw    %ax, %ss
-        movw    %ax, %fs
-        movw    %ax, %gs
-
-        /* enable Bochs debug facilities */
-        movw    $0x8A00, %dx
-        movw    $0x8A00, %ax
-        outw    %ax, (%dx)
-
-       jmp     _start
-#endif /* TEST */
 
 /*
  * This is the real start. Control was transfered to this point
@@ -111,9 +43,6 @@ _start:
        cli
 
        /* save register parameters to C land */
-#ifdef TEST
-       xorl    %edx, %edx
-#endif
 
        /* clear bss */
        cld
@@ -145,11 +74,6 @@ halt:
 halt:
        push    $halt_msg
        call    printf
-#ifdef TEST
-        movw    $0x8A00, %dx
-        movw    $0x8AE0, %ax
-        outw    %ax, (%dx)
-#endif
        cli
        jmp     .
 
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/machine.h
--- a/tools/firmware/vmxassist/machine.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/machine.h        Mon Jul 02 12:19:26 2007 -0600
@@ -55,13 +55,6 @@
 #define        PGMASK          (~(PGSIZE - 1))         /* page mask */
 #define        LPGSIZE         (1 << LOG_PDSIZE)       /* large page size */
 #define        LPGMASK         (~(LPGSIZE - 1))        /* large page mask */
-
-#ifdef TEST
-#define        PTE_P           (1 << 0)        /* Present */
-#define        PTE_RW          (1 << 1)        /* Read/Write */
-#define        PTE_US          (1 << 2)        /* User/Supervisor */
-#define        PTE_PS          (1 << 7)        /* Page Size */
-#endif
 
 /* Programmable Interrupt Contoller (PIC) defines */
 #define        PIC_MASTER      0x20
@@ -195,14 +188,6 @@ set_cr4(unsigned value)
        __asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value));
 }
 
-#ifdef TEST
-static inline void
-breakpoint(void)
-{
-       outw(0x8A00, 0x8AE0);
-}
-#endif /* TEST */
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* __MACHINE_H__ */
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/setup.c
--- a/tools/firmware/vmxassist/setup.c  Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/setup.c  Mon Jul 02 12:19:26 2007 -0600
@@ -46,19 +46,6 @@ unsigned long long idt[NR_TRAPS] __attri
 unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32)));
 
 struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
-
-#ifdef TEST
-unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
-
-struct e820entry e820map[] = {
-       { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM },
-       { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED },
-       { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
-       { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
-       { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
-       { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
-};
-#endif /* TEST */
 
 struct vmx_assist_context oldctx;
 struct vmx_assist_context newctx;
@@ -84,38 +71,11 @@ banner(void)
                    (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
        memory_size += 0x400 << 10; /* + 1MB */
 
-#ifdef TEST
-       /* Create an SMAP for our debug environment */
-       e820map[4].size = memory_size - e820map[4].addr - PGSIZE;
-       e820map[5].addr = memory_size - PGSIZE;
-       e820map[6].addr = memory_size;
-       e820map[7].addr += memory_size;
-
-       *HVM_E820_NR = sizeof(e820map)/sizeof(e820map[0]);
-       memcpy(HVM_E820, e820map, sizeof(e820map));
-#endif
-
        printf("Memory size %ld MB\n", memory_size >> 20);
        printf("E820 map:\n");
        print_e820_map(HVM_E820, *HVM_E820_NR);
        printf("\n");
 }
-
-#ifdef TEST
-void
-setup_paging(void)
-{
-       unsigned long i;
-
-       if (((unsigned)pgd & ~PGMASK) != 0)
-               panic("PGD not page aligned");
-       set_cr4(get_cr4() | CR4_PSE);
-       for (i = 0; i < NR_PGD; i++)
-               pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
-       set_cr3((unsigned) pgd);
-       set_cr0(get_cr0() | (CR0_PE|CR0_PG));
-}
-#endif /* TEST */
 
 void
 setup_gdt(void)
@@ -211,11 +171,7 @@ enter_real_mode(struct regs *regs)
                regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
                if (booting_cpu == 0) {
                        regs->cs = 0xF000; /* ROM BIOS POST entry point */
-#ifdef TEST
-                       regs->eip = 0xFFE0;
-#else
                        regs->eip = 0xFFF0;
-#endif
                } else {
                        regs->cs = booting_vector << 8; /* AP entry point */
                        regs->eip = 0;
@@ -242,9 +198,10 @@ enter_real_mode(struct regs *regs)
        }
 
        /* go from protected to real mode */
-       regs->eflags |= EFLAGS_VM;
        set_mode(regs, VM86_PROTECTED_TO_REAL);
        emulate(regs);
+       if (mode != VM86_REAL)
+               panic("failed to emulate between clear PE and long jump.\n");
 }
 
 /*
@@ -269,13 +226,8 @@ setup_ctx(void)
         * more natural to enable CR0.PE to cause a world switch to
         * protected mode rather than disabling it.
         */
-#ifdef TEST
-       c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
-       c->cr3 = (unsigned long) pgd;
-#else
        c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
        c->cr3 = 0;
-#endif
        c->cr4 = get_cr4();
 
        c->idtr_limit = sizeof(idt)-1;
@@ -369,16 +321,10 @@ main(void)
        if (booting_cpu == 0)
                banner();
 
-#ifdef TEST
-       setup_paging();
-#endif
-
        setup_gdt();
        setup_idt();
 
-#ifndef        TEST
        set_cr4(get_cr4() | CR4_VME);
-#endif
 
        setup_ctx();
 
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/vm86.c   Mon Jul 02 12:19:26 2007 -0600
@@ -561,11 +561,7 @@ lmsw(struct regs *regs, unsigned prefix,
        unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax;
 
        TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax));
-#ifndef TEST
        oldctx.cr0 = cr0 | CR0_PE | CR0_NE;
-#else
-       oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG;
-#endif
        if (cr0 & CR0_PE)
                set_mode(regs, VM86_REAL_TO_PROTECTED);
 
@@ -584,8 +580,13 @@ movr(struct regs *regs, unsigned prefix,
        unsigned addr = operand(prefix, regs, modrm);
        unsigned val, r = (modrm >> 3) & 7;
 
-       if ((modrm & 0xC0) == 0xC0) /* no registers */
-               return 0;
+       if ((modrm & 0xC0) == 0xC0) {
+               /*
+                * Emulate all guest instructions in protected to real mode.
+                */
+               if (mode != VM86_PROTECTED_TO_REAL)
+                       return 0;
+       }
 
        switch (opc) {
        case 0x88: /* addr32 mov r8, r/m8 */
@@ -656,13 +657,8 @@ movcr(struct regs *regs, unsigned prefix
                TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr));
                switch (cr) {
                case 0:
-#ifndef TEST
                        setreg32(regs, modrm,
                                oldctx.cr0 & ~(CR0_PE | CR0_NE));
-#else
-                       setreg32(regs, modrm,
-                               oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG));
-#endif
                        break;
                case 2:
                        setreg32(regs, modrm, get_cr2());
@@ -680,9 +676,6 @@ movcr(struct regs *regs, unsigned prefix
                switch (cr) {
                case 0:
                        oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE);
-#ifdef TEST
-                       oldctx.cr0 |= CR0_PG;
-#endif
                        if (getreg32(regs, modrm) & CR0_PE)
                                set_mode(regs, VM86_REAL_TO_PROTECTED);
                        else
@@ -818,8 +811,13 @@ mov_to_seg(struct regs *regs, unsigned p
 {
        unsigned modrm = fetch8(regs);
 
-       /* Only need to emulate segment loads in real->protected mode. */
-       if (mode != VM86_REAL_TO_PROTECTED)
+       /*
+        * Emulate segment loads in:
+        * 1) real->protected mode.
+        * 2) protected->real mode.
+        */
+       if ((mode != VM86_REAL_TO_PROTECTED) &&
+           (mode != VM86_PROTECTED_TO_REAL))
                return 0;
 
        /* Register source only. */
@@ -829,6 +827,8 @@ mov_to_seg(struct regs *regs, unsigned p
        switch ((modrm & 0x38) >> 3) {
        case 0: /* es */
                regs->ves = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.ves = 0;
                oldctx.es_sel = regs->ves;
                return 1;
@@ -837,21 +837,29 @@ mov_to_seg(struct regs *regs, unsigned p
 
        case 2: /* ss */
                regs->uss = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.uss = 0;
                oldctx.ss_sel = regs->uss;
                return 1;
        case 3: /* ds */
                regs->vds = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.vds = 0;
                oldctx.ds_sel = regs->vds;
                return 1;
        case 4: /* fs */
                regs->vfs = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.vfs = 0;
                oldctx.fs_sel = regs->vfs;
                return 1;
        case 5: /* gs */
                regs->vgs = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
                saved_rm_regs.vgs = 0;
                oldctx.gs_sel = regs->vgs;
                return 1;
@@ -1067,7 +1075,8 @@ set_mode(struct regs *regs, enum vm86_mo
        }
 
        mode = newmode;
-       TRACE((regs, 0, states[mode]));
+       if (mode != VM86_PROTECTED)
+               TRACE((regs, 0, states[mode]));
 }
 
 static void
@@ -1086,7 +1095,7 @@ jmpl(struct regs *regs, int prefix)
 
        if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
                set_mode(regs, VM86_PROTECTED);
-       else if (mode == VM86_PROTECTED_TO_REAL)/* jump to real mode */
+       else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
        else
                panic("jmpl");
@@ -1280,6 +1289,12 @@ opcode(struct regs *regs)
        unsigned eip = regs->eip;
        unsigned opc, modrm, disp;
        unsigned prefix = 0;
+
+       if (mode == VM86_PROTECTED_TO_REAL &&
+               oldctx.cs_arbytes.fields.default_ops_size) {
+               prefix |= DATA32;
+               prefix |= ADDR32;
+       }
 
        for (;;) {
                switch ((opc = fetch8(regs))) {
@@ -1391,17 +1406,29 @@ opcode(struct regs *regs)
                        continue;
 
                case 0x66:
-                       TRACE((regs, regs->eip - eip, "data32"));
-                       prefix |= DATA32;
+                       if (mode == VM86_PROTECTED_TO_REAL &&
+                               oldctx.cs_arbytes.fields.default_ops_size) {
+                               TRACE((regs, regs->eip - eip, "data16"));
+                               prefix &= ~DATA32;
+                       } else {
+                               TRACE((regs, regs->eip - eip, "data32"));
+                               prefix |= DATA32;
+                       }
                        continue;
 
                case 0x67:
-                       TRACE((regs, regs->eip - eip, "addr32"));
-                       prefix |= ADDR32;
+                       if (mode == VM86_PROTECTED_TO_REAL &&
+                               oldctx.cs_arbytes.fields.default_ops_size) {
+                               TRACE((regs, regs->eip - eip, "addr16"));
+                               prefix &= ~ADDR32;
+                       } else {
+                               TRACE((regs, regs->eip - eip, "addr32"));
+                               prefix |= ADDR32;
+                       }
                        continue;
 
-               case 0x88: /* addr32 mov r8, r/m8 */
-               case 0x8A: /* addr32 mov r/m8, r8 */
+               case 0x88: /* mov r8, r/m8 */
+               case 0x8A: /* mov r/m8, r8 */
                        if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
                                goto invalid;
                        if ((prefix & ADDR32) == 0)
diff -r 443ce7edad0e -r d146700adf71 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c      Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/ioemu/target-i386-dm/exec-dm.c      Mon Jul 02 12:19:26 2007 -0600
@@ -448,7 +448,7 @@ static void memcpy_words(void *dst, void
 {
     asm (
         "   movl %%edx,%%ecx \n"
-#ifdef __x86_64
+#ifdef __x86_64__
         "   shrl $3,%%ecx    \n"
         "   andl $7,%%edx    \n"
         "   rep  movsq       \n"
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xc_domain_restore.c   Mon Jul 02 12:19:26 2007 -0600
@@ -465,7 +465,7 @@ int xc_domain_restore(int xc_handle, int
         if ( j == 0 )
             break;  /* our work here is done */
 
-        if ( j > MAX_BATCH_SIZE )
+        if ( (j > MAX_BATCH_SIZE) || (j < 0) )
         {
             ERROR("Max batch size exceeded. Giving up.");
             goto out;
@@ -903,13 +903,14 @@ int xc_domain_restore(int xc_handle, int
 
     /* Get the list of PFNs that are not in the psuedo-phys map */
     {
-        unsigned int count;
+        unsigned int count = 0;
         unsigned long *pfntab;
         int nr_frees, rc;
 
-        if ( !read_exact(io_fd, &count, sizeof(count)) )
-        {
-            ERROR("Error when reading pfn count");
+        if ( !read_exact(io_fd, &count, sizeof(count)) ||
+             (count > (1U << 28)) ) /* up to 1TB of address space */
+        {
+            ERROR("Error when reading pfn count (= %u)", count);
             goto out;
         }
 
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xc_misc.c     Mon Jul 02 12:19:26 2007 -0600
@@ -101,13 +101,37 @@ int xc_perfc_control(int xc_handle,
 
     rc = do_sysctl(xc_handle, &sysctl);
 
-    if (nbr_desc)
+    if ( nbr_desc )
         *nbr_desc = sysctl.u.perfc_op.nr_counters;
-    if (nbr_val)
+    if ( nbr_val )
         *nbr_val = sysctl.u.perfc_op.nr_vals;
 
     return rc;
 }
+
+int xc_getcpuinfo(int xc_handle, int max_cpus,
+                  xc_cpuinfo_t *info, int *nr_cpus)
+{
+    int rc;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_getcpuinfo;
+    sysctl.u.getcpuinfo.max_cpus = max_cpus; 
+    set_xen_guest_handle(sysctl.u.getcpuinfo.info, info); 
+
+    if ( (rc = lock_pages(info, max_cpus*sizeof(*info))) != 0 )
+        return rc;
+
+    rc = do_sysctl(xc_handle, &sysctl);
+
+    unlock_pages(info, max_cpus*sizeof(*info));
+
+    if ( nr_cpus )
+        *nr_cpus = sysctl.u.getcpuinfo.nr_cpus; 
+
+    return rc;
+}
+
 
 int xc_hvm_set_pci_intx_level(
     int xc_handle, domid_t dom,
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xenctrl.h     Mon Jul 02 12:19:26 2007 -0600
@@ -491,6 +491,10 @@ int xc_sched_id(int xc_handle,
 int xc_sched_id(int xc_handle,
                 int *sched_id);
 
+typedef xen_sysctl_cpuinfo_t xc_cpuinfo_t;
+int xc_getcpuinfo(int xc_handle, int max_cpus,
+                  xc_cpuinfo_t *info, int *nr_cpus); 
+
 int xc_domain_setmaxmem(int xc_handle,
                         uint32_t domid,
                         unsigned int max_memkb);
diff -r 443ce7edad0e -r d146700adf71 tools/misc/xen-python-path
--- a/tools/misc/xen-python-path        Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/misc/xen-python-path        Mon Jul 02 12:19:26 2007 -0600
@@ -28,8 +28,13 @@ import os.path
 import os.path
 import sys
 
-for p in ['python%s' % sys.version[:3], 'python']:
-    for l in ['/usr/lib64', '/usr/lib']:
+usr   = os.path.dirname(os.path.dirname(sys.argv[0]))
+list  = [ os.path.join(usr,'lib64') ]
+list += [ os.path.join(usr,'lib') ]
+list += ['/usr/lib64', '/usr/lib']
+
+for l in list:
+    for p in ['python%s' % sys.version[:3], 'python']:
         for k in ['', 'site-packages/']:
             d = os.path.join(l, p, k)
             if os.path.exists(os.path.join(d, AUXBIN)):
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/util/auxbin.py
--- a/tools/python/xen/util/auxbin.py   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/util/auxbin.py   Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,7 @@ LIB_64_ARCHS = [ 'x86_64', 's390x', 'spa
 
 import os
 import os.path
+import sys
 
 
 def execute(exe, args = None):
@@ -47,6 +48,14 @@ def path():
 
 def libpath():
     machine = os.uname()[4]
+    if sys.argv[0] != '-c':
+        prefix = os.path.dirname(os.path.dirname(sys.argv[0]))
+        path = os.path.join(prefix, os.path.basename(LIB_64))
+        if machine in LIB_64_ARCHS and os.path.exists(path):
+            return path
+        path = os.path.join(prefix, os.path.basename(LIB_32))
+        if os.path.exists(path):
+            return path
     if machine in LIB_64_ARCHS and os.path.exists(LIB_64):
         return LIB_64
     else:
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py   Mon Jul 02 12:19:26 2007 -0600
@@ -148,6 +148,8 @@ def save(fd, dominfo, network, live, dst
         except:
             log.exception("Failed to reset the migrating domain's name")
 
+        raise exn
+
 
 def restore(xd, fd, dominfo = None, paused = False):
     signature = read_exact(fd, len(SIGNATURE),
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendConfig.py       Mon Jul 02 12:19:26 2007 -0600
@@ -28,6 +28,7 @@ from xen.xend.PrettyPrint import prettyp
 from xen.xend.PrettyPrint import prettyprintstring
 from xen.xend.XendConstants import DOM_STATE_HALTED
 from xen.xend.server.netif import randomMAC
+from xen.util.blkif import blkdev_name_to_number
 
 log = logging.getLogger("xend.XendConfig")
 log.setLevel(logging.WARN)
@@ -934,6 +935,62 @@ class XendConfig(dict):
 
         return sxpr    
     
+    def _blkdev_name_to_number(self, dev):
+        if 'ioemu:' in dev:
+            _, dev = dev.split(':', 1)
+        try:
+            dev, _ = dev.split(':', 1)
+        except ValueError:
+            pass
+        
+        try:
+            devid = int(dev)
+        except ValueError:
+            # devid is not a number but a string containing either device
+            # name (e.g. xvda) or device_type/device_id (e.g. vbd/51728)
+            dev2 = type(dev) is str and dev.split('/')[-1] or None
+            if dev2 == None:
+                log.debug("Could not check the device %s", dev)
+                return None
+            try:
+                devid = int(dev2)
+            except ValueError:
+                devid = blkdev_name_to_number(dev2)
+                if devid == None:
+                    log.debug("The device %s is not device name", dev2)
+                    return None
+        return devid
+    
+    def device_duplicate_check(self, dev_type, dev_info, defined_config):
+        defined_devices_sxpr = self.all_devices_sxpr(target = defined_config)
+        
+        if dev_type == 'vbd':
+            dev_uname = dev_info.get('uname')
+            blkdev_name = dev_info.get('dev')
+            devid = self._blkdev_name_to_number(blkdev_name)
+            if devid == None:
+                return
+            
+            for o_dev_type, o_dev_info in defined_devices_sxpr:
+                if dev_type == o_dev_type:
+                    if dev_uname == sxp.child_value(o_dev_info, 'uname'):
+                        raise XendConfigError('The uname "%s" is already defined' %
+                                              dev_uname)
+                    o_blkdev_name = sxp.child_value(o_dev_info, 'dev')
+                    o_devid = self._blkdev_name_to_number(o_blkdev_name)
+                    if o_devid != None and devid == o_devid:
+                        raise XendConfigError('The device "%s" is already defined' %
+                                              blkdev_name)
+                    
+        elif dev_type == 'vif':
+            dev_mac = dev_info.get('mac')
+            
+            for o_dev_type, o_dev_info in defined_devices_sxpr:
+                if dev_type == o_dev_type:
+                    if dev_mac == sxp.child_value(o_dev_info, 'mac'):
+                        raise XendConfigError('The mac "%s" is already defined' %
+                                              dev_mac)
+    
     def device_add(self, dev_type, cfg_sxp = None, cfg_xenapi = None,
                    target = None):
         """Add a device configuration in SXP format or XenAPI struct format.
@@ -997,6 +1054,8 @@ class XendConfig(dict):
             if dev_type == 'vif':
                 if not dev_info.get('mac'):
                     dev_info['mac'] = randomMAC()
+
+            self.device_duplicate_check(dev_type, dev_info, target)
 
             # create uuid if it doesn't exist
             dev_uuid = dev_info.get('uuid', None)
@@ -1275,15 +1334,19 @@ class XendConfig(dict):
         return False
 
 
-    def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None):
+    def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None, target = None):
         """Get Device SXPR by either giving the device UUID or (type, config).
 
         @rtype: list of lists
         @return: device config sxpr
         """
         sxpr = []
-        if dev_uuid != None and dev_uuid in self['devices']:
-            dev_type, dev_info = self['devices'][dev_uuid]
+
+        if target == None:
+            target = self
+
+        if dev_uuid != None and dev_uuid in target['devices']:
+            dev_type, dev_info = target['devices'][dev_uuid]
 
         if dev_type == None or dev_info == None:
             raise XendConfigError("Required either UUID or device type and "
@@ -1300,8 +1363,12 @@ class XendConfig(dict):
 
         return sxpr
 
-    def ordered_device_refs(self):
+    def ordered_device_refs(self, target = None):
         result = []
+
+        if target == None:
+            target = self
+
         # vkbd devices *must* be before vfb devices, otherwise
         # there is a race condition when setting up devices
         # where the daemon spawned for the vfb may write stuff
@@ -1309,27 +1376,30 @@ class XendConfig(dict):
         # setup permissions on the vkbd backend path. This race
         # results in domain creation failing with 'device already
         # connected' messages
-        result.extend([u for u in self['devices'].keys() if self['devices'][u][0] == 'vkbd'])
-
-        result.extend(self['console_refs'] +
-                      self['vbd_refs'] +
-                      self['vif_refs'] +
-                      self['vtpm_refs'])
-
-        result.extend([u for u in self['devices'].keys() if u not in result])
+        result.extend([u for u in target['devices'].keys() if target['devices'][u][0] == 'vkbd'])
+
+        result.extend(target.get('console_refs', []) +
+                      target.get('vbd_refs', []) +
+                      target.get('vif_refs', []) +
+                      target.get('vtpm_refs', []))
+
+        result.extend([u for u in target['devices'].keys() if u not in result])
         return result
 
-    def all_devices_sxpr(self):
+    def all_devices_sxpr(self, target = None):
         """Returns the SXPR for all devices in the current configuration."""
         sxprs = []
         pci_devs = []
 
-        if 'devices' not in self:
+        if target == None:
+            target = self
+
+        if 'devices' not in target:
             return sxprs
         
-        ordered_refs = self.ordered_device_refs()
+        ordered_refs = self.ordered_device_refs(target = target)
         for dev_uuid in ordered_refs:
-            dev_type, dev_info = self['devices'][dev_uuid]
+            dev_type, dev_info = target['devices'][dev_uuid]
             if dev_type == 'pci': # special case for pci devices
                 sxpr = [['uuid', dev_info['uuid']]]
                 for pci_dev_info in dev_info['devs']:
@@ -1340,7 +1410,8 @@ class XendConfig(dict):
                 sxprs.append((dev_type, sxpr))
             else:
                 sxpr = self.device_sxpr(dev_type = dev_type,
-                                        dev_info = dev_info)
+                                        dev_info = dev_info,
+                                        target   = target)
                 sxprs.append((dev_type, sxpr))
 
         return sxprs
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Mon Jul 02 12:19:26 2007 -0600
@@ -1262,8 +1262,10 @@ class XendDomain:
             try:
                 XendCheckpoint.save(fd, dominfo, False, False, dst,
                                     checkpoint=checkpoint)
-            finally:
+            except Exception, e:
                 os.close(fd)
+                raise e
+            os.close(fd)
         except OSError, ex:
             raise XendError("can't write guest state file %s: %s" %
                             (dst, ex[1]))
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/irqif.py
--- a/tools/python/xen/xend/server/irqif.py     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/server/irqif.py     Mon Jul 02 12:19:26 2007 -0600
@@ -61,7 +61,7 @@ class IRQController(DevController):
        
         pirq = get_param('irq')
 
-        rc = xc.domain_irq_permission(dom          = self.getDomid(),
+        rc = xc.domain_irq_permission(domid        = self.getDomid(),
                                       pirq         = pirq,
                                       allow_access = True)
 
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/server/pciif.py     Mon Jul 02 12:19:26 2007 -0600
@@ -185,3 +185,6 @@ class PciController(DevController):
 
     def waitForBackend(self,devid):
         return (0, "ok - no hotplug")
+
+    def migrate(self, config, network, dst, step, domName):
+        raise XendError('Migration not permitted with assigned PCI device.')
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xm/main.py       Mon Jul 02 12:19:26 2007 -0600
@@ -2168,9 +2168,7 @@ def xm_network_attach(args):
         server.xend.domain.device_create(dom, vif)
 
 
-def detach(args, command, deviceClass):
-    arg_check(args, command, 2, 3)
-
+def detach(args, deviceClass):
     dom = args[0]
     dev = args[1]
     try:
@@ -2204,16 +2202,17 @@ def xm_block_detach(args):
             raise OptionError("Cannot find device '%s' in domain '%s'"
                               % (dev,dom))
     else:
+        arg_check(args, 'block-detach', 2, 3)
         try:
-            detach(args, 'block-detach', 'vbd')
+            detach(args, 'vbd')
             return
         except:
             pass
-        detach(args, 'block-detach', 'tap')
+        detach(args, 'tap')
 
 def xm_network_detach(args):
     if serverType == SERVER_XEN_API:
-        arg_check(args, "xm_block_detach", 2, 3)
+        arg_check(args, "xm_network_detach", 2, 3)
         dom = args[0]
         devid = args[1]
         vif_refs = server.xenapi.VM.get_VIFs(get_single_vm(dom))
@@ -2227,7 +2226,8 @@ def xm_network_detach(args):
         else:
             print "Cannot find device '%s' in domain '%s'" % (devid,dom)
     else:
-        detach(args, 'network-detach', 'vif')
+        arg_check(args, 'network-detach', 2, 3)
+        detach(args, 'vif')
 
 
 def xm_vnet_list(args):
diff -r 443ce7edad0e -r d146700adf71 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/acm/acm_core.c        Mon Jul 02 12:19:26 2007 -0600
@@ -89,7 +89,7 @@ static void __init set_dom0_ssidref(cons
         if (hi < ACM_MAX_NUM_TYPES && hi >= 1)
             dom0_ste_ssidref = hi;
         for (i = 0; i < sizeof(polname); i++) {
-            polname[i] = c[7+i];
+            polname[i] = c[5+i];
             if (polname[i] == '\0' || polname[i] == '\t' ||
                 polname[i] == '\n' || polname[i] == ' '  ||
                 polname[i] == ':') {
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/Makefile     Mon Jul 02 12:19:26 2007 -0600
@@ -10,6 +10,7 @@ subdir-$(x86_64) += x86_64
 
 obj-y += apic.o
 obj-y += bitops.o
+obj-y += clear_page.o
 obj-y += compat.o
 obj-y += delay.o
 obj-y += dmi_scan.o
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/edd.S
--- a/xen/arch/x86/boot/edd.S   Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/edd.S   Mon Jul 02 12:19:26 2007 -0600
@@ -24,7 +24,7 @@
 /* Maximum number of EDD information structures at boot_edd_info. */
 #define EDD_INFO_MAX            6
 
-/* Maximum number of MBR signatures at boot_edd_signature. */
+/* Maximum number of MBR signatures at boot_mbr_signature. */
 #define EDD_MBR_SIG_MAX         16
 
 /* Size of components of EDD information structure. */
@@ -40,10 +40,8 @@ get_edd:
 # Read the first sector of each BIOS disk device and store the 4-byte signature
 edd_mbr_sig_start:
         movb    $0x80, %dl                      # from device 80
-        movw    $bootsym(boot_edd_signature),%bx # store buffer ptr in bx
+        movw    $bootsym(boot_mbr_signature),%bx # store buffer ptr in bx
 edd_mbr_sig_read:
-        movl    $0xFFFFFFFF, %eax
-        movl    %eax, (%bx)                     # assume failure
         pushw   %bx
         movb    $0x02, %ah                      # 0x02 Read Sectors
         movb    $1, %al                         # read 1 sector
@@ -64,11 +62,12 @@ edd_mbr_sig_read:
         cmpb    $0, %ah                         # some BIOSes do not set CF
         jne     edd_mbr_sig_done                # on failure, we're done.
         movl    bootsym(boot_edd_info)+EDD_MBR_SIG_OFFSET,%eax
-        movl    %eax, (%bx)                     # store signature from MBR
-        incb    bootsym(boot_edd_signature_nr)  # note that we stored something
+        movb    %dl, (%bx)                      # store BIOS drive number
+        movl    %eax, 4(%bx)                    # store signature from MBR
+        incb    bootsym(boot_mbr_signature_nr)  # note that we stored something
         incb    %dl                             # increment to next device
-        addw    $4, %bx                         # increment sig buffer ptr
-        cmpb    $EDD_MBR_SIG_MAX,bootsym(boot_edd_signature_nr)
+        addw    $8, %bx                         # increment sig buffer ptr
+        cmpb    $EDD_MBR_SIG_MAX,bootsym(boot_mbr_signature_nr)
         jb      edd_mbr_sig_read
 edd_mbr_sig_done:
 
@@ -150,12 +149,13 @@ opt_edd:
 opt_edd:
         .byte   0                               # edd=on/off/skipmbr
 
-.globl  boot_edd_info_nr, boot_edd_signature_nr
+.globl  boot_edd_info, boot_edd_info_nr
+.globl  boot_mbr_signature, boot_mbr_signature_nr
 boot_edd_info_nr:
         .byte   0
-boot_edd_signature_nr:
+boot_mbr_signature_nr:
         .byte   0
-boot_edd_signature:
-        .fill   EDD_MBR_SIG_MAX*4,1,0
+boot_mbr_signature:
+        .fill   EDD_MBR_SIG_MAX*8,1,0
 boot_edd_info:
         .fill   512,1,0                         # big enough for a disc sector
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/x86_32.S        Mon Jul 02 12:19:26 2007 -0600
@@ -36,15 +36,29 @@ 1:      mov     %eax,(%edi)
 
 /* This is the default interrupt handler. */
 int_msg:
-        .asciz "Unknown interrupt\n"
+        .asciz "Unknown interrupt (cr2=%08x)\n"
+hex_msg:
+        .asciz "  %08x"
         ALIGN
 ignore_int:
+        pusha
         cld
         mov     $(__HYPERVISOR_DS),%eax
         mov     %eax,%ds
         mov     %eax,%es
+        mov     %cr2,%eax
+        push    %eax
         pushl   $int_msg
         call    printk
+        add     $8,%esp
+        mov     %esp,%ebp
+0:      pushl   (%ebp)
+        add     $4,%ebp
+        pushl   $hex_msg
+        call    printk
+        add     $8,%esp
+        test    $0xffc,%ebp
+        jnz     0b
 1:      jmp     1b
 
 ENTRY(stack_start)
@@ -65,11 +79,6 @@ gdt_descr:
 gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
         .long   gdt_table - FIRST_RESERVED_GDT_BYTE
-
-        .word   0
-nopaging_gdt_descr:
-        .word   LAST_RESERVED_GDT_BYTE
-        .long   sym_phys(gdt_table) - FIRST_RESERVED_GDT_BYTE
 
         .align PAGE_SIZE, 0
 /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/x86_64.S        Mon Jul 02 12:19:26 2007 -0600
@@ -56,12 +56,23 @@ 1:      movq    %rax,(%rdi)
 
 /* This is the default interrupt handler. */
 int_msg:
-        .asciz "Unknown interrupt\n"
+        .asciz "Unknown interrupt (cr2=%016lx)\n"
+hex_msg:
+        .asciz "    %016lx"
 ignore_int:
-        cld
+        SAVE_ALL
+        movq    %cr2,%rsi
         leaq    int_msg(%rip),%rdi
         xorl    %eax,%eax
         call    printk
+        movq    %rsp,%rbp
+0:      movq    (%rbp),%rsi
+        addq    $8,%rbp
+        leaq    hex_msg(%rip),%rdi
+        xorl    %eax,%eax
+        call    printk
+        testq   $0xff8,%rbp
+        jnz     0b
 1:      jmp     1b
 
 
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/clear_page.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/clear_page.S Mon Jul 02 12:19:26 2007 -0600
@@ -0,0 +1,26 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+#ifdef __i386__
+#define ptr_reg %edx
+#else
+#define ptr_reg %rdi
+#endif
+
+ENTRY(clear_page_sse2)
+#ifdef __i386__
+        mov     4(%esp), ptr_reg
+#endif
+        mov     $PAGE_SIZE/16, %ecx
+        xor     %eax,%eax
+
+0:      dec     %ecx
+        movnti  %eax, (ptr_reg)
+        movnti  %eax, 4(ptr_reg)
+        movnti  %eax, 8(ptr_reg)
+        movnti  %eax, 12(ptr_reg)
+        lea     16(ptr_reg), ptr_reg
+        jnz     0b
+
+        sfence
+        ret
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/domain.c     Mon Jul 02 12:19:26 2007 -0600
@@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v, 
         pg = alloc_domheap_page(NULL);
         if ( !pg )
             return -ENOMEM;
-        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+        d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
+        clear_page(d->arch.mm_arg_xlat_l3);
     }
 
     l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
@@ -444,7 +445,8 @@ int arch_domain_create(struct domain *d)
 
     if ( (pg = alloc_domheap_page(NULL)) == NULL )
         goto fail;
-    d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
+    d->arch.mm_perdomain_l2 = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l2);
     for ( i = 0; i < (1 << pdpt_order); i++ )
         d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
             l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
@@ -452,7 +454,8 @@ int arch_domain_create(struct domain *d)
 
     if ( (pg = alloc_domheap_page(NULL)) == NULL )
         goto fail;
-    d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
+    d->arch.mm_perdomain_l3 = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l3);
     d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
         l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
                             __PAGE_HYPERVISOR);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Mon Jul 02 12:19:26 2007 -0600
@@ -242,6 +242,11 @@ void hvm_domain_relinquish_resources(str
 {
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+
+    pit_deinit(d);
+    rtc_deinit(d);
+    pmtimer_deinit(d);
+    hpet_deinit(d);
 }
 
 void hvm_domain_destroy(struct domain *d)
@@ -421,22 +426,11 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
 void hvm_vcpu_destroy(struct vcpu *v)
 {
-    struct domain *d = v->domain;
-
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
 
     /* Event channel is already freed by evtchn_destroy(). */
     /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
-
-    if ( v->vcpu_id == 0 )
-    {
-        /* NB. All these really belong in hvm_domain_destroy(). */
-        pit_deinit(d);
-        rtc_deinit(d);
-        pmtimer_deinit(d);
-        hpet_deinit(d);
-    }
 }
 
 
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/io.c     Mon Jul 02 12:19:26 2007 -0600
@@ -858,6 +858,7 @@ void hvm_io_assist(void)
     }
 
     /* Copy register changes back into current guest state. */
+    regs->eflags &= ~X86_EFLAGS_RF;
     hvm_load_cpu_guest_regs(v, regs);
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
 
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/platform.c       Mon Jul 02 12:19:26 2007 -0600
@@ -1065,6 +1065,7 @@ void handle_mmio(unsigned long gpa)
     }
 
     regs->eip += inst_len; /* advance %eip */
+    regs->eflags &= ~X86_EFLAGS_RF;
 
     switch ( mmio_op->instr ) {
     case INSTR_MOV:
@@ -1122,6 +1123,7 @@ void handle_mmio(unsigned long gpa)
             /* IO read --> memory write */
             if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
             regs->eip -= inst_len; /* do not advance %eip */
+            regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
             hvm_inject_exception(TRAP_page_fault, errcode, addr);
             return;
         }
@@ -1150,6 +1152,7 @@ void handle_mmio(unsigned long gpa)
                         /* Failed on the page-spanning copy.  Inject PF into
                          * the guest for the address where we failed */
                         regs->eip -= inst_len; /* do not advance %eip */
+                        regs->eflags |= X86_EFLAGS_RF; /* RF was set by #PF */
                         /* Must set CR2 at the failing address */ 
                         addr += size - rv;
                         gdprintk(XENLOG_DEBUG, "Pagefault on non-io side of a "
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Jul 02 12:19:26 2007 -0600
@@ -391,7 +391,7 @@ int svm_vmcb_restore(struct vcpu *v, str
     }
 
  skip_cr3:
-    vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
+    vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK;
     v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
     
     vmcb->idtr.limit = c->idtr_limit;
@@ -448,7 +448,8 @@ int svm_vmcb_restore(struct vcpu *v, str
     /* update VMCB for nested paging restore */
     if ( paging_mode_hap(v->domain) ) {
         vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
-        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
         vmcb->cr3 = c->cr3;
         vmcb->np_enable = 1;
         vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
@@ -805,8 +806,10 @@ static void svm_ctxt_switch_from(struct 
         : : "a" (__pa(root_vmcb[cpu])) );
 
 #ifdef __x86_64__
-    /* Resume use of IST2 for NMIs now that the host TR is reinstated. */
-    idt_tables[cpu][TRAP_nmi].a |= 2UL << 32;
+    /* Resume use of ISTs now that the host TR is reinstated. */
+    idt_tables[cpu][TRAP_double_fault].a  |= 1UL << 32; /* IST1 */
+    idt_tables[cpu][TRAP_nmi].a           |= 2UL << 32; /* IST2 */
+    idt_tables[cpu][TRAP_machine_check].a |= 3UL << 32; /* IST3 */
 #endif
 }
 
@@ -826,10 +829,12 @@ static void svm_ctxt_switch_to(struct vc
     set_segment_register(ss, 0);
 
     /*
-     * Cannot use IST2 for NMIs while we are running with the guest TR. But
-     * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET.
+     * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
+     * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
      */
-    idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32);
+    idt_tables[cpu][TRAP_double_fault].a  &= ~(3UL << 32);
+    idt_tables[cpu][TRAP_nmi].a           &= ~(3UL << 32);
+    idt_tables[cpu][TRAP_machine_check].a &= ~(3UL << 32);
 #endif
 
     svm_restore_dr(v);
@@ -1823,9 +1828,19 @@ static int mov_to_cr(int gpreg, int cr, 
         break;
 
     case 4: /* CR4 */
+        if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "Guest attempts to set reserved bit in CR4: %lx",
+                        value);
+            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+            break;
+        }
+
         if ( paging_mode_hap(v->domain) )
         {
-            vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value;
+            v->arch.hvm_svm.cpu_shadow_cr4 = value;
+            vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
             paging_update_paging_modes(v);
             /* signal paging update to ASID handler */
             svm_asid_g_update_paging (v);
@@ -1875,7 +1890,7 @@ static int mov_to_cr(int gpreg, int cr, 
         }
 
         v->arch.hvm_svm.cpu_shadow_cr4 = value;
-        vmcb->cr4 = value | SVM_CR4_HOST_MASK;
+        vmcb->cr4 = value | HVM_CR4_HOST_MASK;
   
         /*
          * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
@@ -2071,9 +2086,11 @@ static inline void svm_do_msr_access(
         case MSR_IA32_TIME_STAMP_COUNTER:
             msr_content = hvm_get_guest_time(v);
             break;
+
         case MSR_IA32_APICBASE:
             msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
             break;
+
         case MSR_EFER:
             msr_content = v->arch.hvm_svm.cpu_shadow_efer;
             break;
@@ -2093,6 +2110,10 @@ static inline void svm_do_msr_access(
              * particularly meaningful, but at least avoids the guest crashing!
              */
             msr_content = 0;
+            break;
+
+        case MSR_K8_VM_HSAVE_PA:
+            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
             break;
 
         default:
@@ -2128,9 +2149,15 @@ static inline void svm_do_msr_access(
             hvm_set_guest_time(v, msr_content);
             pt_reset(v);
             break;
+
         case MSR_IA32_APICBASE:
             vlapic_msr_set(vcpu_vlapic(v), msr_content);
             break;
+
+        case MSR_K8_VM_HSAVE_PA:
+            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+            break;
+
         default:
             if ( !long_mode_do_msr_write(regs) )
                 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
@@ -2265,12 +2292,13 @@ static int svm_reset_to_realmode(struct 
     vmcb->cr2 = 0;
     vmcb->efer = EFER_SVME;
 
-    vmcb->cr4 = SVM_CR4_HOST_MASK;
+    vmcb->cr4 = HVM_CR4_HOST_MASK;
     v->arch.hvm_svm.cpu_shadow_cr4 = 0;
 
     if ( paging_mode_hap(v->domain) ) {
         vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
-        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
     }
 
     /* This will jump to ROMBIOS */
@@ -2411,6 +2439,12 @@ asmlinkage void svm_vmexit_handler(struc
         break;
     }
 
+    case VMEXIT_EXCEPTION_MC:
+        HVMTRACE_0D(MCE, v);
+        svm_store_cpu_guest_regs(v, regs, NULL);
+        do_machine_check(regs);
+        break;
+
     case VMEXIT_VINTR:
         vmcb->vintr.fields.irq = 0;
         vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
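
As an aside on the IST changes above: in a 64-bit IDT gate the IST index lives in bits 34:32 of the low quadword (the `.a` word used here), and an index of zero means the CPU stays on the current stack. Clearing only `3UL << 32` is enough in svm_ctxt_switch_to() because the largest index in use is IST3. A minimal sketch of that encoding, with illustrative (non-Xen) helper names:

    #include <stdint.h>

    #define GATE_IST_SHIFT 32
    #define GATE_IST_MASK  (7ULL << GATE_IST_SHIFT)          /* bits 34:32 */

    /* ist = 1..7 selects a TSS interrupt stack; 0 = normal stack switching. */
    static inline uint64_t gate_set_ist(uint64_t gate_lo, unsigned int ist)
    {
        return (gate_lo & ~GATE_IST_MASK) | ((uint64_t)ist << GATE_IST_SHIFT);
    }
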
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Mon Jul 02 12:19:26 2007 -0600
@@ -224,7 +224,7 @@ static int construct_vmcb(struct vcpu *v
     /* Guest CR4. */
     arch_svm->cpu_shadow_cr4 =
         read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
-    vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK;
+    vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK;
 
     paging_update_paging_modes(v);
     vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
@@ -235,7 +235,9 @@ static int construct_vmcb(struct vcpu *v
         vmcb->np_enable = 1; /* enable nested paging */
         vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
         vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
-        vmcb->cr4 = arch_svm->cpu_shadow_cr4 = 0;
+        vmcb->cr4 = arch_svm->cpu_shadow_cr4 =
+                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
+        vmcb->exception_intercepts = HVM_TRAP_MASK;
 
         /* No point in intercepting CR0/3/4 reads, because the hardware 
          * will return the guest versions anyway. */
@@ -249,7 +251,7 @@ static int construct_vmcb(struct vcpu *v
     }
     else
     {
-        vmcb->exception_intercepts = 1U << TRAP_page_fault;
+        vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_page_fault);
     }
 
     return 0;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Mon Jul 02 12:19:26 2007 -0600
@@ -43,6 +43,7 @@ u32 vmx_secondary_exec_control __read_mo
 u32 vmx_secondary_exec_control __read_mostly;
 u32 vmx_vmexit_control __read_mostly;
 u32 vmx_vmentry_control __read_mostly;
+bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;
 
 static u32 vmcs_revision_id __read_mostly;
 
@@ -133,6 +134,7 @@ void vmx_init_vmcs_config(void)
         vmx_secondary_exec_control = _vmx_secondary_exec_control;
         vmx_vmexit_control         = _vmx_vmexit_control;
         vmx_vmentry_control        = _vmx_vmentry_control;
+        cpu_has_vmx_ins_outs_instr_info = !!(vmx_msr_high & (1U<<22));
     }
     else
     {
@@ -142,6 +144,7 @@ void vmx_init_vmcs_config(void)
         BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control);
         BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
         BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
+        BUG_ON(cpu_has_vmx_ins_outs_instr_info != !!(vmx_msr_high & (1U<<22)));
     }
 
     /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
@@ -421,7 +424,7 @@ static void construct_vmcs(struct vcpu *
     __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
 #endif
 
-    __vmwrite(EXCEPTION_BITMAP, 1U << TRAP_page_fault);
+    __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
 
     /* Guest CR0. */
     cr0 = read_cr0();
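
The `1U<<22` probed above is bit 54 of the 64-bit IA32_VMX_BASIC MSR (0x480) viewed through its high dword; when set, INS/OUTS VM exits deliver the VM-exit instruction-information field that vmx_outs_get_segment() relies on later in this changeset. The same probe against the full MSR value, as a sketch (the read primitive is assumed, not Xen's):

    #include <stdint.h>

    #define MSR_IA32_VMX_BASIC       0x480
    #define VMX_BASIC_INS_OUTS_INFO  (1ULL << 54)  /* == bit 22 of the high dword */

    /* read_msr() stands in for whatever 64-bit rdmsr wrapper is available. */
    static inline int has_ins_outs_instr_info(uint64_t (*read_msr)(uint32_t))
    {
        return !!(read_msr(MSR_IA32_VMX_BASIC) & VMX_BASIC_INS_OUTS_INFO);
    }
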
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Jul 02 12:19:26 2007 -0600
@@ -560,6 +560,9 @@ int vmx_vmcs_restore(struct vcpu *v, str
     __vmwrite(GUEST_RSP, c->rsp);
     __vmwrite(GUEST_RFLAGS, c->rflags);
 
+    v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG 
+                               | X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
+    __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
     v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
     __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
 
@@ -577,33 +580,17 @@ int vmx_vmcs_restore(struct vcpu *v, str
         goto skip_cr3;
     }
 
-    if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
-        /*
-         * This is simple TLB flush, implying the guest has
-         * removed some translation or changed page attributes.
-         * We simply invalidate the shadow.
-         */
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
-            goto bad_cr3;
-        }
-    } else {
-        /*
-         * If different, make a shadow. Check if the PDBR is valid
-         * first.
-         */
-        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
-        /* current!=vcpu as not called by arch_vmx_do_launch */
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
-            goto bad_cr3;
-        }
-        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-        v->arch.guest_table = pagetable_from_pfn(mfn);
-        if (old_base_mfn)
-             put_page(mfn_to_page(old_base_mfn));
-        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
-    }
+    HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
+    /* current!=vcpu as not called by arch_vmx_do_launch */
+    mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+    if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
+        goto bad_cr3;
+    }
+    old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+    v->arch.guest_table = pagetable_from_pfn(mfn);
+    if (old_base_mfn)
+        put_page(mfn_to_page(old_base_mfn));
+    v->arch.hvm_vmx.cpu_cr3 = c->cr3;
 
  skip_cr3:
 #if defined(__x86_64__)
@@ -615,7 +602,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
     }
 #endif
 
-    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
     v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
     __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
@@ -1315,16 +1302,20 @@ static int __get_instruction_length(void
 
 static void inline __update_guest_eip(unsigned long inst_len)
 {
-    unsigned long current_eip, intr_shadow;
-
-    current_eip = __vmread(GUEST_RIP);
-    __vmwrite(GUEST_RIP, current_eip + inst_len);
-
-    intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
-    if ( intr_shadow & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
-    {
-        intr_shadow &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
-        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
+    unsigned long x;
+
+    x = __vmread(GUEST_RIP);
+    __vmwrite(GUEST_RIP, x + inst_len);
+
+    x = __vmread(GUEST_RFLAGS);
+    if ( x & X86_EFLAGS_RF )
+        __vmwrite(GUEST_RFLAGS, x & ~X86_EFLAGS_RF);
+
+    x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+    if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
+    {
+        x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
+        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
     }
 }
 
@@ -1475,16 +1466,34 @@ static void vmx_do_invlpg(unsigned long 
     paging_invlpg(v, va);
 }
 
-/*
- * get segment for string pio according to guest instruction
- */
-static void vmx_str_pio_get_segment(int long_mode, unsigned long eip,
-                                   int inst_len, enum x86_segment *seg)
+/* Get segment for OUTS according to guest instruction. */
+static enum x86_segment vmx_outs_get_segment(
+    int long_mode, unsigned long eip, int inst_len)
 {
     unsigned char inst[MAX_INST_LEN];
+    enum x86_segment seg = x86_seg_ds;
     int i;
     extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
 
+    if ( likely(cpu_has_vmx_ins_outs_instr_info) )
+    {
+        unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO);
+
+        /* Get segment register according to bits 17:15. */
+        switch ( (instr_info >> 15) & 7 )
+        {
+        case 0: seg = x86_seg_es; break;
+        case 1: seg = x86_seg_cs; break;
+        case 2: seg = x86_seg_ss; break;
+        case 3: seg = x86_seg_ds; break;
+        case 4: seg = x86_seg_fs; break;
+        case 5: seg = x86_seg_gs; break;
+        default: BUG();
+        }
+
+        goto out;
+    }
+
     if ( !long_mode )
         eip += __vmread(GUEST_CS_BASE);
 
@@ -1493,7 +1502,7 @@ static void vmx_str_pio_get_segment(int 
     {
         gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
         domain_crash(current->domain);
-        return;
+        goto out;
     }
 
     for ( i = 0; i < inst_len; i++ )
@@ -1510,25 +1519,28 @@ static void vmx_str_pio_get_segment(int 
 #endif
             continue;
         case 0x2e: /* CS */
-            *seg = x86_seg_cs;
+            seg = x86_seg_cs;
             continue;
         case 0x36: /* SS */
-            *seg = x86_seg_ss;
+            seg = x86_seg_ss;
             continue;
         case 0x26: /* ES */
-            *seg = x86_seg_es;
+            seg = x86_seg_es;
             continue;
         case 0x64: /* FS */
-            *seg = x86_seg_fs;
+            seg = x86_seg_fs;
             continue;
         case 0x65: /* GS */
-            *seg = x86_seg_gs;
+            seg = x86_seg_gs;
             continue;
         case 0x3e: /* DS */
-            *seg = x86_seg_ds;
+            seg = x86_seg_ds;
             continue;
         }
     }
+
+ out:
+    return seg;
 }
 
 static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
@@ -1541,7 +1553,7 @@ static int vmx_str_pio_check_descriptor(
     *base = 0;
     *limit = 0;
     if ( seg != x86_seg_es )
-        vmx_str_pio_get_segment(long_mode, eip, inst_len, &seg);
+        seg = vmx_outs_get_segment(long_mode, eip, inst_len);
 
     switch ( seg )
     {
@@ -1587,7 +1599,7 @@ static int vmx_str_pio_check_descriptor(
     }
     *ar_bytes = __vmread(ar_field);
 
-    return !(*ar_bytes & 0x10000);
+    return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE);
 }
 
 
@@ -1896,7 +1908,7 @@ static void vmx_world_save(struct vcpu *
     c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
 
     c->esp = __vmread(GUEST_RSP);
-    c->eflags = __vmread(GUEST_RFLAGS);
+    c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF;
 
     c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
     c->cr3 = v->arch.hvm_vmx.cpu_cr3;
@@ -1997,7 +2009,7 @@ static int vmx_world_restore(struct vcpu
     else
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
 
-    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
     v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
     __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
@@ -2272,7 +2284,6 @@ static int vmx_set_cr0(unsigned long val
                     "Enabling CR0.PE at %%eip 0x%lx", eip);
         if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
         {
-            eip = __vmread(GUEST_RIP);
             HVM_DBG_LOG(DBG_LEVEL_1,
                         "Restoring to %%eip 0x%lx", eip);
             return 0; /* do not update eip! */
@@ -2397,6 +2408,15 @@ static int mov_to_cr(int gp, int cr, str
     case 4: /* CR4 */
         old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
 
+        if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "Guest attempts to set reserved bit in CR4: %lx",
+                        value);
+            vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+            break;
+        }
+
         if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
         {
             if ( vmx_pgbit_test(v) )
@@ -2437,7 +2457,7 @@ static int mov_to_cr(int gp, int cr, str
             }
         }
 
-        __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
+        __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
         v->arch.hvm_vmx.cpu_shadow_cr4 = value;
         __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
@@ -2581,7 +2601,7 @@ static inline int vmx_do_msr_read(struct
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
-    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
         if ( long_mode_do_msr_read(regs) )
@@ -2707,7 +2727,7 @@ static inline int vmx_do_msr_write(struc
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
-    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
         if ( !long_mode_do_msr_write(regs) )
@@ -2823,7 +2843,8 @@ static void vmx_reflect_exception(struct
     }
 }
 
-static void vmx_failed_vmentry(unsigned int exit_reason)
+static void vmx_failed_vmentry(unsigned int exit_reason,
+                               struct cpu_user_regs *regs)
 {
     unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
     unsigned long exit_qualification;
@@ -2840,6 +2861,9 @@ static void vmx_failed_vmentry(unsigned 
         break;
     case EXIT_REASON_MACHINE_CHECK:
         printk("caused by machine check.\n");
+        HVMTRACE_0D(MCE, current);
+        vmx_store_cpu_guest_regs(current, regs, NULL);
+        do_machine_check(regs);
         break;
     default:
         printk("reason not known yet!");
@@ -2869,7 +2893,7 @@ asmlinkage void vmx_vmexit_handler(struc
         local_irq_enable();
 
     if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
-        return vmx_failed_vmentry(exit_reason);
+        return vmx_failed_vmentry(exit_reason, regs);
 
     switch ( exit_reason )
     {
@@ -2920,11 +2944,19 @@ asmlinkage void vmx_vmexit_handler(struc
             vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
             break;
         case TRAP_nmi:
-            HVMTRACE_0D(NMI, v);
             if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
+            {
+                HVMTRACE_0D(NMI, v);
+                vmx_store_cpu_guest_regs(v, regs, NULL);
                 do_nmi(regs); /* Real NMI, vector 2: normal processing. */
+            }
             else
                 vmx_reflect_exception(v);
+            break;
+        case TRAP_machine_check:
+            HVMTRACE_0D(MCE, v);
+            vmx_store_cpu_guest_regs(v, regs, NULL);
+            do_machine_check(regs);
             break;
         default:
             goto exit_and_crash;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/io_apic.c    Mon Jul 02 12:19:26 2007 -0600
@@ -371,7 +371,7 @@ static int pin_2_irq(int idx, int apic, 
  * so mask in all cases should simply be TARGET_CPUS
  */
 #ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
+void /*__init*/ setup_ioapic_dest(void)
 {
     int pin, ioapic, irq, irq_entry;
 
@@ -849,7 +849,7 @@ static inline void UNEXPECTED_IO_APIC(vo
 {
 }
 
-void __init __print_IO_APIC(void)
+void /*__init*/ __print_IO_APIC(void)
 {
     int apic, i;
     union IO_APIC_reg_00 reg_00;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/mm.c Mon Jul 02 12:19:26 2007 -0600
@@ -3240,6 +3240,7 @@ static int ptwr_emulated_update(
     struct ptwr_emulate_ctxt *ptwr_ctxt)
 {
     unsigned long mfn;
+    unsigned long unaligned_addr = addr;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
     struct vcpu *v = current;
@@ -3294,7 +3295,7 @@ static int ptwr_emulated_update(
     if ( unlikely(!get_page_from_l1e(nl1e, d)) )
     {
         if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
-             (bytes == 4) && (addr & 4) && !do_cmpxchg &&
+             (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
              (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
         {
             /*
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/platform_hypercall.c Mon Jul 02 12:19:26 2007 -0600
@@ -20,12 +20,20 @@
 #include <xen/guest_access.h>
 #include <asm/current.h>
 #include <public/platform.h>
+#include <asm/edd.h>
 #include <asm/mtrr.h>
 #include "cpu/mtrr/mtrr.h"
+
+extern uint16_t boot_edid_caps;
+extern uint8_t boot_edid_info[];
 
 #ifndef COMPAT
 typedef long ret_t;
 DEFINE_SPINLOCK(xenpf_lock);
+# undef copy_from_compat
+# define copy_from_compat copy_from_guest
+# undef copy_to_compat
+# define copy_to_compat copy_to_guest
 #else
 extern spinlock_t xenpf_lock;
 #endif
@@ -150,6 +158,94 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
         }
     }
     break;
+
+    case XENPF_firmware_info:
+        switch ( op->u.firmware_info.type )
+        {
+        case XEN_FW_DISK_INFO: {
+            const struct edd_info *info;
+            u16 length;
+
+            ret = -ESRCH;
+            if ( op->u.firmware_info.index >= bootsym(boot_edd_info_nr) )
+                break;
+
+            info = bootsym(boot_edd_info) + op->u.firmware_info.index;
+
+            /* Transfer the EDD info block. */
+            ret = -EFAULT;
+            if ( copy_from_compat(&length, op->u.firmware_info.u.
+                                  disk_info.edd_params, 1) )
+                break;
+            if ( length > info->edd_device_params.length )
+                length = info->edd_device_params.length;
+            if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
+                                (u8 *)&info->edd_device_params,
+                                length) )
+                break;
+            if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
+                                &length, 1) )
+                break;
+
+            /* Transfer miscellaneous other information values. */
+#define C(x) op->u.firmware_info.u.disk_info.x = info->x
+            C(device);
+            C(version);
+            C(interface_support);
+            C(legacy_max_cylinder);
+            C(legacy_max_head);
+            C(legacy_sectors_per_track);
+#undef C
+
+            ret = (copy_field_to_guest(u_xenpf_op, op,
+                                      u.firmware_info.u.disk_info)
+                   ? -EFAULT : 0);
+            break;
+        }
+        case XEN_FW_DISK_MBR_SIGNATURE: {
+            const struct mbr_signature *sig;
+
+            ret = -ESRCH;
+            if ( op->u.firmware_info.index >= bootsym(boot_mbr_signature_nr) )
+                break;
+
+            sig = bootsym(boot_mbr_signature) + op->u.firmware_info.index;
+
+            op->u.firmware_info.u.disk_mbr_signature.device = sig->device;
+            op->u.firmware_info.u.disk_mbr_signature.mbr_signature =
+                sig->signature;
+
+            ret = (copy_field_to_guest(u_xenpf_op, op,
+                                      u.firmware_info.u.disk_mbr_signature)
+                   ? -EFAULT : 0);
+            break;
+        }
+        case XEN_FW_VBEDDC_INFO:
+            ret = -ESRCH;
+            if ( op->u.firmware_info.index != 0 )
+                break;
+            if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
+                break;
+
+            op->u.firmware_info.u.vbeddc_info.capabilities =
+                bootsym(boot_edid_caps);
+            op->u.firmware_info.u.vbeddc_info.edid_transfer_time =
+                bootsym(boot_edid_caps) >> 8;
+
+            ret = 0;
+            if ( copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
+                                     u.vbeddc_info.capabilities) ||
+                 copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
+                                     u.vbeddc_info.edid_transfer_time) ||
+                 copy_to_compat(op->u.firmware_info.u.vbeddc_info.edid,
+                                bootsym(boot_edid_info), 128) )
+                ret = -EFAULT;
+            break;
+        default:
+            ret = -EINVAL;
+            break;
+        }
+        break;
 
     default:
         ret = -ENOSYS;
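
On the caller side of the new XENPF_firmware_info op, the XEN_FW_DISK_INFO handler above expects the first uint16_t of the edd_params buffer to hold the buffer size; it clamps the copy to the BIOS-reported length and writes the real length back. A rough caller sketch, assuming the public platform.h definitions from this changeset and leaving the actual platform-op hypercall to a hypothetical issue_platform_op() wrapper:

    #include <string.h>   /* memset; sketch only, real code uses Xen headers */

    static int fetch_edd_info(uint32_t index, struct xenpf_firmware_info *fi,
                              void *edd_buf, uint16_t edd_buf_size)
    {
        memset(fi, 0, sizeof(*fi));
        fi->type  = XEN_FW_DISK_INFO;
        fi->index = index;

        /* Xen reads the usable size from the first two bytes of the buffer. */
        *(uint16_t *)edd_buf = edd_buf_size;
        set_xen_guest_handle(fi->u.disk_info.edd_params, edd_buf);

        return issue_platform_op(XENPF_firmware_info, fi);  /* hypothetical */
    }
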
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/setup.c      Mon Jul 02 12:19:26 2007 -0600
@@ -405,7 +405,7 @@ void __init __start_xen(unsigned long mb
 void __init __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
-    char __cmdline[] = "", *cmdline = __cmdline;
+    char __cmdline[] = "", *cmdline = __cmdline, *kextra;
     unsigned long _initrd_start = 0, _initrd_len = 0;
     unsigned int initrdidx = 1;
     char *_policy_start = NULL;
@@ -426,6 +426,17 @@ void __init __start_xen(unsigned long mb
     /* Parse the command-line options. */
     if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
         cmdline = __va(mbi->cmdline);
+    if ( (kextra = strstr(cmdline, " -- ")) != NULL )
+    {
+        /*
+         * Options after ' -- ' separator belong to dom0.
+         *  1. Orphan dom0's options from Xen's command line.
+         *  2. Skip all but final leading space from dom0's options.
+         */
+        *kextra = '\0';
+        kextra += 3;
+        while ( kextra[1] == ' ' ) kextra++;
+    }
     cmdline_parse(cmdline);
 
     parse_video_info();
@@ -494,7 +505,7 @@ void __init __start_xen(unsigned long mb
 
     printk("Disc information:\n");
     printk(" Found %d MBR signatures\n",
-           bootsym(boot_edd_signature_nr));
+           bootsym(boot_mbr_signature_nr));
     printk(" Found %d EDD information structures\n",
            bootsym(boot_edd_info_nr));
 
@@ -1009,17 +1020,26 @@ void __init __start_xen(unsigned long mb
 
     /* Grab the DOM0 command line. */
     cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
-    if ( cmdline != NULL )
+    if ( (cmdline != NULL) || (kextra != NULL) )
     {
         static char dom0_cmdline[MAX_GUEST_CMDLINE];
 
-        /* Skip past the image name and copy to a local buffer. */
-        while ( *cmdline == ' ' ) cmdline++;
-        if ( (cmdline = strchr(cmdline, ' ')) != NULL )
+        dom0_cmdline[0] = '\0';
+
+        if ( cmdline != NULL )
         {
+            /* Skip past the image name and copy to a local buffer. */
             while ( *cmdline == ' ' ) cmdline++;
-            safe_strcpy(dom0_cmdline, cmdline);
+            if ( (cmdline = strchr(cmdline, ' ')) != NULL )
+            {
+                while ( *cmdline == ' ' ) cmdline++;
+                safe_strcpy(dom0_cmdline, cmdline);
+            }
         }
+
+        if ( kextra != NULL )
+            /* kextra always includes exactly one leading space. */
+            safe_strcat(dom0_cmdline, kextra);
 
         /* Append any extra parameters. */
         if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
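
The ' -- ' handling added above splits a single multiboot command line into Xen's options and dom0's options, leaving the dom0 part with exactly one leading space so it can be appended verbatim. The same splitting rule as a standalone, runnable sketch:

    #include <stdio.h>
    #include <string.h>

    /* Split "xen-opts -- dom0-opts" in place; returns the dom0 part
     * (with exactly one leading space) or NULL if there is no separator. */
    static char *split_dom0_args(char *cmdline)
    {
        char *kextra = strstr(cmdline, " -- ");

        if ( kextra == NULL )
            return NULL;
        *kextra = '\0';      /* terminate Xen's own options */
        kextra += 3;         /* now at the space following "--" */
        while ( kextra[1] == ' ' )
            kextra++;        /* collapse to a single leading space */
        return kextra;
    }

    int main(void)
    {
        char buf[] = "console=com1 loglvl=all --   console=hvc0 root=/dev/xvda1";
        char *dom0 = split_dom0_args(buf);
        printf("xen: '%s'\ndom0:'%s'\n", buf, dom0 ? dom0 : "");
        return 0;
    }
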
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/traps.c      Mon Jul 02 12:19:26 2007 -0600
@@ -86,6 +86,7 @@ asmlinkage int do_ ## _name(struct cpu_u
 asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
 
 asmlinkage void nmi(void);
+asmlinkage void machine_check(void);
 DECLARE_TRAP_HANDLER(divide_error);
 DECLARE_TRAP_HANDLER(debug);
 DECLARE_TRAP_HANDLER(int3);
@@ -103,7 +104,6 @@ DECLARE_TRAP_HANDLER(simd_coprocessor_er
 DECLARE_TRAP_HANDLER(simd_coprocessor_error);
 DECLARE_TRAP_HANDLER(alignment_check);
 DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
-DECLARE_TRAP_HANDLER(machine_check);
 
 long do_set_debugreg(int reg, unsigned long value);
 unsigned long do_get_debugreg(int reg);
@@ -631,6 +631,7 @@ static int emulate_forced_invalid_op(str
     regs->ecx = c;
     regs->edx = d;
     regs->eip = eip;
+    regs->eflags &= ~X86_EFLAGS_RF;
 
     return EXCRET_fault_fixed;
 }
@@ -730,10 +731,11 @@ asmlinkage int do_int3(struct cpu_user_r
     return do_guest_trap(TRAP_int3, regs, 0);
 }
 
-asmlinkage int do_machine_check(struct cpu_user_regs *regs)
-{
-    fatal_trap(TRAP_machine_check, regs);
-    return 0;
+asmlinkage void do_machine_check(struct cpu_user_regs *regs)
+{
+    extern fastcall void (*machine_check_vector)(
+        struct cpu_user_regs *, long error_code);
+    machine_check_vector(regs, regs->error_code);
 }
 
 void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1787,6 +1789,7 @@ static int emulate_privileged_op(struct 
 
  done:
     regs->eip = eip;
+    regs->eflags &= ~X86_EFLAGS_RF;
     return EXCRET_fault_fixed;
 
  fail:
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/entry.S       Mon Jul 02 12:19:26 2007 -0600
@@ -72,48 +72,36 @@
         andl $~3,reg;            \
         movl (reg),reg;
 
-
         ALIGN
 restore_all_guest:
         ASSERT_INTERRUPTS_DISABLED
         testl $X86_EFLAGS_VM,UREGS_eflags(%esp)
-        jnz  restore_all_vm86
+        popl  %ebx
+        popl  %ecx
+        popl  %edx
+        popl  %esi
+        popl  %edi
+        popl  %ebp
+        popl  %eax
+        leal  4(%esp),%esp
+        jnz   .Lrestore_iret_guest
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
-        testl $2,UREGS_cs(%esp)
-        jnz   1f
+        testb $2,UREGS_cs-UREGS_eip(%esp)
+        jnz   .Lrestore_sregs_guest
         call  restore_ring0_guest
-        jmp   restore_all_vm86
-1:
+        jmp   .Lrestore_iret_guest
 #endif
-.Lft1:  mov  UREGS_ds(%esp),%ds
-.Lft2:  mov  UREGS_es(%esp),%es
-.Lft3:  mov  UREGS_fs(%esp),%fs
-.Lft4:  mov  UREGS_gs(%esp),%gs
-restore_all_vm86:
-        popl %ebx
-        popl %ecx
-        popl %edx
-        popl %esi
-        popl %edi
-        popl %ebp
-        popl %eax
-        addl $4,%esp
+.Lrestore_sregs_guest:
+.Lft1:  mov  UREGS_ds-UREGS_eip(%esp),%ds
+.Lft2:  mov  UREGS_es-UREGS_eip(%esp),%es
+.Lft3:  mov  UREGS_fs-UREGS_eip(%esp),%fs
+.Lft4:  mov  UREGS_gs-UREGS_eip(%esp),%gs
+.Lrestore_iret_guest:
 .Lft5:  iret
 .section .fixup,"ax"
-.Lfx5:  subl  $28,%esp
-        pushl 28(%esp)                 # error_code/entry_vector
-        movl  %eax,UREGS_eax+4(%esp)
-        movl  %ebp,UREGS_ebp+4(%esp)
-        movl  %edi,UREGS_edi+4(%esp)
-        movl  %esi,UREGS_esi+4(%esp)
-        movl  %edx,UREGS_edx+4(%esp)
-        movl  %ecx,UREGS_ecx+4(%esp)
-        movl  %ebx,UREGS_ebx+4(%esp)
-.Lfx1:  SET_XEN_SEGMENTS(a)
-        movl  %eax,%fs
-        movl  %eax,%gs
-        sti
-        popl  %esi
+.Lfx1:  sti
+        SAVE_ALL_GPRS
+        mov   UREGS_error_code(%esp),%esi
         pushfl                         # EFLAGS
         movl  $__HYPERVISOR_CS,%eax
         pushl %eax                     # CS
@@ -147,7 +135,7 @@ 1:      call  create_bounce_frame
         .long .Lft2,.Lfx1
         .long .Lft3,.Lfx1
         .long .Lft4,.Lfx1
-        .long .Lft5,.Lfx5
+        .long .Lft5,.Lfx1
 .previous
 .section __ex_table,"a"
         .long .Ldf1,failsafe_callback
@@ -169,8 +157,8 @@ ENTRY(hypercall)
 ENTRY(hypercall)
         subl $4,%esp
         FIXUP_RING0_GUEST_STACK
-        SAVE_ALL(b)
-        sti
+        SAVE_ALL(1f,1f)
+1:      sti
         GET_CURRENT(%ebx)
         cmpl  $NR_hypercalls,%eax
         jae   bad_hypercall
@@ -420,9 +408,14 @@ ENTRY(divide_error)
         ALIGN
 handle_exception:
         FIXUP_RING0_GUEST_STACK
-        SAVE_ALL_NOSEGREGS(a)
-        SET_XEN_SEGMENTS(a)
-        testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
+        SAVE_ALL(1f,2f)
+        .text 1
+        /* Exception within Xen: make sure we have valid %ds,%es. */
+1:      mov   %ecx,%ds
+        mov   %ecx,%es
+        jmp   2f
+        .previous
+2:      testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
         jz    exception_with_ints_disabled
         sti                             # re-enable interrupts
 1:      xorl  %eax,%eax
@@ -533,71 +526,81 @@ ENTRY(page_fault)
         movw  $TRAP_page_fault,2(%esp)
         jmp   handle_exception
 
-ENTRY(machine_check)
-        pushl $TRAP_machine_check<<16
-        jmp   handle_exception
-
 ENTRY(spurious_interrupt_bug)
         pushl $TRAP_spurious_int<<16
         jmp   handle_exception
 
 ENTRY(early_page_fault)
-        SAVE_ALL_NOSEGREGS(a)
-        movl  %esp,%edx
-        pushl %edx
+        SAVE_ALL(1f,1f)
+1:      movl  %esp,%eax
+        pushl %eax
         call  do_early_page_fault
         addl  $4,%esp
         jmp   restore_all_xen
 
-ENTRY(nmi)
+handle_nmi_mce:
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
-        # NMI entry protocol is incompatible with guest kernel in ring 0.
+        # NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
+        addl  $4,%esp
         iret
 #else
         # Save state but do not trash the segment registers!
-        # We may otherwise be unable to reload them or copy them to ring 1. 
-        pushl %eax
-        SAVE_ALL_NOSEGREGS(a)
-
-        # We can only process the NMI if:
-        #  A. We are the outermost Xen activation (in which case we have
-        #     the selectors safely saved on our stack)
-        #  B. DS and ES contain sane Xen values.
-        # In all other cases we bail without touching DS-GS, as we have
-        # interrupted an enclosing Xen activation in tricky prologue or
-        # epilogue code.
-        movl  UREGS_eflags(%esp),%eax
-        movb  UREGS_cs(%esp),%al
-        testl $(3|X86_EFLAGS_VM),%eax
-        jnz   continue_nmi
-        movl  %ds,%eax
-        cmpw  $(__HYPERVISOR_DS),%ax
-        jne   defer_nmi
-        movl  %es,%eax
-        cmpw  $(__HYPERVISOR_DS),%ax
-        jne   defer_nmi
-
-continue_nmi:
-        SET_XEN_SEGMENTS(d)
+        SAVE_ALL(.Lnmi_mce_xen,.Lnmi_mce_common)
+.Lnmi_mce_common:
+        xorl  %eax,%eax
+        movw  UREGS_entry_vector(%esp),%ax
         movl  %esp,%edx
         pushl %edx
-        call  do_nmi
+        call  *exception_table(,%eax,4)
         addl  $4,%esp
+        /* 
+         * NB. We may return to Xen context with polluted %ds/%es. But in such
+         * cases we have put guest DS/ES on the guest stack frame, which will
+         * be detected by SAVE_ALL(), or we have rolled back restore_guest.
+         */
         jmp   ret_from_intr
-
-defer_nmi:
-        movl  $FIXMAP_apic_base,%eax
-        # apic_wait_icr_idle()
-1:      movl  %ss:APIC_ICR(%eax),%ebx
-        testl $APIC_ICR_BUSY,%ebx
-        jnz   1b
-        # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi)
-        movl  $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_PHYSICAL | \
-                TRAP_deferred_nmi),%ss:APIC_ICR(%eax)
-        jmp   restore_all_xen
+.Lnmi_mce_xen:
+        /* Check the outer (guest) context for %ds/%es state validity. */
+        GET_GUEST_REGS(%ebx)
+        testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx)
+        mov   %ds,%eax
+        mov   %es,%edx
+        jnz   .Lnmi_mce_vm86
+        /* We may have interrupted Xen while messing with %ds/%es... */
+        cmpw  %ax,%cx
+        mov   %ecx,%ds             /* Ensure %ds is valid */
+        cmove UREGS_ds(%ebx),%eax  /* Grab guest DS if it wasn't in %ds */
+        cmpw  %dx,%cx
+        movl  %eax,UREGS_ds(%ebx)  /* Ensure guest frame contains guest DS */
+        cmove UREGS_es(%ebx),%edx  /* Grab guest ES if it wasn't in %es */
+        mov   %ecx,%es             /* Ensure %es is valid */
+        movl  $.Lrestore_sregs_guest,%ecx
+        movl  %edx,UREGS_es(%ebx)  /* Ensure guest frame contains guest ES */
+        cmpl  %ecx,UREGS_eip(%esp)
+        jbe   .Lnmi_mce_common
+        cmpl  $.Lrestore_iret_guest,UREGS_eip(%esp)
+        ja    .Lnmi_mce_common
+        /* Roll outer context restore_guest back to restoring %ds/%es. */
+        movl  %ecx,UREGS_eip(%esp)
+        jmp   .Lnmi_mce_common
+.Lnmi_mce_vm86:
+        /* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */
+        mov   %ecx,%ds
+        mov   %ecx,%es
+        jmp   .Lnmi_mce_common
 #endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */
 
+ENTRY(nmi)
+        pushl $TRAP_nmi<<16
+        jmp   handle_nmi_mce
+
+ENTRY(machine_check)
+        pushl $TRAP_machine_check<<16
+        jmp   handle_nmi_mce
+
 ENTRY(setup_vm86_frame)
+        mov %ecx,%ds
+        mov %ecx,%es
         # Copies the entire stack frame forwards by 16 bytes.
         .macro copy_vm86_words count=18
         .if \count
@@ -615,7 +618,7 @@ ENTRY(exception_table)
 ENTRY(exception_table)
         .long do_divide_error
         .long do_debug
-        .long 0 # nmi
+        .long do_nmi
         .long do_int3
         .long do_overflow
         .long do_bounds
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S      Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S      Mon Jul 02 12:19:26 2007 -0600
@@ -20,40 +20,45 @@
 #include <asm/asm_defns.h>
 #include <public/xen.h>
 
+#define guestreg(field) ((field)-UREGS_eip+36)
+
         # Upon entry the stack should be the Xen stack and contain:
-        #   %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, SAVE_ALL, RETURN
+        #   %ss, %esp, EFLAGS, %cs|1, %eip, RETURN
         # On exit the stack should be %ss:%esp (i.e. the guest stack)
         # and contain:
-        #   EFLAGS, %cs, %eip, ERROR, SAVE_ALL, RETURN
+        #   EFLAGS, %cs, %eip, RETURN
         ALIGN
 ENTRY(restore_ring0_guest)
+        pusha
+
         # Point %gs:%esi to guest stack.
-RRG0:   movw UREGS_ss+4(%esp),%gs
-        movl UREGS_esp+4(%esp),%esi
+RRG0:   movw guestreg(UREGS_ss)(%esp),%gs
+        movl guestreg(UREGS_esp)(%esp),%esi
 
-        # Copy EFLAGS...EBX, RETURN from Xen stack to guest stack.
-        movl $(UREGS_kernel_sizeof>>2)+1,%ecx
+        # Copy EFLAGS, %cs, %eip, RETURN, PUSHA from Xen stack to guest stack.
+        movl $12,%ecx /* 12 32-bit values */
 
 1:      subl $4,%esi
         movl -4(%esp,%ecx,4),%eax
 RRG1:   movl %eax,%gs:(%esi)
         loop 1b
 
-RRG2:   andl $~3,%gs:UREGS_cs+4(%esi)
+RRG2:   andl $~3,%gs:guestreg(UREGS_cs)(%esi)
 
         movl %gs,%eax
 
         # We need to do this because these registers are not present
         # on the guest stack so they cannot be restored by the code in
         # restore_all_guest.
-RRG3:   mov  UREGS_ds+4(%esp),%ds
-RRG4:   mov  UREGS_es+4(%esp),%es
-RRG5:   mov  UREGS_fs+4(%esp),%fs
-RRG6:   mov  UREGS_gs+4(%esp),%gs
+RRG3:   mov  guestreg(UREGS_ds)(%esp),%ds
+RRG4:   mov  guestreg(UREGS_es)(%esp),%es
+RRG5:   mov  guestreg(UREGS_fs)(%esp),%fs
+RRG6:   mov  guestreg(UREGS_gs)(%esp),%gs
 
 RRG7:   movl %eax,%ss
         movl %esi,%esp
 
+        popa
         ret
 .section __ex_table,"a"
         .long RRG0,domain_crash_synchronous
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Mon Jul 02 12:19:26 2007 -0600
@@ -232,15 +232,6 @@ unsigned long do_iret(void)
     return 0;
 }
 
-#include <asm/asm_defns.h>
-BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
-fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
-{
-    asmlinkage void do_nmi(struct cpu_user_regs *);
-    ack_APIC_irq();
-    do_nmi(regs);
-}
-
 void __init percpu_traps_init(void)
 {
     struct tss_struct *tss = &doublefault_tss;
@@ -251,8 +242,6 @@ void __init percpu_traps_init(void)
 
     /* The hypercall entry vector is only accessible from ring 1. */
     _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
-
-    set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);
 
     /*
      * Make a separate task for double faults. This will get us debug output if
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile      Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/Makefile      Mon Jul 02 12:19:26 2007 -0600
@@ -1,12 +1,12 @@ subdir-y += compat
 subdir-y += compat
 
 obj-y += entry.o
-obj-y += compat_kexec.o
 obj-y += gpr_switch.o
 obj-y += mm.o
 obj-y += traps.o
 
 obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_COMPAT) += compat_kexec.o
 obj-$(CONFIG_COMPAT) += domain.o
 obj-$(CONFIG_COMPAT) += physdev.o
 obj-$(CONFIG_COMPAT) += platform_hypercall.o
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/compat/entry.S
--- a/xen/arch/x86/x86_64/compat/entry.S        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/compat/entry.S        Mon Jul 02 12:19:26 2007 -0600
@@ -143,12 +143,12 @@ compat_restore_all_guest:
 .Lft0:  iretq
 
 .section .fixup,"ax"
-.Lfx0:  popq  -15*8-8(%rsp)            # error_code/entry_vector
-        SAVE_ALL                       # 15*8 bytes pushed
-        movq  -8(%rsp),%rsi            # error_code/entry_vector
-        sti                            # after stack abuse (-1024(%rsp))
+.Lfx0:  sti
+        SAVE_ALL
+        movq  UREGS_error_code(%rsp),%rsi
+        movq  %rsp,%rax
+        andq  $~0xf,%rsp
         pushq $__HYPERVISOR_DS         # SS
-        leaq  8(%rsp),%rax
         pushq %rax                     # RSP
         pushfq                         # RFLAGS
         pushq $__HYPERVISOR_CS         # CS
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/entry.S       Mon Jul 02 12:19:26 2007 -0600
@@ -57,23 +57,23 @@ 1:      sysretl
 /* No special register assumptions. */
 iret_exit_to_guest:
         addq  $8,%rsp
-.Lft1:  iretq
+.Lft0:  iretq
 
 .section .fixup,"ax"
-.Lfx1:  popq  -15*8-8(%rsp)            # error_code/entry_vector
-        SAVE_ALL                       # 15*8 bytes pushed
-        movq  -8(%rsp),%rsi            # error_code/entry_vector
-        sti                            # after stack abuse (-1024(%rsp))
+.Lfx0:  sti
+        SAVE_ALL
+        movq  UREGS_error_code(%rsp),%rsi
+        movq  %rsp,%rax
+        andq  $~0xf,%rsp
         pushq $__HYPERVISOR_DS         # SS
-        leaq  8(%rsp),%rax
         pushq %rax                     # RSP
-        pushf                          # RFLAGS
+        pushfq                         # RFLAGS
         pushq $__HYPERVISOR_CS         # CS
-        leaq  .Ldf1(%rip),%rax
+        leaq  .Ldf0(%rip),%rax
         pushq %rax                     # RIP
         pushq %rsi                     # error_code/entry_vector
         jmp   handle_exception
-.Ldf1:  GET_CURRENT(%rbx)
+.Ldf0:  GET_CURRENT(%rbx)
         jmp   test_all_events
 failsafe_callback:
         GET_CURRENT(%rbx)
@@ -88,10 +88,10 @@ 1:      call  create_bounce_frame
         jmp   test_all_events
 .previous
 .section __pre_ex_table,"a"
-        .quad .Lft1,.Lfx1
+        .quad .Lft0,.Lfx0
 .previous
 .section __ex_table,"a"
-        .quad .Ldf1,failsafe_callback
+        .quad .Ldf0,failsafe_callback
 .previous
 
         ALIGN
@@ -505,11 +505,6 @@ ENTRY(page_fault)
         movl  $TRAP_page_fault,4(%rsp)
         jmp   handle_exception
 
-ENTRY(machine_check)
-        pushq $0
-        movl  $TRAP_machine_check,4(%rsp)
-        jmp   handle_exception
-
 ENTRY(spurious_interrupt_bug)
         pushq $0
         movl  $TRAP_spurious_int,4(%rsp)
@@ -527,31 +522,38 @@ ENTRY(early_page_fault)
         call  do_early_page_fault
         jmp   restore_all_xen
 
+handle_ist_exception:
+        SAVE_ALL
+        testb $3,UREGS_cs(%rsp)
+        jz    1f
+        /* Interrupted guest context. Copy the context to stack bottom. */
+        GET_GUEST_REGS(%rdi)
+        movq  %rsp,%rsi
+        movl  $UREGS_kernel_sizeof/8,%ecx
+        movq  %rdi,%rsp
+        rep   movsq
+1:      movq  %rsp,%rdi
+        movl  UREGS_entry_vector(%rsp),%eax
+        leaq  exception_table(%rip),%rdx
+        callq *(%rdx,%rax,8)
+        jmp   ret_from_intr
+
 ENTRY(nmi)
         pushq $0
-        SAVE_ALL
-        testb $3,UREGS_cs(%rsp)
-        jz    nmi_in_hypervisor_mode
-        /* Interrupted guest context. Copy the context to stack bottom. */
-        GET_GUEST_REGS(%rbx)
-        movl  $UREGS_kernel_sizeof/8,%ecx
-1:      popq  %rax
-        movq  %rax,(%rbx)
-        addq  $8,%rbx
-        loop  1b
-        subq  $UREGS_kernel_sizeof,%rbx
-        movq  %rbx,%rsp
-nmi_in_hypervisor_mode:
-        movq  %rsp,%rdi
-        call  do_nmi
-        jmp   ret_from_intr
+        movl  $TRAP_nmi,4(%rsp)
+        jmp   handle_ist_exception
+
+ENTRY(machine_check)
+        pushq $0
+        movl  $TRAP_machine_check,4(%rsp)
+        jmp   handle_ist_exception
 
 .data
 
 ENTRY(exception_table)
         .quad do_divide_error
         .quad do_debug
-        .quad 0 # nmi
+        .quad do_nmi
         .quad do_int3
         .quad do_overflow
         .quad do_bounds
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/mm.c  Mon Jul 02 12:19:26 2007 -0600
@@ -106,7 +106,8 @@ void __init paging_init(void)
     /* Create user-accessible L2 directory to map the MPT for guests. */
     if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
         goto nomem;
-    l3_ro_mpt = clear_page(page_to_virt(l2_pg));
+    l3_ro_mpt = page_to_virt(l2_pg);
+    clear_page(l3_ro_mpt);
     l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
               l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
 
@@ -132,7 +133,8 @@ void __init paging_init(void)
             if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
                 goto nomem;
             va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
-            l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+            l2_ro_mpt = page_to_virt(l2_pg);
+            clear_page(l2_ro_mpt);
             l3e_write(&l3_ro_mpt[l3_table_offset(va)],
                       l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
             l2_ro_mpt += l2_table_offset(va);
@@ -152,7 +154,8 @@ void __init paging_init(void)
         l3_ro_mpt = 
l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
         if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
             goto nomem;
-        compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+        compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
+        clear_page(l2_ro_mpt);
         l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
                   l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
         l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Mon Jul 02 12:19:26 2007 -0600
@@ -294,8 +294,9 @@ void __init percpu_traps_init(void)
     {
         /* Specify dedicated interrupt stacks for NMIs and double faults. */
         set_intr_gate(TRAP_double_fault, &double_fault);
-        idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
-        idt_table[TRAP_nmi].a          |= 2UL << 32; /* IST2 */
+        idt_table[TRAP_double_fault].a  |= 1UL << 32; /* IST1 */
+        idt_table[TRAP_nmi].a           |= 2UL << 32; /* IST2 */
+        idt_table[TRAP_machine_check].a |= 3UL << 32; /* IST3 */
 
         /*
          * The 32-on-64 hypercall entry vector is only accessible from ring 1.
@@ -310,7 +311,10 @@ void __init percpu_traps_init(void)
     stack_bottom = (char *)get_stack_bottom();
     stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
 
-    /* Double-fault handler has its own per-CPU 2kB stack. */
+    /* Machine Check handler has its own per-CPU 1kB stack. */
+    init_tss[cpu].ist[2] = (unsigned long)&stack[1024];
+
+    /* Double-fault handler has its own per-CPU 1kB stack. */
     init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
 
     /* NMI handler has its own per-CPU 1kB stack. */
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_emulate.c        Mon Jul 02 12:19:26 2007 -0600
@@ -1630,6 +1630,7 @@ x86_emulate(
     }
 
     /* Commit shadow register state. */
+    _regs.eflags &= ~EF_RF;
     *ctxt->regs = _regs;
 
  done:
diff -r 443ce7edad0e -r d146700adf71 xen/common/sysctl.c
--- a/xen/common/sysctl.c       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/common/sysctl.c       Mon Jul 02 12:19:26 2007 -0600
@@ -136,6 +136,39 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
     }
     break;
 
+    case XEN_SYSCTL_getcpuinfo:
+    {
+        uint32_t i, nr_cpus;
+        struct xen_sysctl_cpuinfo cpuinfo;
+        struct vcpu *v;
+
+        nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
+
+        for ( i = 0; i < nr_cpus; i++ )
+        {
+            /* Assume no holes in idle-vcpu map. */
+            if ( (v = idle_vcpu[i]) == NULL )
+                break;
+
+            cpuinfo.idletime = v->runstate.time[RUNSTATE_running];
+            if ( v->is_running )
+                cpuinfo.idletime += NOW() - v->runstate.state_entry_time;
+
+            if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
+            {
+                ret = -EFAULT;
+                break;
+            }
+        }
+
+        op->u.getcpuinfo.nr_cpus = i;
+        ret = 0;
+
+        if ( copy_to_guest(u_sysctl, op, 1) )
+            ret = -EFAULT;
+    }
+    break;
+
     default:
         ret = arch_do_sysctl(op, u_sysctl);
         break;
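
XEN_SYSCTL_getcpuinfo above derives per-CPU idle time from the idle vCPU's runstate: the accumulated RUNSTATE_running time, plus the currently accruing interval if that vCPU is running at the moment of the query. The same accounting as a small sketch (plain integer nanoseconds, no Xen types):

    #include <stdint.h>

    /* now, accumulated_running and state_entry_time are in nanoseconds. */
    static uint64_t idle_time_ns(uint64_t now,
                                 uint64_t accumulated_running,
                                 uint64_t state_entry_time,
                                 int currently_running)
    {
        uint64_t idle = accumulated_running;

        if ( currently_running )
            idle += now - state_entry_time;  /* credit the in-progress interval */
        return idle;
    }
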
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/edd.h
--- a/xen/include/asm-x86/edd.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/edd.h Mon Jul 02 12:19:26 2007 -0600
@@ -32,12 +32,22 @@ struct edd_info {
     u16 legacy_max_cylinder;     /* %cl[7:6]:%ch: maximum cylinder number */
     u8 legacy_max_head;          /* %dh: maximum head number */
     u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */
-    /* Int13, Fn41: Get Device Parameters */
-    u8 edd_device_params[74];    /* as filled into %ds:%si */
+    /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+    struct {
+        u16 length;
+        u8 data[72];
+    } edd_device_params;
 } __attribute__ ((packed));
 
-extern u32 boot_edd_signature[];
-extern u8 boot_edd_signature_nr;
+struct mbr_signature {
+    u8 device;
+    u8 pad[3];
+    u32 signature;
+} __attribute__ ((packed));
+
+/* These all reside in the boot trampoline. Access via bootsym(). */
+extern struct mbr_signature boot_mbr_signature[];
+extern u8 boot_mbr_signature_nr;
 extern struct edd_info boot_edd_info[];
 extern u8 boot_edd_info_nr;
 
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h     Mon Jul 02 12:19:26 2007 -0600
@@ -302,4 +302,18 @@ static inline int hvm_event_injection_fa
     return hvm_funcs.event_injection_faulted(v);
 }
 
+/* These bits in CR4 are owned by the host. */
+#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
+    (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
+
+/* These bits in CR4 cannot be set by the guest. */
+#define HVM_CR4_GUEST_RESERVED_BITS \
+    ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+      X86_CR4_DE  | X86_CR4_PSE | X86_CR4_PAE | \
+      X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+      X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+
+/* These exceptions must always be intercepted. */
+#define HVM_TRAP_MASK (1U << TRAP_machine_check)
+
 #endif /* __ASM_X86_HVM_HVM_H__ */
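
Both the SVM and VMX mov-to-CR4 paths earlier in this changeset use these two masks the same way: reject (and inject #GP for) any write touching HVM_CR4_GUEST_RESERVED_BITS, keep the raw guest value in the shadow CR4, and OR in HVM_CR4_HOST_MASK for the value the hardware actually runs with. Condensed into one sketch (helper name illustrative):

    /* Returns 0 and fills *hw_cr4 on success; returns -1 if the caller
     * should inject #GP.  The masks mirror the two #defines above. */
    static int filter_guest_cr4(unsigned long value, unsigned long *hw_cr4,
                                unsigned long guest_reserved,
                                unsigned long host_mask)
    {
        if ( value & guest_reserved )
            return -1;                       /* reserved bit set by the guest */
        *hw_cr4 = value | host_mask;         /* shadow CR4 keeps 'value' as-is */
        return 0;
    }
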
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/emulate.h     Mon Jul 02 12:19:26 2007 -0600
@@ -138,6 +138,7 @@ static void inline __update_guest_eip(
 {
     ASSERT(inst_len > 0);
     vmcb->rip += inst_len;
+    vmcb->rflags &= ~X86_EFLAGS_RF;
 }
 
 #endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Jul 02 12:19:26 2007 -0600
@@ -464,14 +464,6 @@ void svm_destroy_vmcb(struct vcpu *v);
 
 void setup_vmcb_dump(void);
 
-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
-#else
-#define SVM_CR4_HOST_MASK 0
-#endif
-
-
 #endif /* ASM_X86_HVM_SVM_VMCS_H__ */
 
 /*
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/trace.h
--- a/xen/include/asm-x86/hvm/trace.h   Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/trace.h   Mon Jul 02 12:19:26 2007 -0600
@@ -21,6 +21,7 @@
 #define DO_TRC_HVM_CPUID       1
 #define DO_TRC_HVM_INTR        1
 #define DO_TRC_HVM_NMI         1
+#define DO_TRC_HVM_MCE         1
 #define DO_TRC_HVM_SMI         1
 #define DO_TRC_HVM_VMMCALL     1
 #define DO_TRC_HVM_HLT         1
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Mon Jul 02 12:19:26 2007 -0600
@@ -130,6 +130,8 @@ extern u32 vmx_vmentry_control;
 
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 extern u32 vmx_secondary_exec_control;
+
+extern bool_t cpu_has_vmx_ins_outs_instr_info;
 
 #define cpu_has_vmx_virtualize_apic_accesses \
     (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 12:19:26 2007 -0600
@@ -143,13 +143,6 @@ void vmx_vlapic_msr_changed(struct vcpu 
 #define X86_SEG_AR_GRANULARITY  (1u << 15) /* 15, granularity */
 #define X86_SEG_AR_SEG_UNUSABLE (1u << 16) /* 16, segment unusable */
 
-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
-#else
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
-#endif
-
 #define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
 #define VMCLEAR_OPCODE  ".byte 0x66,0x0f,0xc7\n"        /* reg/opcode: /6 */
 #define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/page.h        Mon Jul 02 12:19:26 2007 -0600
@@ -192,8 +192,9 @@ static inline l4_pgentry_t l4e_from_padd
 #define pgentry_ptr_to_slot(_p)    \
     (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))
 
+#ifndef __ASSEMBLY__
+
 /* Page-table type. */
-#ifndef __ASSEMBLY__
 #if CONFIG_PAGING_LEVELS == 2
 /* x86_32 default */
 typedef struct { u32 pfn; } pagetable_t;
@@ -214,9 +215,11 @@ typedef struct { u64 pfn; } pagetable_t;
 #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
 #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
 #define pagetable_null()        pagetable_from_pfn(0)
-#endif
-
-#define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
+
+void clear_page_sse2(void *);
+#define clear_page(_p)      (cpu_has_xmm2 ?                             \
+                             clear_page_sse2((void *)(_p)) :            \
+                             (void)memset((void *)(_p), 0, PAGE_SIZE))
 #define copy_page(_t,_f)    memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
 
 #define mfn_valid(mfn)      ((mfn) < max_page)
@@ -244,6 +247,8 @@ typedef struct { u64 pfn; } pagetable_t;
 /* Convert between frame number and address formats.  */
 #define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
 #define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
+
+#endif /* !defined(__ASSEMBLY__) */
 
 /* High table entries are reserved by the hypervisor. */
 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
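
clear_page() above now dispatches at run time: an SSE2 variant (clear_page_sse2(), declared in this hunk) when the CPU supports it, memset() otherwise. The macro restated as an inline, sketch only (cpu_has_xmm2 is Xen's feature test, shown here as a plain flag):

    #include <string.h>

    #define PAGE_SIZE 4096
    void clear_page_sse2(void *p);   /* SSE2 implementation elsewhere in the tree */
    extern int cpu_has_xmm2;         /* stand-in for the cpufeature macro */

    static inline void clear_page(void *p)
    {
        if ( cpu_has_xmm2 )
            clear_page_sse2(p);
        else
            memset(p, 0, PAGE_SIZE);
    }
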
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/processor.h   Mon Jul 02 12:19:26 2007 -0600
@@ -104,7 +104,6 @@
 #define TRAP_alignment_check  17
 #define TRAP_machine_check    18
 #define TRAP_simd_error       19
-#define TRAP_deferred_nmi     31
 
 /* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
 /* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. */
@@ -567,7 +566,8 @@ extern void mtrr_ap_init(void);
 extern void mtrr_ap_init(void);
 extern void mtrr_bp_init(void);
 
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
+asmlinkage void do_machine_check(struct cpu_user_regs *regs);
 
 int cpuid_hypervisor_leaves(
     uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h    Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/x86_32/asm_defns.h    Mon Jul 02 12:19:26 2007 -0600
@@ -26,7 +26,7 @@ 1:      addl  $4,%esp;
 #define ASSERT_INTERRUPTS_ENABLED  ASSERT_INTERRUPT_STATUS(nz)
 #define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
 
-#define __SAVE_ALL_PRE                                  \
+#define SAVE_ALL_GPRS                                   \
         cld;                                            \
         pushl %eax;                                     \
         pushl %ebp;                                     \
@@ -35,32 +35,48 @@ 1:      addl  $4,%esp;
         pushl %esi;                                     \
         pushl %edx;                                     \
         pushl %ecx;                                     \
-        pushl %ebx;                                     \
+        pushl %ebx
+
+/*
+ * Saves all register state into an exception/interrupt stack frame.
+ * Returns to the caller at <xen_lbl> if the interrupted context is within
+ * Xen; at <vm86_lbl> if the interrupted context is vm86; or falls through
+ * if the interrupted context is an ordinary guest protected-mode context.
+ * In all cases %ecx contains __HYPERVISOR_DS. %ds/%es are guaranteed to
+ * contain __HYPERVISOR_DS unless control passes to <xen_lbl>, in which case
+ * the caller is responsible for validity of %ds/%es.
+ */
+#define SAVE_ALL(xen_lbl, vm86_lbl)                     \
+        SAVE_ALL_GPRS;                                  \
         testl $(X86_EFLAGS_VM),UREGS_eflags(%esp);      \
-        jz 2f;                                          \
-        call setup_vm86_frame;                          \
-        jmp 3f;                                         \
-        2:testb $3,UREGS_cs(%esp);                      \
-        jz 1f;                                          \
-        mov %ds,UREGS_ds(%esp);                         \
-        mov %es,UREGS_es(%esp);                         \
-        mov %fs,UREGS_fs(%esp);                         \
-        mov %gs,UREGS_gs(%esp);                         \
-        3:
-
-#define SAVE_ALL_NOSEGREGS(_reg)                \
-        __SAVE_ALL_PRE                          \
-        1:
-
-#define SET_XEN_SEGMENTS(_reg)                          \
-        movl $(__HYPERVISOR_DS),%e ## _reg ## x;        \
-        mov %e ## _reg ## x,%ds;                        \
-        mov %e ## _reg ## x,%es;
-
-#define SAVE_ALL(_reg)                          \
-        __SAVE_ALL_PRE                          \
-        SET_XEN_SEGMENTS(_reg)                  \
-        1:
+        mov   %ds,%edi;                                 \
+        mov   %es,%esi;                                 \
+        mov   $(__HYPERVISOR_DS),%ecx;                  \
+        jnz   86f;                                      \
+        .text 1;                                        \
+        86:   call setup_vm86_frame;                    \
+        jmp   vm86_lbl;                                 \
+        .previous;                                      \
+        testb $3,UREGS_cs(%esp);                        \
+        jz    xen_lbl;                                  \
+        /*                                              \
+         * We are the outermost Xen context, but our    \
+         * life is complicated by NMIs and MCEs. These  \
+         * could occur in our critical section and      \
+         * pollute %ds and %es. We have to detect that  \
+         * this has occurred and avoid saving Xen DS/ES \
+         * values to the guest stack frame.             \
+         */                                             \
+        cmpw  %cx,%di;                                  \
+        mov   %ecx,%ds;                                 \
+        mov   %fs,UREGS_fs(%esp);                       \
+        cmove UREGS_ds(%esp),%edi;                      \
+        cmpw  %cx,%si;                                  \
+        mov   %edi,UREGS_ds(%esp);                      \
+        cmove UREGS_es(%esp),%esi;                      \
+        mov   %ecx,%es;                                 \
+        mov   %gs,UREGS_gs(%esp);                       \
+        mov   %esi,UREGS_es(%esp)
 
 #ifdef PERF_COUNTERS
 #define PERFC_INCR(_name,_idx,_cur)                     \
@@ -97,8 +113,8 @@ __asm__(                                
     STR(x) ":\n\t"                              \
     "pushl $"#v"<<16\n\t"                       \
     STR(FIXUP_RING0_GUEST_STACK)                \
-    STR(SAVE_ALL(a))                            \
-    "movl %esp,%eax\n\t"                        \
+    STR(SAVE_ALL(1f,1f)) "\n\t"                 \
+    "1:movl %esp,%eax\n\t"                      \
     "pushl %eax\n\t"                            \
     "call "STR(smp_##x)"\n\t"                   \
     "addl $4,%esp\n\t"                          \
@@ -109,8 +125,8 @@ __asm__(                                
     "\n" __ALIGN_STR"\n"                        \
     "common_interrupt:\n\t"                     \
     STR(FIXUP_RING0_GUEST_STACK)                \
-    STR(SAVE_ALL(a))                            \
-    "movl %esp,%eax\n\t"                        \
+    STR(SAVE_ALL(1f,1f)) "\n\t"                 \
+    "1:movl %esp,%eax\n\t"                      \
     "pushl %eax\n\t"                            \
     "call " STR(do_IRQ) "\n\t"                  \
     "addl $4,%esp\n\t"                          \
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/platform.h
--- a/xen/include/public/platform.h     Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/platform.h     Mon Jul 02 12:19:26 2007 -0600
@@ -114,6 +114,45 @@ typedef struct xenpf_platform_quirk xenp
 typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
 
+#define XENPF_firmware_info       50
+#define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+    /* IN variables. */
+    uint32_t type;
+    uint32_t index;
+    /* OUT variables. */
+    union {
+        struct {
+            /* Int13, Fn48: Check Extensions Present. */
+            uint8_t device;                   /* %dl: bios device number */
+            uint8_t version;                  /* %ah: major version      */
+            uint16_t interface_support;       /* %cx: support bitmap     */
+            /* Int13, Fn08: Legacy Get Device Parameters. */
+            uint16_t legacy_max_cylinder;     /* %cl[7:6]:%ch: max cyl # */
+            uint8_t legacy_max_head;          /* %dh: max head #         */
+            uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector #  */
+            /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+            /* NB. First uint16_t of buffer must be set to buffer size.      */
+            XEN_GUEST_HANDLE(void) edd_params;
+        } disk_info; /* XEN_FW_DISK_INFO */
+        struct {
+            uint8_t device;                   /* bios device number  */
+            uint32_t mbr_signature;           /* offset 0x1b8 in mbr */
+        } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+        struct {
+            /* Int10, AX=4F15: Get EDID info. */
+            uint8_t capabilities;
+            uint8_t edid_transfer_time;
+            /* must refer to 128-byte buffer */
+            XEN_GUEST_HANDLE(uint8_t) edid;
+        } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+    } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
 struct xen_platform_op {
     uint32_t cmd;
     uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -124,6 +163,7 @@ struct xen_platform_op {
         struct xenpf_read_memtype      read_memtype;
         struct xenpf_microcode_update  microcode;
         struct xenpf_platform_quirk    platform_quirk;
+        struct xenpf_firmware_info     firmware_info;
         uint8_t                        pad[128];
     } u;
 };
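
[Editor's note] XENPF_firmware_info takes the type/index pair as IN fields and fills the matching union member on return. The fragment below is a minimal hedged sketch of a dom0 caller; do_platform_op() is a hypothetical hypercall wrapper, the platform.h definitions above are assumed to be in scope, and the index/device semantics shown are an assumption. Real tools would go through the libxc changes listed in the diffstat.

/*
 * Hedged sketch: read the MBR signature of the first reported BIOS disk
 * via XENPF_firmware_info.  do_platform_op() is hypothetical.
 */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

extern int do_platform_op(struct xen_platform_op *op);  /* hypothetical wrapper */

int query_mbr_signature(void)
{
    struct xen_platform_op op;

    memset(&op, 0, sizeof(op));
    op.cmd = XENPF_firmware_info;
    op.interface_version = XENPF_INTERFACE_VERSION;
    op.u.firmware_info.type  = XEN_FW_DISK_MBR_SIGNATURE;
    op.u.firmware_info.index = 0;   /* first disk entry (assumed indexing) */

    if ( do_platform_op(&op) != 0 )
        return -1;

    printf("device 0x%02x: MBR signature 0x%08" PRIx32 "\n",
           (unsigned int)op.u.firmware_info.u.disk_mbr_signature.device,
           op.u.firmware_info.u.disk_mbr_signature.mbr_signature);
    return 0;
}
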
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/sysctl.h       Mon Jul 02 12:19:26 2007 -0600
@@ -140,9 +140,7 @@ typedef struct xen_sysctl_getdomaininfol
 typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
 
-/*
- * Inject debug keys into Xen.
- */
+/* Inject debug keys into Xen. */
 #define XEN_SYSCTL_debug_keys        7
 struct xen_sysctl_debug_keys {
     /* IN variables. */
@@ -151,6 +149,23 @@ struct xen_sysctl_debug_keys {
 };
 typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+#define XEN_SYSCTL_getcpuinfo        8
+struct xen_sysctl_cpuinfo {
+    uint64_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); 
+struct xen_sysctl_getcpuinfo {
+    /* IN variables. */
+    uint32_t max_cpus;
+    XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+    /* OUT variables. */
+    uint32_t nr_cpus;
+}; 
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); 
 
 struct xen_sysctl {
     uint32_t cmd;
@@ -163,6 +178,7 @@ struct xen_sysctl {
         struct xen_sysctl_perfc_op          perfc_op;
         struct xen_sysctl_getdomaininfolist getdomaininfolist;
         struct xen_sysctl_debug_keys        debug_keys;
+        struct xen_sysctl_getcpuinfo        getcpuinfo;
         uint8_t                             pad[128];
     } u;
 };
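
[Editor's note] XEN_SYSCTL_getcpuinfo follows the usual guest-handle pattern: the caller supplies a buffer of up to max_cpus xen_sysctl_cpuinfo_t entries and reads back nr_cpus. A hedged sketch of such a caller follows; do_sysctl() is a hypothetical hypercall wrapper, and the use of set_xen_guest_handle() assumes the standard guest-handle helper applies to this field. Real tools would use the libxc helper added alongside this patch (see xc_misc.c in the diffstat).

/*
 * Hedged sketch of a XEN_SYSCTL_getcpuinfo caller printing per-CPU idle
 * time.  do_sysctl() is hypothetical.
 */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

#define MAX_CPUS 16

extern int do_sysctl(struct xen_sysctl *sysctl);  /* hypothetical wrapper */

int print_idle_times(void)
{
    struct xen_sysctl sysctl;
    xen_sysctl_cpuinfo_t info[MAX_CPUS];
    uint32_t i;

    memset(&sysctl, 0, sizeof(sysctl));
    sysctl.cmd = XEN_SYSCTL_getcpuinfo;
    sysctl.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
    sysctl.u.getcpuinfo.max_cpus = MAX_CPUS;
    set_xen_guest_handle(sysctl.u.getcpuinfo.info, info);  /* assumed helper */

    if ( do_sysctl(&sysctl) != 0 )
        return -1;

    for ( i = 0; i < sysctl.u.getcpuinfo.nr_cpus; i++ )
        printf("cpu%" PRIu32 ": %" PRIu64 " ns idle\n", i, info[i].idletime);

    return 0;
}
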
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/trace.h
--- a/xen/include/public/trace.h        Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/trace.h        Mon Jul 02 12:19:26 2007 -0600
@@ -88,6 +88,7 @@
 #define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
 #define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
 #define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
 
 /* This structure represents a single trace buffer record. */
 struct t_rec {

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
