[Xen-changelog] Merged.

# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 4b89195850398b85cd5a3b57ba8228209f010fd9
# Parent  642b26779c4ecb1538032f5fb66b3a83f3ce9d73
# Parent  821368442403cb9110f466a9c7c2c9849bef9733
Merged.

diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Thu Jan 12 12:20:04 2006
@@ -76,7 +76,9 @@
 DF_MASK                = 0x00000400 
 NT_MASK                = 0x00004000
 VM_MASK                = 0x00020000
-
+/* Pseudo-eflags. */
+NMI_MASK       = 0x80000000
+       
 /* Offsets into shared_info_t. */
 #define evtchn_upcall_pending          /* 0 */
 #define evtchn_upcall_mask             1
@@ -305,8 +307,8 @@
        je ldt_ss                       # returning to user-space with LDT SS
 #endif /* XEN */
 restore_nocheck:
-       testl $VM_MASK, EFLAGS(%esp)
-       jnz resume_vm86
+       testl $(VM_MASK|NMI_MASK), EFLAGS(%esp)
+       jnz hypervisor_iret
        movb EVENT_MASK(%esp), %al
        notb %al                        # %al == ~saved_mask
        XEN_GET_VCPU_INFO(%esi)
@@ -328,11 +330,11 @@
        .long 1b,iret_exc
 .previous
 
-resume_vm86:
-       XEN_UNBLOCK_EVENTS(%esi)
+hypervisor_iret:
+       andl $~NMI_MASK, EFLAGS(%esp)
        RESTORE_REGS
        movl %eax,(%esp)
-       movl $__HYPERVISOR_switch_vm86,%eax
+       movl $__HYPERVISOR_iret,%eax
        int $0x82
        ud2
 
@@ -691,6 +693,15 @@
        call do_debug
        jmp ret_from_exception
 
+ENTRY(nmi)
+       pushl %eax
+       SAVE_ALL
+       xorl %edx,%edx          # zero error code
+       movl %esp,%eax          # pt_regs pointer
+       call do_nmi
+       orl  $NMI_MASK, EFLAGS(%esp)
+       jmp restore_all
+
 #if 0 /* XEN */
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Jan 12 12:20:04 2006
@@ -506,18 +506,11 @@
 
 static void io_check_error(unsigned char reason, struct pt_regs * regs)
 {
-       unsigned long i;
-
        printk("NMI: IOCK error (debug interrupt?)\n");
        show_registers(regs);
 
        /* Re-enable the IOCK line, wait for a few seconds */
-       reason = (reason & 0xf) | 8;
-       outb(reason, 0x61);
-       i = 2000;
-       while (--i) udelay(1000);
-       reason &= ~8;
-       outb(reason, 0x61);
+       clear_io_check_error(reason);
 }
 
 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S       Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S       Thu Jan 12 12:20:04 2006
@@ -121,19 +121,19 @@
        .endm
 
         /*
-         * Must be consistent with the definition in arch_x86_64.h:    
-         *     struct switch_to_user {
+         * Must be consistent with the definition in arch-x86_64.h:    
+         *     struct iret_context {
          *        u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
          *     };
          * #define VGCF_IN_SYSCALL (1<<8) 
          */
-        .macro SWITCH_TO_USER flag
+        .macro HYPERVISOR_IRET flag
         subq $8*4,%rsp                   # reuse rip, cs, rflags, rsp, ss in the stack
         movq %rax,(%rsp)
         movq %r11,1*8(%rsp)
         movq %rcx,2*8(%rsp)              # we saved %rcx upon exceptions
         movq $\flag,3*8(%rsp)
-        movq $__HYPERVISOR_switch_to_user,%rax
+        movq $__HYPERVISOR_iret,%rax
         syscall
         .endm
 
@@ -225,7 +225,7 @@
        jnz  sysret_careful 
         XEN_UNBLOCK_EVENTS(%rsi)                
        RESTORE_ARGS 0,8,0
-        SWITCH_TO_USER VGCF_IN_SYSCALL
+        HYPERVISOR_IRET VGCF_IN_SYSCALL
 
        /* Handle reschedules */
        /* edx: work, edi: workmask */  
@@ -478,7 +478,7 @@
         orb   $3,1*8(%rsp)
        iretq
 user_mode:
-       SWITCH_TO_USER 0                        
+       HYPERVISOR_IRET 0
        
        /* edi: workmask, edx: work */  
 retint_careful:
@@ -719,6 +719,18 @@
        call evtchn_do_upcall
         jmp  error_exit
 
+#ifdef CONFIG_X86_LOCAL_APIC
+ENTRY(nmi)
+       zeroentry do_nmi_callback
+ENTRY(do_nmi_callback)
+        addq $8, %rsp
+        call do_nmi
+        RESTORE_REST
+        XEN_BLOCK_EVENTS(%rsi)
+        GET_THREAD_INFO(%rcx)
+        jmp  retint_restore_args
+#endif
+
         ALIGN
 restore_all_enable_events:  
        XEN_UNBLOCK_EVENTS(%rsi)        # %rsi is already set up...
@@ -733,7 +745,7 @@
         orb   $3,1*8(%rsp)
         iretq
 crit_user_mode:
-        SWITCH_TO_USER 0
+        HYPERVISOR_IRET 0
         
 14:    XEN_LOCKED_BLOCK_EVENTS(%rsi)
        XEN_PUT_VCPU_INFO(%rsi)
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Thu Jan 12 12:20:04 2006
@@ -62,6 +62,7 @@
 #include <asm-xen/xen-public/physdev.h>
 #include "setup_arch_pre.h"
 #include <asm/hypervisor.h>
+#include <asm-xen/xen-public/nmi.h>
 #define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
 #define end_pfn_map end_pfn
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c       Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c       Thu Jan 12 12:20:04 2006
@@ -559,9 +559,11 @@
        printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
        printk("You probably have a hardware problem with your RAM chips\n");
 
+#if 0 /* XEN */
        /* Clear and disable the memory parity error line. */
        reason = (reason & 0xf) | 4;
        outb(reason, 0x61);
+#endif /* XEN */
 }
 
 static void io_check_error(unsigned char reason, struct pt_regs * regs)
@@ -569,12 +571,14 @@
        printk("NMI: IOCK error (debug interrupt?)\n");
        show_registers(regs);
 
+#if 0 /* XEN */
        /* Re-enable the IOCK line, wait for a few seconds */
        reason = (reason & 0xf) | 8;
        outb(reason, 0x61);
        mdelay(2000);
        reason &= ~8;
        outb(reason, 0x61);
+#endif /* XEN */
 }
 
 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Jan 12 12:20:04 2006
@@ -32,6 +32,7 @@
 
 #include <asm-xen/xen-public/xen.h>
 #include <asm-xen/xen-public/sched.h>
+#include <asm-xen/xen-public/nmi.h>
 
 #define _hypercall0(type, name)                        \
 ({                                             \
@@ -300,6 +301,14 @@
                           SHUTDOWN_suspend, srec);
 }
 
+static inline int
+HYPERVISOR_nmi_op(
+       unsigned long op,
+       unsigned long arg)
+{
+       return _hypercall2(int, nmi_op, op, arg);
+}
+
 #endif /* __HYPERCALL_H__ */
 
 /*
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h  Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h  Thu Jan 12 12:20:04 2006
@@ -29,6 +29,7 @@
 
 extern void hypervisor_callback(void);
 extern void failsafe_callback(void);
+extern void nmi(void);
 
 static void __init machine_specific_arch_setup(void)
 {
@@ -36,5 +37,7 @@
            __KERNEL_CS, (unsigned long)hypervisor_callback,
            __KERNEL_CS, (unsigned long)failsafe_callback);
 
+       HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
+
        machine_specific_modify_cpu_capabilities(&boot_cpu_data);
 }
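
For context: registering the guest's NMI handler comes down to the single hypercall shown in machine_specific_arch_setup() above. A minimal, purely illustrative sketch of both directions (not part of the patch; my_nmi_entry is a hypothetical stand-in for the real nmi() assembly entry point):

    #include <asm-xen/xen-public/nmi.h>   /* XENNMI_* command codes */

    extern void my_nmi_entry(void);       /* hypothetical asm entry point */

    static int register_nmi_cb(void)
    {
        /* Only dom0/vcpu0 may register; all other callers get -EINVAL. */
        return HYPERVISOR_nmi_op(XENNMI_register_callback,
                                 (unsigned long)&my_nmi_entry);
    }

    static void unregister_nmi_cb(void)
    {
        /* arg is NULL/unused for unregister. */
        HYPERVISOR_nmi_op(XENNMI_unregister_callback, 0);
    }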
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h       Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h       Thu Jan 12 12:20:04 2006
@@ -287,9 +287,9 @@
 }
 
 static inline int
-HYPERVISOR_switch_to_user(void)
-{
-       return _hypercall0(int, switch_to_user);
+HYPERVISOR_iret(void)
+{
+       return _hypercall0(int, iret);
 }
 
 static inline int
@@ -305,6 +305,14 @@
 {
        return _hypercall3(int, sched_op, SCHEDOP_shutdown,
                           SHUTDOWN_suspend, srec);
+}
+
+static inline int
+HYPERVISOR_nmi_op(
+       unsigned long op,
+       unsigned long arg)
+{
+       return _hypercall2(int, nmi_op, op, arg);
 }
 
 #endif /* __HYPERCALL_H__ */
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h        Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h        Thu Jan 12 12:20:04 2006
@@ -35,6 +35,7 @@
 
 extern void hypervisor_callback(void);
 extern void failsafe_callback(void);
+extern void nmi(void);
 
 static void __init machine_specific_arch_setup(void)
 {
@@ -43,5 +44,9 @@
                 (unsigned long) failsafe_callback,
                 (unsigned long) system_call);
 
+#ifdef CONFIG_X86_LOCAL_APIC
+       HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
+#endif
+
        machine_specific_modify_cpu_capabilities(&boot_cpu_data);
 }
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/domain.c     Thu Jan 12 12:20:04 2006
@@ -288,9 +288,7 @@
 
 #if defined(__i386__)
 
-    d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
-        (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
-    spin_lock_init(&d->arch.mapcache.lock);
+    mapcache_init(d);
 
 #else /* __x86_64__ */
 
@@ -481,14 +479,6 @@
 
 
 #ifdef __x86_64__
-
-void toggle_guest_mode(struct vcpu *v)
-{
-    v->arch.flags ^= TF_kernel_mode;
-    __asm__ __volatile__ ( "swapgs" );
-    update_pagetables(v);
-    write_ptbase(v);
-}
 
 #define loadsegment(seg,value) ({               \
     int __r = 1;                                \
@@ -659,35 +649,6 @@
     percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
 }
 
-long do_switch_to_user(void)
-{
-    struct cpu_user_regs  *regs = guest_cpu_user_regs();
-    struct switch_to_user  stu;
-    struct vcpu    *v = current;
-
-    if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
-         unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
-        return -EFAULT;
-
-    toggle_guest_mode(v);
-
-    regs->rip    = stu.rip;
-    regs->cs     = stu.cs | 3; /* force guest privilege */
-    regs->rflags = (stu.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
-    regs->rsp    = stu.rsp;
-    regs->ss     = stu.ss | 3; /* force guest privilege */
-
-    if ( !(stu.flags & VGCF_IN_SYSCALL) )
-    {
-        regs->entry_vector = 0;
-        regs->r11 = stu.r11;
-        regs->rcx = stu.rcx;
-    }
-
-    /* Saved %rax gets written back to regs->rax in entry.S. */
-    return stu.rax;
-}
-
 #define switch_kernel_stack(_n,_c) ((void)0)
 
 #elif defined(__i386__)
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/mm.c Thu Jan 12 12:20:04 2006
@@ -297,7 +297,6 @@
 
 #if defined(__x86_64__)
     /* If in user mode, switch to kernel mode just to read LDT mapping. */
-    extern void toggle_guest_mode(struct vcpu *);
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 #elif defined(__i386__)
@@ -2971,7 +2970,6 @@
 
 #ifdef CONFIG_X86_64
     struct vcpu *v = current;
-    extern void toggle_guest_mode(struct vcpu *);
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #endif
 
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/traps.c      Thu Jan 12 12:20:04 2006
@@ -596,7 +596,6 @@
     u16 x;
 #if defined(__x86_64__)
     /* If in user mode, switch to kernel mode just to read I/O bitmap. */
-    extern void toggle_guest_mode(struct vcpu *);
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 #elif defined(__i386__)
@@ -1080,26 +1079,23 @@
     return 0;
 }
 
-
-/* Defer dom0 notification to softirq context (unsafe in NMI context). */
-static unsigned long nmi_dom0_softirq_reason;
-#define NMI_DOM0_PARITY_ERR 0
-#define NMI_DOM0_IO_ERR     1
-#define NMI_DOM0_UNKNOWN    2
-
-static void nmi_dom0_softirq(void)
-{
-    if ( dom0 == NULL )
+static void nmi_softirq(void)
+{
+    /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
+    evtchn_notify(dom0->vcpu[0]);
+}
+
+static void nmi_dom0_report(unsigned int reason_idx)
+{
+    struct domain *d;
+
+    if ( (d = dom0) == NULL )
         return;
 
-    if ( test_and_clear_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason) )
-        send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);
-
-    if ( test_and_clear_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason) )
-        send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
-
-    if ( test_and_clear_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason) )
-        send_guest_virq(dom0->vcpu[0], VIRQ_NMI);
+    set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
+
+    if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
+        raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
 }
 
 asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
@@ -1107,8 +1103,7 @@
     switch ( opt_nmi[0] )
     {
     case 'd': /* 'dom0' */
-        set_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason);
-        raise_softirq(NMI_DOM0_SOFTIRQ);
+        nmi_dom0_report(_XEN_NMIREASON_parity_error);
     case 'i': /* 'ignore' */
         break;
     default:  /* 'fatal' */
@@ -1127,8 +1122,7 @@
     switch ( opt_nmi[0] )
     {
     case 'd': /* 'dom0' */
-        set_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason);
-        raise_softirq(NMI_DOM0_SOFTIRQ);
+        nmi_dom0_report(_XEN_NMIREASON_io_error);
     case 'i': /* 'ignore' */
         break;
     default:  /* 'fatal' */
@@ -1147,8 +1141,7 @@
     switch ( opt_nmi[0] )
     {
     case 'd': /* 'dom0' */
-        set_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason);
-        raise_softirq(NMI_DOM0_SOFTIRQ);
+        nmi_dom0_report(_XEN_NMIREASON_unknown);
     case 'i': /* 'ignore' */
         break;
     default:  /* 'fatal' */
@@ -1347,7 +1340,7 @@
 
     cpu_init();
 
-    open_softirq(NMI_DOM0_SOFTIRQ, nmi_dom0_softirq);
+    open_softirq(NMI_SOFTIRQ, nmi_softirq);
 }
 
 
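The reporting path above is deliberately two-staged so that nothing heavyweight happens in NMI context: the reason bits go into shared_info, the callback is flagged pending, and any wakeup of dom0/vcpu0 is pushed out to a softirq. A condensed, illustrative restatement (the _sketch names are hypothetical; the real functions are nmi_dom0_report() and nmi_softirq() above):

    /* NMI context: no locks, no sleeping, no direct VCPU wakeup. */
    static void nmi_dom0_report_sketch(unsigned int reason_idx)
    {
        struct domain *d = dom0;

        if ( d == NULL )
            return;

        /* Publish the reason where dom0's handler can read it. */
        set_bit(reason_idx, &d->shared_info->arch.nmi_reason);

        /* If a callback was already pending, defer the wakeup to
         * softirq context rather than touching the VCPU here. */
        if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
            raise_softirq(NMI_SOFTIRQ);
    }

    /* Softirq context: a wakeup is now safe. */
    static void nmi_softirq_sketch(void)
    {
        evtchn_notify(dom0->vcpu[0]);
    }
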
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/asm-offsets.c Thu Jan 12 12:20:04 2006
@@ -65,6 +65,10 @@
            arch.guest_context.kernel_ss);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
+    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
+    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     BLANK();
 
     OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/domain_page.c
--- a/xen/arch/x86/x86_32/domain_page.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/domain_page.c Thu Jan 12 12:20:04 2006
@@ -20,33 +20,16 @@
 #include <asm/flushtlb.h>
 #include <asm/hardirq.h>
 
-#define MAPCACHE_ORDER    10
-#define MAPCACHE_ENTRIES  (1 << MAPCACHE_ORDER)
-
-/* Use a spare PTE bit to mark entries ready for recycling. */
-#define READY_FOR_TLB_FLUSH (1<<10)
-
-static void flush_all_ready_maps(void)
-{
-    struct mapcache *cache = &current->domain->arch.mapcache;
-    unsigned int i;
-
-    for ( i = 0; i < MAPCACHE_ENTRIES; i++ )
-        if ( (l1e_get_flags(cache->l1tab[i]) & READY_FOR_TLB_FLUSH) )
-            cache->l1tab[i] = l1e_empty();
-}
-
-void *map_domain_pages(unsigned long pfn, unsigned int order)
+void *map_domain_page(unsigned long pfn)
 {
     unsigned long va;
-    unsigned int idx, i, flags, vcpu = current->vcpu_id;
+    unsigned int idx, i, vcpu = current->vcpu_id;
     struct domain *d;
     struct mapcache *cache;
-#ifndef NDEBUG
-    unsigned int flush_count = 0;
-#endif
+    struct vcpu_maphash_entry *hashent;
 
     ASSERT(!in_irq());
+
     perfc_incrc(map_domain_page_count);
 
     /* If we are the idle domain, ensure that we run on our own page tables. */
@@ -56,6 +39,18 @@
 
     cache = &d->arch.mapcache;
 
+    hashent = &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(pfn)];
+#if 0
+    if ( hashent->pfn == pfn )
+    {
+        idx = hashent->idx;
+        hashent->refcnt++;
+        ASSERT(hashent->refcnt != 0);
+        ASSERT(l1e_get_pfn(cache->l1tab[idx]) == pfn);
+        goto out;
+    }
+#endif
+
     spin_lock(&cache->lock);
 
     /* Has some other CPU caused a wrap? We must flush if so. */
@@ -70,45 +65,97 @@
         }
     }
 
-    do {
-        idx = cache->cursor = (cache->cursor + 1) & (MAPCACHE_ENTRIES - 1);
-        if ( unlikely(idx == 0) )
-        {
-            ASSERT(flush_count++ == 0);
-            flush_all_ready_maps();
-            perfc_incrc(domain_page_tlb_flush);
-            local_flush_tlb();
-            cache->shadow_epoch[vcpu] = ++cache->epoch;
-            cache->tlbflush_timestamp = tlbflush_current_time();
-        }
-
-        flags = 0;
-        for ( i = 0; i < (1U << order); i++ )
-            flags |= l1e_get_flags(cache->l1tab[idx+i]);
-    }
-    while ( flags & _PAGE_PRESENT );
-
-    for ( i = 0; i < (1U << order); i++ )
-        cache->l1tab[idx+i] = l1e_from_pfn(pfn+i, __PAGE_HYPERVISOR);
+    idx = find_next_zero_bit(cache->inuse, MAPCACHE_ENTRIES, cache->cursor);
+    if ( unlikely(idx >= MAPCACHE_ENTRIES) )
+    {
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < ARRAY_SIZE(cache->garbage); i++ )
+        {
+            unsigned long x = xchg(&cache->garbage[i], 0);
+            cache->inuse[i] &= ~x;
+        }
+
+        /* /Second/, flush TLBs. */
+        perfc_incrc(domain_page_tlb_flush);
+        local_flush_tlb();
+        cache->shadow_epoch[vcpu] = ++cache->epoch;
+        cache->tlbflush_timestamp = tlbflush_current_time();
+
+        idx = find_first_zero_bit(cache->inuse, MAPCACHE_ENTRIES);
+        ASSERT(idx < MAPCACHE_ENTRIES);
+    }
+
+    set_bit(idx, cache->inuse);
+    cache->cursor = idx + 1;
 
     spin_unlock(&cache->lock);
 
+    cache->l1tab[idx] = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
+
+/*out:*/
     va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
     return (void *)va;
 }
 
-void unmap_domain_pages(void *va, unsigned int order)
-{
-    unsigned int idx, i;
+void unmap_domain_page(void *va)
+{
+    unsigned int idx;
     struct mapcache *cache = &current->domain->arch.mapcache;
+    unsigned long pfn;
+    struct vcpu_maphash_entry *hashent;
+
+    ASSERT(!in_irq());
 
     ASSERT((void *)MAPCACHE_VIRT_START <= va);
     ASSERT(va < (void *)MAPCACHE_VIRT_END);
 
     idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
-
-    for ( i = 0; i < (1U << order); i++ )
-        l1e_add_flags(cache->l1tab[idx+i], READY_FOR_TLB_FLUSH);
+    pfn = l1e_get_pfn(cache->l1tab[idx]);
+    hashent = &cache->vcpu_maphash[current->vcpu_id].hash[MAPHASH_HASHFN(pfn)];
+
+    if ( hashent->idx == idx )
+    {
+        ASSERT(hashent->pfn == pfn);
+        ASSERT(hashent->refcnt != 0);
+        hashent->refcnt--;
+    }
+    else if ( hashent->refcnt == 0 )
+    {
+        if ( hashent->idx != MAPHASHENT_NOTINUSE )
+        {
+            /* /First/, zap the PTE. */
+            ASSERT(l1e_get_pfn(cache->l1tab[hashent->idx]) == hashent->pfn);
+            cache->l1tab[hashent->idx] = l1e_empty();
+            /* /Second/, mark as garbage. */
+            set_bit(hashent->idx, cache->garbage);
+        }
+
+        /* Add newly-freed mapping to the maphash. */
+        hashent->pfn = pfn;
+        hashent->idx = idx;
+    }
+    else
+    {
+        /* /First/, zap the PTE. */
+        cache->l1tab[idx] = l1e_empty();
+        /* /Second/, mark as garbage. */
+        set_bit(idx, cache->garbage);
+    }
+}
+
+void mapcache_init(struct domain *d)
+{
+    unsigned int i, j;
+
+    d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
+        (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+    spin_lock_init(&d->arch.mapcache.lock);
+
+    /* Mark all maphash entries as not in use. */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        for ( j = 0; j < MAPHASH_ENTRIES; j++ )
+            d->arch.mapcache.vcpu_maphash[i].hash[j].idx =
+                MAPHASHENT_NOTINUSE;
 }
 
 #define GLOBALMAP_BITS (IOREMAP_MBYTES << (20 - PAGE_SHIFT))
@@ -128,15 +175,10 @@
 
     spin_lock(&globalmap_lock);
 
-    for ( ; ; )
-    {
-        idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
-        va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
-
-        /* End of round? If not then we're done in this loop. */
-        if ( va < FIXADDR_START )
-            break;
-
+    idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
+    va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
+    if ( unlikely(va >= FIXADDR_START) )
+    {
         /* /First/, clean the garbage map and update the inuse list. */
         for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
         {
@@ -147,7 +189,9 @@
         /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
         flush_tlb_all();
 
-        inuse_cursor = 0;
+        idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
+        va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
+        ASSERT(va < FIXADDR_START);
     }
 
     set_bit(idx, inuse);
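
Both the per-domain mapcache above and this global map now use the same allocation scheme: scan the inuse bitmap from a cursor, and only when it is exhausted reap the garbage bitmap and flush TLBs in one batch. A stripped-down sketch of that scheme with hypothetical names and sizes (assuming the usual Xen bitmap helpers; locking omitted):

    #define SLOTS 1024

    static unsigned long inuse[BITS_TO_LONGS(SLOTS)];
    static unsigned long garbage[BITS_TO_LONGS(SLOTS)];
    static unsigned int cursor;

    static unsigned int alloc_slot(void)
    {
        unsigned int i, idx;

        idx = find_next_zero_bit(inuse, SLOTS, cursor);
        if ( idx >= SLOTS )
        {
            /* /First/, reclaim everything freed since the last flush. */
            for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
                inuse[i] &= ~xchg(&garbage[i], 0);

            /* /Second/, flush stale mappings in a single batch. */
            local_flush_tlb();

            idx = find_first_zero_bit(inuse, SLOTS);
        }

        set_bit(idx, inuse);
        cursor = idx + 1;
        return idx;
    }

    static void free_slot(unsigned int idx)
    {
        /* Freeing is cheap: just mark the slot for the next reap. */
        set_bit(idx, garbage);
    }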
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/entry.S       Thu Jan 12 12:20:04 2006
@@ -326,7 +326,9 @@
         shl  $IRQSTAT_shift,%eax
         test %ecx,irq_stat(%eax,1)
         jnz  process_softirqs
-/*test_guest_events:*/
+        btr  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+        jc   process_nmi
+test_guest_events:
         movl VCPU_vcpu_info(%ebx),%eax
         testb $0xFF,VCPUINFO_upcall_mask(%eax)
         jnz  restore_all_guest
@@ -348,7 +350,24 @@
         sti       
         call do_softirq
         jmp  test_all_events
-                
+       
+       ALIGN
+process_nmi:
+        movl VCPU_nmi_addr(%ebx),%eax
+        test %eax,%eax
+        jz   test_all_events
+        bts  $_VCPUF_nmi_masked,VCPU_flags(%ebx)
+        jc   1f
+        sti
+        leal VCPU_trap_bounce(%ebx),%edx
+        movl %eax,TRAPBOUNCE_eip(%edx)
+        movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
+        movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
+        call create_bounce_frame
+        jmp  test_all_events
+1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+        jmp  test_guest_events
+
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:            */
 /*   {EIP, CS, EFLAGS, [ESP, SS]}                                        */
 /* %edx == trap_bounce, %ebx == struct vcpu                       */
@@ -620,9 +639,7 @@
         jne   defer_nmi
 
 continue_nmi:
-        movl  $(__HYPERVISOR_DS),%edx
-        movl  %edx,%ds
-        movl  %edx,%es
+        SET_XEN_SEGMENTS(d)
         movl  %esp,%edx
         pushl %edx
         call  do_nmi
@@ -659,42 +676,6 @@
         GET_GUEST_REGS(%ecx)
         movl %eax,UREGS_eax(%ecx)
         jmp  do_sched_op
-
-do_switch_vm86:
-        # Reset the stack pointer
-        GET_GUEST_REGS(%ecx)
-        movl %ecx,%esp
-
-        # GS:ESI == Ring-1 stack activation
-        movl UREGS_esp(%esp),%esi
-VFLT1:  mov  UREGS_ss(%esp),%gs
-
-        # ES:EDI == Ring-0 stack activation
-        leal UREGS_eip(%esp),%edi
-
-        # Restore the hypercall-number-clobbered EAX on our stack frame
-VFLT2:  movl %gs:(%esi),%eax
-        movl %eax,UREGS_eax(%esp)
-        addl $4,%esi
-               
-       # Copy the VM86 activation from the ring-1 stack to the ring-0 stack
-        movl $(UREGS_user_sizeof-UREGS_eip)/4,%ecx
-VFLT3:  movl %gs:(%esi),%eax
-        stosl
-        addl $4,%esi
-        loop VFLT3
-
-        # Fix up EFLAGS: IOPL=0, IF=1, VM=1
-        andl $~X86_EFLAGS_IOPL,UREGS_eflags(%esp)
-        orl  $X86_EFLAGS_IF|X86_EFLAGS_VM,UREGS_eflags(%esp)
-        
-        jmp test_all_events
-
-.section __ex_table,"a"
-        .long VFLT1,domain_crash_synchronous
-        .long VFLT2,domain_crash_synchronous
-        .long VFLT3,domain_crash_synchronous
-.previous
 
 .data
 
@@ -744,11 +725,12 @@
         .long do_grant_table_op     /* 20 */
         .long do_vm_assist
         .long do_update_va_mapping_otherdomain
-        .long do_switch_vm86
+        .long do_iret
         .long do_vcpu_op
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
-        .long do_acm_op             /* 27 */
+        .long do_acm_op
+        .long do_nmi_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
@@ -777,11 +759,12 @@
         .byte 3 /* do_grant_table_op    */  /* 20 */
         .byte 2 /* do_vm_assist         */
         .byte 5 /* do_update_va_mapping_otherdomain */
-        .byte 0 /* do_switch_vm86       */
+        .byte 0 /* do_iret              */
         .byte 3 /* do_vcpu_op           */
         .byte 0 /* do_ni_hypercall      */  /* 25 */
         .byte 4 /* do_mmuext_op         */
         .byte 1 /* do_acm_op            */
+        .byte 2 /* do_nmi_op            */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/traps.c       Thu Jan 12 12:20:04 2006
@@ -157,6 +157,64 @@
         __asm__ __volatile__ ( "hlt" );
 }
 
+static inline void pop_from_guest_stack(
+    void *dst, struct cpu_user_regs *regs, unsigned int bytes)
+{
+    if ( unlikely(__copy_from_user(dst, (void __user *)regs->esp, bytes)) )
+        domain_crash_synchronous();
+    regs->esp += bytes;
+}
+
+asmlinkage unsigned long do_iret(void)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    u32 eflags;
+
+    /* Check worst-case stack frame for overlap with Xen protected area. */
+    if ( unlikely(!access_ok(regs->esp, 40)) )
+        domain_crash_synchronous();
+
+    /* Pop and restore EAX (clobbered by hypercall). */
+    pop_from_guest_stack(&regs->eax, regs, 4);
+
+    /* Pop and restore CS and EIP. */
+    pop_from_guest_stack(&regs->eip, regs, 8);
+
+    /*
+     * Pop, fix up and restore EFLAGS. We fix up in a local staging area
+     * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
+     */
+    pop_from_guest_stack(&eflags, regs, 4);
+    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
+
+    if ( VM86_MODE(regs) )
+    {
+        /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
+        pop_from_guest_stack(&regs->esp, regs, 24);
+    }
+    else if ( unlikely(RING_0(regs)) )
+    {
+        domain_crash_synchronous();
+    }
+    else if ( !RING_1(regs) )
+    {
+        /* Return to ring 2/3: pop and restore ESP and SS. */
+        pop_from_guest_stack(&regs->esp, regs, 8);
+    }
+
+    /* No longer in NMI context. */
+    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
+
+    /* Restore upcall mask from saved value. */
+    current->vcpu_info->evtchn_upcall_mask = regs->saved_upcall_mask;
+
+    /*
+     * The hypercall exit path will overwrite EAX with this return
+     * value.
+     */
+    return regs->eax;
+}
+
 BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
 asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs)
 {
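
For reference, the frame that do_iret() pops off the guest stack can be written out as a struct. The layout below is implied by the pops above (worst case 40 bytes, matching the access_ok() check); the struct itself is illustrative only and is not defined anywhere in the changeset:

    /* Guest %esp at the point of the hypercall points here. */
    struct iret_frame_sketch {
        uint32_t eax;            /* restored (hypercall clobbered it)     */
        uint32_t eip, cs;
        uint32_t eflags;         /* Xen clears IOPL and forces IF on      */
        uint32_t esp, ss;        /* popped only when returning to ring 2/3,
                                  * or as part of the VM86 block below    */
        uint32_t es, ds, fs, gs; /* popped only for a VM86 return         */
    };                           /* sizeof == 40: the worst case checked  */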
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/asm-offsets.c Thu Jan 12 12:20:04 2006
@@ -65,6 +65,10 @@
            arch.guest_context.syscall_callback_eip);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
+    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
+    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     BLANK();
 
     OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/entry.S       Thu Jan 12 12:20:04 2006
@@ -171,7 +171,9 @@
         leaq  irq_stat(%rip),%rcx
         testl $~0,(%rcx,%rax,1)
         jnz   process_softirqs
-/*test_guest_events:*/
+        btr   $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+        jc    process_nmi
+test_guest_events:
         movq  VCPU_vcpu_info(%rbx),%rax
         testb $0xFF,VCPUINFO_upcall_mask(%rax)
         jnz   restore_all_guest
@@ -322,6 +324,23 @@
         call do_softirq
         jmp  test_all_events
 
+       ALIGN
+/* %rbx: struct vcpu */
+process_nmi:
+        movq VCPU_nmi_addr(%rbx),%rax
+        test %rax,%rax
+        jz   test_all_events
+        bts  $_VCPUF_nmi_masked,VCPU_flags(%rbx)
+        jc   1f
+        sti
+        leaq VCPU_trap_bounce(%rbx),%rdx
+        movq %rax,TRAPBOUNCE_eip(%rdx)
+        movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+        call create_bounce_frame
+        jmp  test_all_events
+1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+        jmp  test_guest_events
+       
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK:                     */
 /*   { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS }   */
 /* %rdx: trap_bounce, %rbx: struct vcpu                           */
@@ -339,6 +358,9 @@
 1:      /* In kernel context already: push new frame at existing %rsp. */
         movq  UREGS_rsp+8(%rsp),%rsi
         andb  $0xfc,UREGS_cs+8(%rsp)    # Indicate kernel context to guest.
+       testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+       jz    2f
+       orb   $0x01,UREGS_cs+8(%rsp)
 2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
         movq  $HYPERVISOR_VIRT_START,%rax
         cmpq  %rax,%rsi
@@ -569,7 +591,7 @@
         SAVE_ALL
         movq  %rsp,%rdi
         call  do_nmi
-       jmp   restore_all_xen
+        jmp   ret_from_intr
 
 do_arch_sched_op:
         # Ensure we return success even if we return via schedule_tail()
@@ -626,11 +648,12 @@
         .quad do_grant_table_op     /* 20 */
         .quad do_vm_assist
         .quad do_update_va_mapping_otherdomain
-        .quad do_switch_to_user
+        .quad do_iret
         .quad do_vcpu_op
         .quad do_set_segment_base   /* 25 */
         .quad do_mmuext_op
         .quad do_acm_op
+        .quad do_nmi_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .quad do_ni_hypercall
         .endr
@@ -659,11 +682,12 @@
         .byte 3 /* do_grant_table_op    */  /* 20 */
         .byte 2 /* do_vm_assist         */
         .byte 4 /* do_update_va_mapping_otherdomain */
-        .byte 0 /* do_switch_to_user    */
+        .byte 0 /* do_iret              */
         .byte 3 /* do_vcpu_op           */
         .byte 2 /* do_set_segment_base  */  /* 25 */
         .byte 4 /* do_mmuext_op         */
         .byte 1 /* do_acm_op            */
+        .byte 2 /* do_nmi_op            */
         .rept NR_hypercalls-(.-hypercall_args_table)
         .byte 0 /* do_ni_hypercall      */
         .endr
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/traps.c       Thu Jan 12 12:20:04 2006
@@ -12,6 +12,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
+#include <asm/shadow.h>
 #include <asm/vmx.h>
 
 void show_registers(struct cpu_user_regs *regs)
@@ -113,6 +114,52 @@
         __asm__ __volatile__ ( "hlt" );
 }
 
+void toggle_guest_mode(struct vcpu *v)
+{
+    v->arch.flags ^= TF_kernel_mode;
+    __asm__ __volatile__ ( "swapgs" );
+    update_pagetables(v);
+    write_ptbase(v);
+}
+
+long do_iret(void)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    struct iret_context iret_saved;
+    struct vcpu *v = current;
+
+    if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
+                                 sizeof(iret_saved))) )
+        domain_crash_synchronous();
+
+    /* Returning to user mode? */
+    if ( (iret_saved.cs & 3) == 3 )
+    {
+        if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
+            return -EFAULT;
+        toggle_guest_mode(v);
+    }
+
+    regs->rip    = iret_saved.rip;
+    regs->cs     = iret_saved.cs | 3; /* force guest privilege */
+    regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
+    regs->rsp    = iret_saved.rsp;
+    regs->ss     = iret_saved.ss | 3; /* force guest privilege */
+
+    if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
+    {
+        regs->entry_vector = 0;
+        regs->r11 = iret_saved.r11;
+        regs->rcx = iret_saved.rcx;
+    }
+
+    /* No longer in NMI context. */
+    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
+
+    /* Saved %rax gets written back to regs->rax in entry.S. */
+    return iret_saved.rax;
+}
+
 asmlinkage void syscall_enter(void);
 void __init percpu_traps_init(void)
 {
diff -r 642b26779c4e -r 4b8919585039 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Thu Jan 12 12:13:34 2006
+++ b/xen/common/dom0_ops.c     Thu Jan 12 12:20:04 2006
@@ -323,7 +323,7 @@
         new_affinity = v->cpu_affinity;
         memcpy(cpus_addr(new_affinity),
                &op->u.setvcpuaffinity.cpumap,
-               min((int)BITS_TO_LONGS(NR_CPUS),
+               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
                    (int)sizeof(op->u.setvcpuaffinity.cpumap)));
 
         ret = vcpu_set_affinity(v, &new_affinity);
@@ -501,7 +501,7 @@
         op->u.getvcpuinfo.cpumap   = 0;
         memcpy(&op->u.getvcpuinfo.cpumap,
                cpus_addr(v->cpu_affinity),
-               min((int)BITS_TO_LONGS(NR_CPUS),
+               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
                    (int)sizeof(op->u.getvcpuinfo.cpumap)));
         ret = 0;
 
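The two hunks above fix a units bug: BITS_TO_LONGS() yields a count of longs, while memcpy() wants a byte count, so the old bound could truncate the CPU map. A tiny hypothetical check (plain C, nothing Xen-specific) makes the difference concrete:

    #include <stdio.h>

    #define BITS_PER_LONG    (8 * sizeof(long))
    #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
        /* With NR_CPUS == 32: the old bound counted longs, the new
         * bound counts bytes. */
        printf("%lu longs\n", (unsigned long)BITS_TO_LONGS(32));
        printf("%lu bytes\n",
               (unsigned long)(BITS_TO_LONGS(32) * sizeof(long)));
        return 0;
    }
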
diff -r 642b26779c4e -r 4b8919585039 xen/common/kernel.c
--- a/xen/common/kernel.c       Thu Jan 12 12:13:34 2006
+++ b/xen/common/kernel.c       Thu Jan 12 12:20:04 2006
@@ -11,6 +11,7 @@
 #include <xen/compile.h>
 #include <xen/sched.h>
 #include <asm/current.h>
+#include <public/nmi.h>
 #include <public/version.h>
 
 void cmdline_parse(char *cmdline)
@@ -146,6 +147,43 @@
     }
 
     return -ENOSYS;
+}
+
+long do_nmi_op(unsigned int cmd, void *arg)
+{
+    struct vcpu *v = current;
+    struct domain *d = current->domain;
+    long rc = 0;
+
+    switch ( cmd )
+    {
+    case XENNMI_register_callback:
+        if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
+        { 
+           rc = -EINVAL;
+        }
+        else
+        {
+            v->nmi_addr = (unsigned long)arg;
+#ifdef CONFIG_X86
+            /*
+             * If no handler was registered we can 'lose the NMI edge'.
+             * Re-assert it now.
+             */
+            if ( d->shared_info->arch.nmi_reason != 0 )
+                set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
+#endif
+        }
+        break;
+    case XENNMI_unregister_callback:
+        v->nmi_addr = 0;
+        break;
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
 }
 
 long do_vm_assist(unsigned int cmd, unsigned int type)
diff -r 642b26779c4e -r 4b8919585039 xen/common/schedule.c
--- a/xen/common/schedule.c     Thu Jan 12 12:13:34 2006
+++ b/xen/common/schedule.c     Thu Jan 12 12:20:04 2006
@@ -207,7 +207,10 @@
 
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
 {
-    if ( cpus_empty(*affinity) )
+    cpumask_t online_affinity;
+
+    cpus_and(online_affinity, *affinity, cpu_online_map);
+    if ( cpus_empty(online_affinity) )
         return -EINVAL;
 
     return SCHED_OP(set_affinity, v, affinity);
diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/domain.h      Thu Jan 12 12:20:04 2006
@@ -13,13 +13,43 @@
     unsigned long  eip;
 };
 
+#define MAPHASH_ENTRIES 8
+#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
+#define MAPHASHENT_NOTINUSE ((u16)~0U)
+struct vcpu_maphash {
+    struct vcpu_maphash_entry {
+        unsigned long pfn;
+        uint16_t      idx;
+        uint16_t      refcnt;
+    } hash[MAPHASH_ENTRIES];
+} __cacheline_aligned;
+
+#define MAPCACHE_ORDER   10
+#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
 struct mapcache {
+    /* The PTEs that provide the mappings, and a cursor into the array. */
     l1_pgentry_t *l1tab;
     unsigned int cursor;
+
+    /* Protects map_domain_page(). */
+    spinlock_t lock;
+
+    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
     unsigned int epoch, shadow_epoch[MAX_VIRT_CPUS];
     u32 tlbflush_timestamp;
-    spinlock_t lock;
+
+    /* Which mappings are in use, and which are garbage to reap next epoch? */
+    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+
+    /* Lock-free per-VCPU hash of recently-used mappings. */
+    struct vcpu_maphash vcpu_maphash[MAX_VIRT_CPUS];
 };
+
+extern void mapcache_init(struct domain *);
+
+/* x86/64: toggle guest between kernel and user modes. */
+extern void toggle_guest_mode(struct vcpu *);
 
 struct arch_domain
 {
diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/nmi.h
--- a/xen/include/asm-x86/nmi.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/nmi.h Thu Jan 12 12:20:04 2006
@@ -1,6 +1,8 @@
 
 #ifndef ASM_NMI_H
 #define ASM_NMI_H
+
+#include <public/nmi.h>
 
 struct cpu_user_regs;
  
diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/processor.h   Thu Jan 12 12:20:04 2006
@@ -123,6 +123,7 @@
 #define TBF_EXCEPTION_ERRCODE  2
 #define TBF_INTERRUPT          8
 #define TBF_FAILSAFE          16
+#define TBF_SLOW_IRET         32
 
 /* 'arch_vcpu' flags values */
 #define _TF_kernel_mode        0
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/arch-x86_32.h  Thu Jan 12 12:20:04 2006
@@ -135,6 +135,7 @@
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
     unsigned long pfn_to_mfn_frame_list_list; 
+    unsigned long nmi_reason;
 } arch_shared_info_t;
 
 typedef struct {
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/arch-x86_64.h  Thu Jan 12 12:20:04 2006
@@ -88,11 +88,20 @@
 #define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
 
 /*
- * int HYPERVISOR_switch_to_user(void)
+ * int HYPERVISOR_iret(void)
  * All arguments are on the kernel stack, in the following format.
  * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ *   RING0 -> RING3 kernel mode.
+ *   RING1 -> RING3 kernel mode.
+ *   RING2 -> RING3 kernel mode.
+ *   RING3 -> RING3 user mode.
+ * However, RING0 indicates that the guest kernel should return to itself
+ * directly with
+ *      orb   $3,1*8(%rsp)
+ *      iretq
  * If flags contains VGCF_IN_SYSCALL:
- *   Restore RAX, RIP, RFLAGS, RSP. 
+ *   Restore RAX, RIP, RFLAGS, RSP.
  *   Discard R11, RCX, CS, SS.
  * Otherwise:
  *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
@@ -100,10 +109,19 @@
  */
 /* Guest exited in SYSCALL context? Return to guest with SYSRET? */
 #define VGCF_IN_SYSCALL (1<<8)
+struct iret_context {
+    /* Top of stack (%rsp at point of hypercall). */
+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+    /* Bottom of iret stack frame. */
+};
+/*
+ * For compatibility with HYPERVISOR_switch_to_user which is the old
+ * name for HYPERVISOR_iret.
+ */
 struct switch_to_user {
     /* Top of stack (%rsp at point of hypercall). */
     uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
-    /* Bottom of switch_to_user stack frame. */
+    /* Bottom of iret stack frame. */
 };
 
 /*
@@ -202,6 +220,7 @@
     unsigned long max_pfn;                  /* max pfn that appears in table */
     /* Frame containing list of mfns containing list of mfns containing p2m. */
     unsigned long pfn_to_mfn_frame_list_list; 
+    unsigned long nmi_reason;
 } arch_shared_info_t;
 
 typedef struct {
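
Tying this back to the guest side: the HYPERVISOR_IRET macro in the x86_64 entry.S hunk earlier builds exactly this frame, reusing the rip/cs/rflags/rsp/ss words already on the stack and storing rax/r11/rcx/flags below them. A purely illustrative C view of what the hypervisor expects at %rsp (the saved_*/return_* names are hypothetical):

    struct iret_context frame = {
        .rax    = saved_rax,        /* Xen hands this back as the retval */
        .r11    = saved_r11,        /* discarded if VGCF_IN_SYSCALL      */
        .rcx    = saved_rcx,        /* discarded if VGCF_IN_SYSCALL      */
        .flags  = VGCF_IN_SYSCALL,  /* or 0 on the exception-return path */
        .rip    = return_rip,
        .cs     = return_cs,
        .rflags = return_rflags,
        .rsp    = return_rsp,
        .ss     = return_ss,
    };
    /* Arrange for %rsp == &frame, then invoke HYPERVISOR_iret(). */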
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/xen.h
--- a/xen/include/public/xen.h  Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/xen.h  Thu Jan 12 12:20:04 2006
@@ -53,12 +53,14 @@
 #define __HYPERVISOR_grant_table_op       20
 #define __HYPERVISOR_vm_assist            21
 #define __HYPERVISOR_update_va_mapping_otherdomain 22
-#define __HYPERVISOR_switch_vm86          23 /* x86/32 only */
-#define __HYPERVISOR_switch_to_user       23 /* x86/64 only */
+#define __HYPERVISOR_iret                 23 /* x86 only */
+#define __HYPERVISOR_switch_vm86          23 /* x86/32 only (obsolete name) */
+#define __HYPERVISOR_switch_to_user       23 /* x86/64 only (obsolete name) */
 #define __HYPERVISOR_vcpu_op              24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
 #define __HYPERVISOR_acm_op               27
+#define __HYPERVISOR_nmi_op               28
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -69,10 +71,7 @@
 #define VIRQ_DEBUG      1  /* Request guest to dump debug info.           */
 #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
 #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
-#define VIRQ_PARITY_ERR 4  /* (DOM0) NMI parity error (port 0x61, bit 7). */
-#define VIRQ_IO_ERR     5  /* (DOM0) NMI I/O error    (port 0x61, bit 6). */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
-#define VIRQ_NMI        7  /* (DOM0) Unknown NMI (not from ISA port 0x61).*/
 #define NR_VIRQS        8
 
 /*
diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/domain_page.h
--- a/xen/include/xen/domain_page.h     Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/domain_page.h     Thu Jan 12 12:20:04 2006
@@ -10,24 +10,19 @@
 #include <xen/config.h>
 #include <xen/mm.h>
 
-#define map_domain_page(pfn)   map_domain_pages(pfn,0)
-#define unmap_domain_page(va)  unmap_domain_pages(va,0)
-
 #ifdef CONFIG_DOMAIN_PAGE
 
 /*
- * Maps a given range of page frames, returning the mapped virtual address. The
- * pages are now accessible within the current VCPU until a corresponding
- * call to unmap_domain_page().
+ * Map a given page frame, returning the mapped virtual address. The page is
+ * then accessible within the current VCPU until a corresponding unmap call.
  */
-extern void *map_domain_pages(unsigned long pfn, unsigned int order);
+extern void *map_domain_page(unsigned long pfn);
 
 /*
- * Pass a VA within the first page of a range previously mapped in the context
- * of the currently-executing VCPU via a call to map_domain_pages(). Those
- * pages will then be removed from the mapping lists.
+ * Pass a VA within a page previously mapped in the context of the
+ * currently-executing VCPU via a call to map_domain_page().
  */
-extern void unmap_domain_pages(void *va, unsigned int order);
+extern void unmap_domain_page(void *va);
 
 /*
  * Similar to the above calls, except the mapping is accessible in all
@@ -97,8 +92,8 @@
 
 #else /* !CONFIG_DOMAIN_PAGE */
 
-#define map_domain_pages(pfn,order)         phys_to_virt((pfn)<<PAGE_SHIFT)
-#define unmap_domain_pages(va,order)        ((void)((void)(va),(void)(order)))
+#define map_domain_page(pfn)                phys_to_virt((pfn)<<PAGE_SHIFT)
+#define unmap_domain_page(va)               ((void)(va))
 
 #define map_domain_page_global(pfn)         phys_to_virt((pfn)<<PAGE_SHIFT)
 #define unmap_domain_page_global(va)        ((void)(va))
diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/sched.h   Thu Jan 12 12:20:04 2006
@@ -80,6 +80,8 @@
 
     /* Bitmask of CPUs on which this VCPU may run. */
     cpumask_t        cpu_affinity;
+
+    unsigned long    nmi_addr;      /* NMI callback address. */
 
     /* Bitmask of CPUs which are holding onto this VCPU's state. */
     cpumask_t        vcpu_dirty_cpumask;
@@ -361,6 +363,12 @@
  /* VCPU is not-runnable */
 #define _VCPUF_down            5
 #define VCPUF_down             (1UL<<_VCPUF_down)
+ /* NMI callback pending for this VCPU? */
+#define _VCPUF_nmi_pending     8
+#define VCPUF_nmi_pending      (1UL<<_VCPUF_nmi_pending)
+ /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
+#define _VCPUF_nmi_masked      9
+#define VCPUF_nmi_masked       (1UL<<_VCPUF_nmi_masked)
 
 /*
  * Per-domain flags (domain_flags).
diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/softirq.h Thu Jan 12 12:20:04 2006
@@ -6,7 +6,7 @@
 #define SCHEDULE_SOFTIRQ                  1
 #define NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ 2
 #define KEYPRESS_SOFTIRQ                  3
-#define NMI_DOM0_SOFTIRQ                  4
+#define NMI_SOFTIRQ                       4
 #define PAGE_SCRUB_SOFTIRQ                5
 #define DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ  6
 #define NR_SOFTIRQS                       7
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h
--- /dev/null   Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h       Thu Jan 12 12:20:04 2006
@@ -0,0 +1,33 @@
+/*
+ *  include/asm-xen/asm-i386/mach-xen/mach_traps.h
+ *
+ *  Machine specific NMI handling for Xen
+ */
+#ifndef _MACH_TRAPS_H
+#define _MACH_TRAPS_H
+
+#include <linux/bitops.h>
+#include <asm-xen/xen-public/nmi.h>
+
+static inline void clear_mem_error(unsigned char reason) {}
+static inline void clear_io_check_error(unsigned char reason) {}
+
+static inline unsigned char get_nmi_reason(void)
+{
+       shared_info_t *s = HYPERVISOR_shared_info;
+       unsigned char reason = 0;
+
+       /* construct a value which looks like it came from
+        * port 0x61.
+        */
+       if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
+               reason |= 0x40;
+       if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
+               reason |= 0x80;
+
+        return reason;
+}
+
+static inline void reassert_nmi(void) {}
+
+#endif /* !_MACH_TRAPS_H */
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h
--- /dev/null   Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h     Thu Jan 12 12:20:04 2006
@@ -0,0 +1,75 @@
+/*
+ *  linux/include/asm-i386/nmi.h
+ */
+#ifndef ASM_NMI_H
+#define ASM_NMI_H
+
+#include <linux/pm.h>
+
+#include <asm-xen/xen-public/nmi.h>
+
+struct pt_regs;
+ 
+typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
+ 
+/** 
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be
+ * set. Return 1 if the NMI was handled.
+ */
+void set_nmi_callback(nmi_callback_t callback);
+ 
+/** 
+ * unset_nmi_callback
+ *
+ * Remove the handler previously set.
+ */
+void unset_nmi_callback(void);
+ 
+#ifdef CONFIG_PM
+ 
+/** Replace the PM callback routine for NMI. */
+struct pm_dev * set_nmi_pm_callback(pm_callback callback);
+
+/** Unset the PM callback routine back to the default. */
+void unset_nmi_pm_callback(struct pm_dev * dev);
+
+#else
+
+static inline struct pm_dev * set_nmi_pm_callback(pm_callback callback)
+{
+       return 0;
+} 
+ 
+static inline void unset_nmi_pm_callback(struct pm_dev * dev)
+{
+}
+
+#endif /* CONFIG_PM */
+ 
+extern void default_do_nmi(struct pt_regs *);
+extern void die_nmi(char *str, struct pt_regs *regs);
+
+static inline unsigned char get_nmi_reason(void)
+{
+        shared_info_t *s = HYPERVISOR_shared_info;
+        unsigned char reason = 0;
+
+        /* construct a value which looks like it came from
+         * port 0x61.
+         */
+        if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
+                reason |= 0x40;
+        if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
+                reason |= 0x80;
+
+        return reason;
+}
+
+extern int panic_on_timeout;
+extern int unknown_nmi_panic;
+
+extern int check_nmi_watchdog(void);
+ 
+#endif /* ASM_NMI_H */
diff -r 642b26779c4e -r 4b8919585039 patches/linux-2.6.12/i386-mach-io-check-nmi.patch
--- /dev/null   Thu Jan 12 12:13:34 2006
+++ b/patches/linux-2.6.12/i386-mach-io-check-nmi.patch Thu Jan 12 12:20:04 2006
@@ -0,0 +1,43 @@
+--- ref-linux-2.6.12/arch/i386/kernel/traps.c  2005-12-19 09:23:44.000000000 +0000
++++ linux-2.6.12-xen0/arch/i386/kernel/traps.c 2006-01-05 15:51:52.000000000 +0000
+@@ -521,18 +521,11 @@
+ 
+ static void io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+-      unsigned long i;
+-
+       printk("NMI: IOCK error (debug interrupt?)\n");
+       show_registers(regs);
+ 
+       /* Re-enable the IOCK line, wait for a few seconds */
+-      reason = (reason & 0xf) | 8;
+-      outb(reason, 0x61);
+-      i = 2000;
+-      while (--i) udelay(1000);
+-      reason &= ~8;
+-      outb(reason, 0x61);
++      clear_io_check_error(reason);
+ }
+ 
+ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+--- ref-linux-2.6.12/include/asm-i386/mach-default/mach_traps.h        2005-06-17 20:48:29.000000000 +0100
++++ linux-2.6.12-xen0/include/asm-i386/mach-default/mach_traps.h       2006-01-05 15:52:33.000000000 +0000
+@@ -15,6 +15,18 @@
+       outb(reason, 0x61);
+ }
+ 
++static inline void clear_io_check_error(unsigned char reason)
++{
++      unsigned long i;
++
++      reason = (reason & 0xf) | 8;
++      outb(reason, 0x61);
++      i = 2000;
++      while (--i) udelay(1000);
++      reason &= ~8;
++      outb(reason, 0x61);
++}
++
+ static inline unsigned char get_nmi_reason(void)
+ {
+       return inb(0x61);
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/nmi.h
--- /dev/null   Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/nmi.h  Thu Jan 12 12:20:04 2006
@@ -0,0 +1,54 @@
+/******************************************************************************
+ * nmi.h
+ * 
+ * NMI callback registration and reason codes.
+ * 
+ * Copyright (c) 2005, Keir Fraser <keir@xxxxxxxxxxxxx>
+ */
+
+#ifndef __XEN_PUBLIC_NMI_H__
+#define __XEN_PUBLIC_NMI_H__
+
+/*
+ * NMI reason codes:
+ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
+ */
+ /* I/O-check error reported via ISA port 0x61, bit 6. */
+#define _XEN_NMIREASON_io_error     0
+#define XEN_NMIREASON_io_error      (1UL << _XEN_NMIREASON_io_error)
+ /* Parity error reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_parity_error 1
+#define XEN_NMIREASON_parity_error  (1UL << _XEN_NMIREASON_parity_error)
+ /* Unknown hardware-generated NMI. */
+#define _XEN_NMIREASON_unknown      2
+#define XEN_NMIREASON_unknown       (1UL << _XEN_NMIREASON_unknown)
+
+/*
+ * long nmi_op(unsigned int cmd, void *arg)
+ * NB. All ops return zero on success, else a negative error code.
+ */
+
+/*
+ * Register NMI callback for this (calling) VCPU. Currently this only makes
+ * sense for domain 0, vcpu 0. All other callers receive -EINVAL.
+ * arg == address of callback function.
+ */
+#define XENNMI_register_callback   0
+
+/*
+ * Deregister NMI callback for this (calling) VCPU.
+ * arg == NULL.
+ */
+#define XENNMI_unregister_callback 1
+
+#endif /* __XEN_PUBLIC_NMI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
