# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID 4b89195850398b85cd5a3b57ba8228209f010fd9
# Parent 642b26779c4ecb1538032f5fb66b3a83f3ce9d73
# Parent 821368442403cb9110f466a9c7c2c9849bef9733
Merged.
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Thu Jan 12 12:20:04 2006
@@ -76,7 +76,9 @@
DF_MASK = 0x00000400
NT_MASK = 0x00004000
VM_MASK = 0x00020000
-
+/* Pseudo-eflags. */
+NMI_MASK = 0x80000000
+
/* Offsets into shared_info_t. */
#define evtchn_upcall_pending /* 0 */
#define evtchn_upcall_mask 1
@@ -305,8 +307,8 @@
je ldt_ss # returning to user-space with LDT SS
#endif /* XEN */
restore_nocheck:
- testl $VM_MASK, EFLAGS(%esp)
- jnz resume_vm86
+ testl $(VM_MASK|NMI_MASK), EFLAGS(%esp)
+ jnz hypervisor_iret
movb EVENT_MASK(%esp), %al
notb %al # %al == ~saved_mask
XEN_GET_VCPU_INFO(%esi)
@@ -328,11 +330,11 @@
.long 1b,iret_exc
.previous
-resume_vm86:
- XEN_UNBLOCK_EVENTS(%esi)
+hypervisor_iret:
+ andl $~NMI_MASK, EFLAGS(%esp)
RESTORE_REGS
movl %eax,(%esp)
- movl $__HYPERVISOR_switch_vm86,%eax
+ movl $__HYPERVISOR_iret,%eax
int $0x82
ud2
@@ -691,6 +693,15 @@
call do_debug
jmp ret_from_exception
+ENTRY(nmi)
+ pushl %eax
+ SAVE_ALL
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_nmi
+ orl $NMI_MASK, EFLAGS(%esp)
+ jmp restore_all
+
#if 0 /* XEN */
/*
* NMI is doubly nasty. It can happen _while_ we're handling
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Jan 12 12:20:04 2006
@@ -506,18 +506,11 @@
static void io_check_error(unsigned char reason, struct pt_regs * regs)
{
- unsigned long i;
-
printk("NMI: IOCK error (debug interrupt?)\n");
show_registers(regs);
/* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
- i = 2000;
- while (--i) udelay(1000);
- reason &= ~8;
- outb(reason, 0x61);
+ clear_io_check_error(reason);
}
static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S Thu Jan 12 12:20:04 2006
@@ -121,19 +121,19 @@
.endm
/*
- * Must be consistent with the definition in arch_x86_64.h:
- * struct switch_to_user {
+ * Must be consistent with the definition in arch-x86_64.h:
+ * struct iret_context {
* u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
* };
* #define VGCF_IN_SYSCALL (1<<8)
*/
- .macro SWITCH_TO_USER flag
+ .macro HYPERVISOR_IRET flag
subq $8*4,%rsp # reuse rip, cs, rflags, rsp, ss in the stack
movq %rax,(%rsp)
movq %r11,1*8(%rsp)
movq %rcx,2*8(%rsp) # we saved %rcx upon exceptions
movq $\flag,3*8(%rsp)
- movq $__HYPERVISOR_switch_to_user,%rax
+ movq $__HYPERVISOR_iret,%rax
syscall
.endm
@@ -225,7 +225,7 @@
jnz sysret_careful
XEN_UNBLOCK_EVENTS(%rsi)
RESTORE_ARGS 0,8,0
- SWITCH_TO_USER VGCF_IN_SYSCALL
+ HYPERVISOR_IRET VGCF_IN_SYSCALL
/* Handle reschedules */
/* edx: work, edi: workmask */
@@ -478,7 +478,7 @@
orb $3,1*8(%rsp)
iretq
user_mode:
- SWITCH_TO_USER 0
+ HYPERVISOR_IRET 0
/* edi: workmask, edx: work */
retint_careful:
@@ -719,6 +719,18 @@
call evtchn_do_upcall
jmp error_exit
+#ifdef CONFIG_X86_LOCAL_APIC
+ENTRY(nmi)
+ zeroentry do_nmi_callback
+ENTRY(do_nmi_callback)
+ addq $8, %rsp
+ call do_nmi
+ RESTORE_REST
+ XEN_BLOCK_EVENTS(%rsi)
+ GET_THREAD_INFO(%rcx)
+ jmp retint_restore_args
+#endif
+
ALIGN
restore_all_enable_events:
XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
@@ -733,7 +745,7 @@
orb $3,1*8(%rsp)
iretq
crit_user_mode:
- SWITCH_TO_USER 0
+ HYPERVISOR_IRET 0
14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
XEN_PUT_VCPU_INFO(%rsi)
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Thu Jan 12 12:20:04 2006
@@ -62,6 +62,7 @@
#include <asm-xen/xen-public/physdev.h>
#include "setup_arch_pre.h"
#include <asm/hypervisor.h>
+#include <asm-xen/xen-public/nmi.h>
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
#define end_pfn_map end_pfn
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c Thu Jan 12 12:20:04 2006
@@ -559,9 +559,11 @@
printk("Uhhuh. NMI received. Dazed and confused, but trying to
continue\n");
printk("You probably have a hardware problem with your RAM chips\n");
+#if 0 /* XEN */
/* Clear and disable the memory parity error line. */
reason = (reason & 0xf) | 4;
outb(reason, 0x61);
+#endif /* XEN */
}
static void io_check_error(unsigned char reason, struct pt_regs * regs)
@@ -569,12 +571,14 @@
printk("NMI: IOCK error (debug interrupt?)\n");
show_registers(regs);
+#if 0 /* XEN */
/* Re-enable the IOCK line, wait for a few seconds */
reason = (reason & 0xf) | 8;
outb(reason, 0x61);
mdelay(2000);
reason &= ~8;
outb(reason, 0x61);
+#endif /* XEN */
}
static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Jan 12 12:20:04 2006
@@ -32,6 +32,7 @@
#include <asm-xen/xen-public/xen.h>
#include <asm-xen/xen-public/sched.h>
+#include <asm-xen/xen-public/nmi.h>
#define _hypercall0(type, name) \
({ \
@@ -300,6 +301,14 @@
SHUTDOWN_suspend, srec);
}
+static inline int
+HYPERVISOR_nmi_op(
+ unsigned long op,
+ unsigned long arg)
+{
+ return _hypercall2(int, nmi_op, op, arg);
+}
+
#endif /* __HYPERCALL_H__ */
/*
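[For reference, the new HYPERVISOR_nmi_op wrapper above reduces to a trap through the hypercall vector. A minimal sketch of what _hypercall2(int, nmi_op, op, arg) expands to on x86/32, assuming the int $0x82 trap convention visible elsewhere in this changeset — illustrative only; the real expansion lives in the _hypercall2 macro:]

static inline int
HYPERVISOR_nmi_op_expanded(unsigned long op, unsigned long arg)
{
    int ret;
    /* %eax carries the hypercall number, %ebx/%ecx the arguments;
     * int $0x82 traps into Xen and the result returns in %eax. */
    __asm__ __volatile__ (
        "int $0x82"
        : "=a" (ret)
        : "0" (__HYPERVISOR_nmi_op), "b" (op), "c" (arg)
        : "memory" );
    return ret;
}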
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h Thu Jan 12 12:20:04 2006
@@ -29,6 +29,7 @@
extern void hypervisor_callback(void);
extern void failsafe_callback(void);
+extern void nmi(void);
static void __init machine_specific_arch_setup(void)
{
@@ -36,5 +37,7 @@
__KERNEL_CS, (unsigned long)hypervisor_callback,
__KERNEL_CS, (unsigned long)failsafe_callback);
+ HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
+
machine_specific_modify_cpu_capabilities(&boot_cpu_data);
}
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Thu Jan 12 12:20:04 2006
@@ -287,9 +287,9 @@
}
static inline int
-HYPERVISOR_switch_to_user(void)
-{
- return _hypercall0(int, switch_to_user);
+HYPERVISOR_iret(void)
+{
+ return _hypercall0(int, iret);
}
static inline int
@@ -305,6 +305,14 @@
{
return _hypercall3(int, sched_op, SCHEDOP_shutdown,
SHUTDOWN_suspend, srec);
+}
+
+static inline int
+HYPERVISOR_nmi_op(
+ unsigned long op,
+ unsigned long arg)
+{
+ return _hypercall2(int, nmi_op, op, arg);
}
#endif /* __HYPERCALL_H__ */
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h Thu Jan 12 12:20:04 2006
@@ -35,6 +35,7 @@
extern void hypervisor_callback(void);
extern void failsafe_callback(void);
+extern void nmi(void);
static void __init machine_specific_arch_setup(void)
{
@@ -43,5 +44,9 @@
(unsigned long) failsafe_callback,
(unsigned long) system_call);
+#ifdef CONFIG_X86_LOCAL_APIC
+ HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
+#endif
+
machine_specific_modify_cpu_capabilities(&boot_cpu_data);
}
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/domain.c Thu Jan 12 12:20:04 2006
@@ -288,9 +288,7 @@
#if defined(__i386__)
- d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
- (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
- spin_lock_init(&d->arch.mapcache.lock);
+ mapcache_init(d);
#else /* __x86_64__ */
@@ -481,14 +479,6 @@
#ifdef __x86_64__
-
-void toggle_guest_mode(struct vcpu *v)
-{
- v->arch.flags ^= TF_kernel_mode;
- __asm__ __volatile__ ( "swapgs" );
- update_pagetables(v);
- write_ptbase(v);
-}
#define loadsegment(seg,value) ({ \
int __r = 1; \
@@ -659,35 +649,6 @@
percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
}
-long do_switch_to_user(void)
-{
- struct cpu_user_regs *regs = guest_cpu_user_regs();
- struct switch_to_user stu;
- struct vcpu *v = current;
-
- if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
- unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
- return -EFAULT;
-
- toggle_guest_mode(v);
-
- regs->rip = stu.rip;
- regs->cs = stu.cs | 3; /* force guest privilege */
- regs->rflags = (stu.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
- regs->rsp = stu.rsp;
- regs->ss = stu.ss | 3; /* force guest privilege */
-
- if ( !(stu.flags & VGCF_IN_SYSCALL) )
- {
- regs->entry_vector = 0;
- regs->r11 = stu.r11;
- regs->rcx = stu.rcx;
- }
-
- /* Saved %rax gets written back to regs->rax in entry.S. */
- return stu.rax;
-}
-
#define switch_kernel_stack(_n,_c) ((void)0)
#elif defined(__i386__)
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/mm.c Thu Jan 12 12:20:04 2006
@@ -297,7 +297,6 @@
#if defined(__x86_64__)
/* If in user mode, switch to kernel mode just to read LDT mapping. */
- extern void toggle_guest_mode(struct vcpu *);
int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
#elif defined(__i386__)
@@ -2971,7 +2970,6 @@
#ifdef CONFIG_X86_64
struct vcpu *v = current;
- extern void toggle_guest_mode(struct vcpu *);
int user_mode = !(v->arch.flags & TF_kernel_mode);
#endif
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/traps.c Thu Jan 12 12:20:04 2006
@@ -596,7 +596,6 @@
u16 x;
#if defined(__x86_64__)
/* If in user mode, switch to kernel mode just to read I/O bitmap. */
- extern void toggle_guest_mode(struct vcpu *);
int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
#elif defined(__i386__)
@@ -1080,26 +1079,23 @@
return 0;
}
-
-/* Defer dom0 notification to softirq context (unsafe in NMI context). */
-static unsigned long nmi_dom0_softirq_reason;
-#define NMI_DOM0_PARITY_ERR 0
-#define NMI_DOM0_IO_ERR 1
-#define NMI_DOM0_UNKNOWN 2
-
-static void nmi_dom0_softirq(void)
-{
- if ( dom0 == NULL )
+static void nmi_softirq(void)
+{
+ /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
+ evtchn_notify(dom0->vcpu[0]);
+}
+
+static void nmi_dom0_report(unsigned int reason_idx)
+{
+ struct domain *d;
+
+ if ( (d = dom0) == NULL )
return;
- if ( test_and_clear_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason) )
- send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);
-
- if ( test_and_clear_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason) )
- send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
-
- if ( test_and_clear_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason) )
- send_guest_virq(dom0->vcpu[0], VIRQ_NMI);
+ set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
+
+ if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
+ raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
}
asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
@@ -1107,8 +1103,7 @@
switch ( opt_nmi[0] )
{
case 'd': /* 'dom0' */
- set_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason);
- raise_softirq(NMI_DOM0_SOFTIRQ);
+ nmi_dom0_report(_XEN_NMIREASON_parity_error);
case 'i': /* 'ignore' */
break;
default: /* 'fatal' */
@@ -1127,8 +1122,7 @@
switch ( opt_nmi[0] )
{
case 'd': /* 'dom0' */
- set_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason);
- raise_softirq(NMI_DOM0_SOFTIRQ);
+ nmi_dom0_report(_XEN_NMIREASON_io_error);
case 'i': /* 'ignore' */
break;
default: /* 'fatal' */
@@ -1147,8 +1141,7 @@
switch ( opt_nmi[0] )
{
case 'd': /* 'dom0' */
- set_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason);
- raise_softirq(NMI_DOM0_SOFTIRQ);
+ nmi_dom0_report(_XEN_NMIREASON_unknown);
case 'i': /* 'ignore' */
break;
default: /* 'fatal' */
@@ -1347,7 +1340,7 @@
cpu_init();
- open_softirq(NMI_DOM0_SOFTIRQ, nmi_dom0_softirq);
+ open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/asm-offsets.c Thu Jan 12 12:20:04 2006
@@ -65,6 +65,10 @@
arch.guest_context.kernel_ss);
OFFSET(VCPU_kernel_sp, struct vcpu,
arch.guest_context.kernel_sp);
+ OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
+ OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+ DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
+ DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
BLANK();
OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/domain_page.c
--- a/xen/arch/x86/x86_32/domain_page.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/domain_page.c Thu Jan 12 12:20:04 2006
@@ -20,33 +20,16 @@
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
-#define MAPCACHE_ORDER 10
-#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
-
-/* Use a spare PTE bit to mark entries ready for recycling. */
-#define READY_FOR_TLB_FLUSH (1<<10)
-
-static void flush_all_ready_maps(void)
-{
- struct mapcache *cache = &current->domain->arch.mapcache;
- unsigned int i;
-
- for ( i = 0; i < MAPCACHE_ENTRIES; i++ )
- if ( (l1e_get_flags(cache->l1tab[i]) & READY_FOR_TLB_FLUSH) )
- cache->l1tab[i] = l1e_empty();
-}
-
-void *map_domain_pages(unsigned long pfn, unsigned int order)
+void *map_domain_page(unsigned long pfn)
{
unsigned long va;
- unsigned int idx, i, flags, vcpu = current->vcpu_id;
+ unsigned int idx, i, vcpu = current->vcpu_id;
struct domain *d;
struct mapcache *cache;
-#ifndef NDEBUG
- unsigned int flush_count = 0;
-#endif
+ struct vcpu_maphash_entry *hashent;
ASSERT(!in_irq());
+
perfc_incrc(map_domain_page_count);
/* If we are the idle domain, ensure that we run on our own page tables. */
@@ -56,6 +39,18 @@
cache = &d->arch.mapcache;
+ hashent = &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(pfn)];
+#if 0
+ if ( hashent->pfn == pfn )
+ {
+ idx = hashent->idx;
+ hashent->refcnt++;
+ ASSERT(hashent->refcnt != 0);
+ ASSERT(l1e_get_pfn(cache->l1tab[idx]) == pfn);
+ goto out;
+ }
+#endif
+
spin_lock(&cache->lock);
/* Has some other CPU caused a wrap? We must flush if so. */
@@ -70,45 +65,97 @@
}
}
- do {
- idx = cache->cursor = (cache->cursor + 1) & (MAPCACHE_ENTRIES - 1);
- if ( unlikely(idx == 0) )
- {
- ASSERT(flush_count++ == 0);
- flush_all_ready_maps();
- perfc_incrc(domain_page_tlb_flush);
- local_flush_tlb();
- cache->shadow_epoch[vcpu] = ++cache->epoch;
- cache->tlbflush_timestamp = tlbflush_current_time();
- }
-
- flags = 0;
- for ( i = 0; i < (1U << order); i++ )
- flags |= l1e_get_flags(cache->l1tab[idx+i]);
- }
- while ( flags & _PAGE_PRESENT );
-
- for ( i = 0; i < (1U << order); i++ )
- cache->l1tab[idx+i] = l1e_from_pfn(pfn+i, __PAGE_HYPERVISOR);
+ idx = find_next_zero_bit(cache->inuse, MAPCACHE_ENTRIES, cache->cursor);
+ if ( unlikely(idx >= MAPCACHE_ENTRIES) )
+ {
+ /* /First/, clean the garbage map and update the inuse list. */
+ for ( i = 0; i < ARRAY_SIZE(cache->garbage); i++ )
+ {
+ unsigned long x = xchg(&cache->garbage[i], 0);
+ cache->inuse[i] &= ~x;
+ }
+
+ /* /Second/, flush TLBs. */
+ perfc_incrc(domain_page_tlb_flush);
+ local_flush_tlb();
+ cache->shadow_epoch[vcpu] = ++cache->epoch;
+ cache->tlbflush_timestamp = tlbflush_current_time();
+
+ idx = find_first_zero_bit(cache->inuse, MAPCACHE_ENTRIES);
+ ASSERT(idx < MAPCACHE_ENTRIES);
+ }
+
+ set_bit(idx, cache->inuse);
+ cache->cursor = idx + 1;
spin_unlock(&cache->lock);
+ cache->l1tab[idx] = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
+
+/*out:*/
va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
return (void *)va;
}
-void unmap_domain_pages(void *va, unsigned int order)
-{
- unsigned int idx, i;
+void unmap_domain_page(void *va)
+{
+ unsigned int idx;
struct mapcache *cache = &current->domain->arch.mapcache;
+ unsigned long pfn;
+ struct vcpu_maphash_entry *hashent;
+
+ ASSERT(!in_irq());
ASSERT((void *)MAPCACHE_VIRT_START <= va);
ASSERT(va < (void *)MAPCACHE_VIRT_END);
idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
-
- for ( i = 0; i < (1U << order); i++ )
- l1e_add_flags(cache->l1tab[idx+i], READY_FOR_TLB_FLUSH);
+ pfn = l1e_get_pfn(cache->l1tab[idx]);
+ hashent = &cache->vcpu_maphash[current->vcpu_id].hash[MAPHASH_HASHFN(pfn)];
+
+ if ( hashent->idx == idx )
+ {
+ ASSERT(hashent->pfn == pfn);
+ ASSERT(hashent->refcnt != 0);
+ hashent->refcnt--;
+ }
+ else if ( hashent->refcnt == 0 )
+ {
+ if ( hashent->idx != MAPHASHENT_NOTINUSE )
+ {
+ /* /First/, zap the PTE. */
+ ASSERT(l1e_get_pfn(cache->l1tab[hashent->idx]) == hashent->pfn);
+ cache->l1tab[hashent->idx] = l1e_empty();
+ /* /Second/, mark as garbage. */
+ set_bit(hashent->idx, cache->garbage);
+ }
+
+ /* Add newly-freed mapping to the maphash. */
+ hashent->pfn = pfn;
+ hashent->idx = idx;
+ }
+ else
+ {
+ /* /First/, zap the PTE. */
+ cache->l1tab[idx] = l1e_empty();
+ /* /Second/, mark as garbage. */
+ set_bit(idx, cache->garbage);
+ }
+}
+
+void mapcache_init(struct domain *d)
+{
+ unsigned int i, j;
+
+ d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
+ (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+ spin_lock_init(&d->arch.mapcache.lock);
+
+ /* Mark all maphash entries as not in use. */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ for ( j = 0; j < MAPHASH_ENTRIES; j++ )
+ d->arch.mapcache.vcpu_maphash[i].hash[j].idx =
+ MAPHASHENT_NOTINUSE;
}
#define GLOBALMAP_BITS (IOREMAP_MBYTES << (20 - PAGE_SHIFT))
@@ -128,15 +175,10 @@
spin_lock(&globalmap_lock);
- for ( ; ; )
- {
- idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
- va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
-
- /* End of round? If not then we're done in this loop. */
- if ( va < FIXADDR_START )
- break;
-
+ idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
+ va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
+ if ( unlikely(va >= FIXADDR_START) )
+ {
/* /First/, clean the garbage map and update the inuse list. */
for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
{
@@ -147,7 +189,9 @@
/* /Second/, flush all TLBs to get rid of stale garbage mappings. */
flush_tlb_all();
- inuse_cursor = 0;
+ idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
+ va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
+ ASSERT(va < FIXADDR_START);
}
set_bit(idx, inuse);
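
[Both allocators in this file now share the same two-phase reclaim idiom; a condensed sketch under assumed generic names — xchg, find_first_zero_bit and the flush calls are the ones used above:]

static unsigned int reclaim_and_rescan(unsigned long *inuse,
                                       unsigned long *garbage,
                                       unsigned int nr_longs,
                                       unsigned int nr_bits)
{
    unsigned int i;

    /* /First/, atomically take the garbage bits and free those slots. */
    for ( i = 0; i < nr_longs; i++ )
    {
        unsigned long x = xchg(&garbage[i], 0);
        inuse[i] &= ~x;
    }

    /* /Second/, flush TLBs (local_flush_tlb() for the per-domain
     * mapcache, flush_tlb_all() for the global map) so no stale
     * translation can reach a recycled slot, then rescan. */
    local_flush_tlb();

    return find_first_zero_bit(inuse, nr_bits);
}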
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/entry.S Thu Jan 12 12:20:04 2006
@@ -326,7 +326,9 @@
shl $IRQSTAT_shift,%eax
test %ecx,irq_stat(%eax,1)
jnz process_softirqs
-/*test_guest_events:*/
+ btr $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+ jc process_nmi
+test_guest_events:
movl VCPU_vcpu_info(%ebx),%eax
testb $0xFF,VCPUINFO_upcall_mask(%eax)
jnz restore_all_guest
@@ -348,7 +350,24 @@
sti
call do_softirq
jmp test_all_events
-
+
+ ALIGN
+process_nmi:
+ movl VCPU_nmi_addr(%ebx),%eax
+ test %eax,%eax
+ jz test_all_events
+ bts $_VCPUF_nmi_masked,VCPU_flags(%ebx)
+ jc 1f
+ sti
+ leal VCPU_trap_bounce(%ebx),%edx
+ movl %eax,TRAPBOUNCE_eip(%edx)
+ movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
+ movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
+ call create_bounce_frame
+ jmp test_all_events
+1: bts $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+ jmp test_guest_events
+
/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */
/* {EIP, CS, EFLAGS, [ESP, SS]} */
/* %edx == trap_bounce, %ebx == struct vcpu */
@@ -620,9 +639,7 @@
jne defer_nmi
continue_nmi:
- movl $(__HYPERVISOR_DS),%edx
- movl %edx,%ds
- movl %edx,%es
+ SET_XEN_SEGMENTS(d)
movl %esp,%edx
pushl %edx
call do_nmi
@@ -659,42 +676,6 @@
GET_GUEST_REGS(%ecx)
movl %eax,UREGS_eax(%ecx)
jmp do_sched_op
-
-do_switch_vm86:
- # Reset the stack pointer
- GET_GUEST_REGS(%ecx)
- movl %ecx,%esp
-
- # GS:ESI == Ring-1 stack activation
- movl UREGS_esp(%esp),%esi
-VFLT1: mov UREGS_ss(%esp),%gs
-
- # ES:EDI == Ring-0 stack activation
- leal UREGS_eip(%esp),%edi
-
- # Restore the hypercall-number-clobbered EAX on our stack frame
-VFLT2: movl %gs:(%esi),%eax
- movl %eax,UREGS_eax(%esp)
- addl $4,%esi
-
- # Copy the VM86 activation from the ring-1 stack to the ring-0 stack
- movl $(UREGS_user_sizeof-UREGS_eip)/4,%ecx
-VFLT3: movl %gs:(%esi),%eax
- stosl
- addl $4,%esi
- loop VFLT3
-
- # Fix up EFLAGS: IOPL=0, IF=1, VM=1
- andl $~X86_EFLAGS_IOPL,UREGS_eflags(%esp)
- orl $X86_EFLAGS_IF|X86_EFLAGS_VM,UREGS_eflags(%esp)
-
- jmp test_all_events
-
-.section __ex_table,"a"
- .long VFLT1,domain_crash_synchronous
- .long VFLT2,domain_crash_synchronous
- .long VFLT3,domain_crash_synchronous
-.previous
.data
@@ -744,11 +725,12 @@
.long do_grant_table_op /* 20 */
.long do_vm_assist
.long do_update_va_mapping_otherdomain
- .long do_switch_vm86
+ .long do_iret
.long do_vcpu_op
.long do_ni_hypercall /* 25 */
.long do_mmuext_op
- .long do_acm_op /* 27 */
+ .long do_acm_op
+ .long do_nmi_op
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
@@ -777,11 +759,12 @@
.byte 3 /* do_grant_table_op */ /* 20 */
.byte 2 /* do_vm_assist */
.byte 5 /* do_update_va_mapping_otherdomain */
- .byte 0 /* do_switch_vm86 */
+ .byte 0 /* do_iret */
.byte 3 /* do_vcpu_op */
.byte 0 /* do_ni_hypercall */ /* 25 */
.byte 4 /* do_mmuext_op */
.byte 1 /* do_acm_op */
+ .byte 2 /* do_nmi_op */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_32/traps.c Thu Jan 12 12:20:04 2006
@@ -157,6 +157,64 @@
__asm__ __volatile__ ( "hlt" );
}
+static inline void pop_from_guest_stack(
+ void *dst, struct cpu_user_regs *regs, unsigned int bytes)
+{
+ if ( unlikely(__copy_from_user(dst, (void __user *)regs->esp, bytes)) )
+ domain_crash_synchronous();
+ regs->esp += bytes;
+}
+
+asmlinkage unsigned long do_iret(void)
+{
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ u32 eflags;
+
+ /* Check worst-case stack frame for overlap with Xen protected area. */
+ if ( unlikely(!access_ok(regs->esp, 40)) )
+ domain_crash_synchronous();
+
+ /* Pop and restore EAX (clobbered by hypercall). */
+ pop_from_guest_stack(&regs->eax, regs, 4);
+
+ /* Pop and restore CS and EIP. */
+ pop_from_guest_stack(&regs->eip, regs, 8);
+
+ /*
+ * Pop, fix up and restore EFLAGS. We fix up in a local staging area
+ * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
+ */
+ pop_from_guest_stack(&eflags, regs, 4);
+ regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
+
+ if ( VM86_MODE(regs) )
+ {
+ /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
+ pop_from_guest_stack(&regs->esp, regs, 24);
+ }
+ else if ( unlikely(RING_0(regs)) )
+ {
+ domain_crash_synchronous();
+ }
+ else if ( !RING_1(regs) )
+ {
+ /* Return to ring 2/3: pop and restore ESP and SS. */
+ pop_from_guest_stack(&regs->esp, regs, 8);
+ }
+
+ /* No longer in NMI context. */
+ clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
+
+ /* Restore upcall mask from saved value. */
+ current->vcpu_info->evtchn_upcall_mask = regs->saved_upcall_mask;
+
+ /*
+ * The hypercall exit path will overwrite EAX with this return
+ * value.
+ */
+ return regs->eax;
+}
+
BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs)
{
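
[Pieced together from the pops above, the worst-case guest stack frame consumed by do_iret() on x86/32 is sketched below; the 40-byte total matches the access_ok() check. Hypothetical layout, top of stack first:]

struct iret_frame_32 {
    uint32_t eax;               /*  4: EAX clobbered by the hypercall    */
    uint32_t eip, cs;           /*  8: return address                    */
    uint32_t eflags;            /*  4: IOPL cleared, IF forced on        */
    uint32_t esp, ss;           /*  8: only when returning to ring 2/3   */
    uint32_t es, ds, fs, gs;    /* 16: additionally popped for VM86 mode */
};                              /* worst case: 40 bytes                  */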
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/asm-offsets.c Thu Jan 12 12:20:04 2006
@@ -65,6 +65,10 @@
arch.guest_context.syscall_callback_eip);
OFFSET(VCPU_kernel_sp, struct vcpu,
arch.guest_context.kernel_sp);
+ OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
+ OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+ DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
+ DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
BLANK();
OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/entry.S Thu Jan 12 12:20:04 2006
@@ -171,7 +171,9 @@
leaq irq_stat(%rip),%rcx
testl $~0,(%rcx,%rax,1)
jnz process_softirqs
-/*test_guest_events:*/
+ btr $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+ jc process_nmi
+test_guest_events:
movq VCPU_vcpu_info(%rbx),%rax
testb $0xFF,VCPUINFO_upcall_mask(%rax)
jnz restore_all_guest
@@ -322,6 +324,23 @@
call do_softirq
jmp test_all_events
+ ALIGN
+/* %rbx: struct vcpu */
+process_nmi:
+ movq VCPU_nmi_addr(%rbx),%rax
+ test %rax,%rax
+ jz test_all_events
+ bts $_VCPUF_nmi_masked,VCPU_flags(%rbx)
+ jc 1f
+ sti
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+ call create_bounce_frame
+ jmp test_all_events
+1: bts $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+ jmp test_guest_events
+
/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK: */
/* { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS } */
/* %rdx: trap_bounce, %rbx: struct vcpu */
@@ -339,6 +358,9 @@
1: /* In kernel context already: push new frame at existing %rsp. */
movq UREGS_rsp+8(%rsp),%rsi
andb $0xfc,UREGS_cs+8(%rsp) # Indicate kernel context to guest.
+ testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+ jz 2f
+ orb $0x01,UREGS_cs+8(%rsp)
2: andq $~0xf,%rsi # Stack frames are 16-byte aligned.
movq $HYPERVISOR_VIRT_START,%rax
cmpq %rax,%rsi
@@ -569,7 +591,7 @@
SAVE_ALL
movq %rsp,%rdi
call do_nmi
- jmp restore_all_xen
+ jmp ret_from_intr
do_arch_sched_op:
# Ensure we return success even if we return via schedule_tail()
@@ -626,11 +648,12 @@
.quad do_grant_table_op /* 20 */
.quad do_vm_assist
.quad do_update_va_mapping_otherdomain
- .quad do_switch_to_user
+ .quad do_iret
.quad do_vcpu_op
.quad do_set_segment_base /* 25 */
.quad do_mmuext_op
.quad do_acm_op
+ .quad do_nmi_op
.rept NR_hypercalls-((.-hypercall_table)/4)
.quad do_ni_hypercall
.endr
@@ -659,11 +682,12 @@
.byte 3 /* do_grant_table_op */ /* 20 */
.byte 2 /* do_vm_assist */
.byte 4 /* do_update_va_mapping_otherdomain */
- .byte 0 /* do_switch_to_user */
+ .byte 0 /* do_iret */
.byte 3 /* do_vcpu_op */
.byte 2 /* do_set_segment_base */ /* 25 */
.byte 4 /* do_mmuext_op */
.byte 1 /* do_acm_op */
+ .byte 2 /* do_nmi_op */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
diff -r 642b26779c4e -r 4b8919585039 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Thu Jan 12 12:13:34 2006
+++ b/xen/arch/x86/x86_64/traps.c Thu Jan 12 12:20:04 2006
@@ -12,6 +12,7 @@
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/msr.h>
+#include <asm/shadow.h>
#include <asm/vmx.h>
void show_registers(struct cpu_user_regs *regs)
@@ -113,6 +114,52 @@
__asm__ __volatile__ ( "hlt" );
}
+void toggle_guest_mode(struct vcpu *v)
+{
+ v->arch.flags ^= TF_kernel_mode;
+ __asm__ __volatile__ ( "swapgs" );
+ update_pagetables(v);
+ write_ptbase(v);
+}
+
+long do_iret(void)
+{
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ struct iret_context iret_saved;
+ struct vcpu *v = current;
+
+ if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
+ sizeof(iret_saved))) )
+ domain_crash_synchronous();
+
+ /* Returning to user mode? */
+ if ( (iret_saved.cs & 3) == 3 )
+ {
+ if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
+ return -EFAULT;
+ toggle_guest_mode(v);
+ }
+
+ regs->rip = iret_saved.rip;
+ regs->cs = iret_saved.cs | 3; /* force guest privilege */
+ regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
+ regs->rsp = iret_saved.rsp;
+ regs->ss = iret_saved.ss | 3; /* force guest privilege */
+
+ if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
+ {
+ regs->entry_vector = 0;
+ regs->r11 = iret_saved.r11;
+ regs->rcx = iret_saved.rcx;
+ }
+
+ /* No longer in NMI context. */
+ clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
+
+ /* Saved %rax gets written back to regs->rax in entry.S. */
+ return iret_saved.rax;
+}
+
asmlinkage void syscall_enter(void);
void __init percpu_traps_init(void)
{
diff -r 642b26779c4e -r 4b8919585039 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c Thu Jan 12 12:13:34 2006
+++ b/xen/common/dom0_ops.c Thu Jan 12 12:20:04 2006
@@ -323,7 +323,7 @@
new_affinity = v->cpu_affinity;
memcpy(cpus_addr(new_affinity),
&op->u.setvcpuaffinity.cpumap,
- min((int)BITS_TO_LONGS(NR_CPUS),
+ min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
(int)sizeof(op->u.setvcpuaffinity.cpumap)));
ret = vcpu_set_affinity(v, &new_affinity);
@@ -501,7 +501,7 @@
op->u.getvcpuinfo.cpumap = 0;
memcpy(&op->u.getvcpuinfo.cpumap,
cpus_addr(v->cpu_affinity),
- min((int)BITS_TO_LONGS(NR_CPUS),
+ min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
(int)sizeof(op->u.getvcpuinfo.cpumap)));
ret = 0;
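
[The cpumap fix in the two hunks above corrects a units mismatch: BITS_TO_LONGS() yields a count of longs, while memcpy() and sizeof() deal in bytes. A worked illustration with an assumed NR_CPUS, not taken from the changeset:]

/* With NR_CPUS == 64 on a 64-bit build:
 *   BITS_TO_LONGS(64)                == 1  (longs -- the old, wrong bound)
 *   BITS_TO_LONGS(64) * sizeof(long) == 8  (bytes -- the corrected bound)
 * so the old code copied min(1, sizeof(cpumap)) == 1 byte of an
 * 8-byte affinity mask. */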
diff -r 642b26779c4e -r 4b8919585039 xen/common/kernel.c
--- a/xen/common/kernel.c Thu Jan 12 12:13:34 2006
+++ b/xen/common/kernel.c Thu Jan 12 12:20:04 2006
@@ -11,6 +11,7 @@
#include <xen/compile.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <public/nmi.h>
#include <public/version.h>
void cmdline_parse(char *cmdline)
@@ -146,6 +147,43 @@
}
return -ENOSYS;
+}
+
+long do_nmi_op(unsigned int cmd, void *arg)
+{
+ struct vcpu *v = current;
+ struct domain *d = current->domain;
+ long rc = 0;
+
+ switch ( cmd )
+ {
+ case XENNMI_register_callback:
+ if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
+ {
+ rc = -EINVAL;
+ }
+ else
+ {
+ v->nmi_addr = (unsigned long)arg;
+#ifdef CONFIG_X86
+ /*
+ * If no handler was registered we can 'lose the NMI edge'.
+ * Re-assert it now.
+ */
+ if ( d->shared_info->arch.nmi_reason != 0 )
+ set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
+#endif
+ }
+ break;
+ case XENNMI_unregister_callback:
+ v->nmi_addr = 0;
+ break;
+ default:
+ rc = -ENOSYS;
+ break;
+ }
+
+ return rc;
}
long do_vm_assist(unsigned int cmd, unsigned int type)
diff -r 642b26779c4e -r 4b8919585039 xen/common/schedule.c
--- a/xen/common/schedule.c Thu Jan 12 12:13:34 2006
+++ b/xen/common/schedule.c Thu Jan 12 12:20:04 2006
@@ -207,7 +207,10 @@
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
- if ( cpus_empty(*affinity) )
+ cpumask_t online_affinity;
+
+ cpus_and(online_affinity, *affinity, cpu_online_map);
+ if ( cpus_empty(online_affinity) )
return -EINVAL;
return SCHED_OP(set_affinity, v, affinity);
diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/domain.h Thu Jan 12 12:20:04 2006
@@ -13,13 +13,43 @@
unsigned long eip;
};
+#define MAPHASH_ENTRIES 8
+#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
+#define MAPHASHENT_NOTINUSE ((u16)~0U)
+struct vcpu_maphash {
+ struct vcpu_maphash_entry {
+ unsigned long pfn;
+ uint16_t idx;
+ uint16_t refcnt;
+ } hash[MAPHASH_ENTRIES];
+} __cacheline_aligned;
+
+#define MAPCACHE_ORDER 10
+#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
struct mapcache {
+ /* The PTEs that provide the mappings, and a cursor into the array. */
l1_pgentry_t *l1tab;
unsigned int cursor;
+
+ /* Protects map_domain_page(). */
+ spinlock_t lock;
+
+ /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
unsigned int epoch, shadow_epoch[MAX_VIRT_CPUS];
u32 tlbflush_timestamp;
- spinlock_t lock;
+
+ /* Which mappings are in use, and which are garbage to reap next epoch? */
+ unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+ unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+
+ /* Lock-free per-VCPU hash of recently-used mappings. */
+ struct vcpu_maphash vcpu_maphash[MAX_VIRT_CPUS];
};
+
+extern void mapcache_init(struct domain *);
+
+/* x86/64: toggle guest between kernel and user modes. */
+extern void toggle_guest_mode(struct vcpu *);
struct arch_domain
{
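
[For illustration, the fast path that the new per-VCPU maphash enables (compare the #if 0 block in domain_page.c earlier in this changeset) might read as follows; an assumed helper, not part of the patch:]

/* Lock-free: each VCPU only ever touches its own hash row. */
static inline void *maphash_lookup(struct mapcache *cache,
                                   unsigned int vcpu, unsigned long pfn)
{
    struct vcpu_maphash_entry *ent =
        &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(pfn)];

    if ( (ent->idx == MAPHASHENT_NOTINUSE) || (ent->pfn != pfn) )
        return NULL;                /* miss: fall back to the slow path */

    ent->refcnt++;                  /* pin the cached slot */
    return (void *)(MAPCACHE_VIRT_START +
                    ((unsigned long)ent->idx << PAGE_SHIFT));
}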
diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/nmi.h
--- a/xen/include/asm-x86/nmi.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/nmi.h Thu Jan 12 12:20:04 2006
@@ -1,6 +1,8 @@
#ifndef ASM_NMI_H
#define ASM_NMI_H
+
+#include <public/nmi.h>
struct cpu_user_regs;
diff -r 642b26779c4e -r 4b8919585039 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/asm-x86/processor.h Thu Jan 12 12:20:04 2006
@@ -123,6 +123,7 @@
#define TBF_EXCEPTION_ERRCODE 2
#define TBF_INTERRUPT 8
#define TBF_FAILSAFE 16
+#define TBF_SLOW_IRET 32
/* 'arch_vcpu' flags values */
#define _TF_kernel_mode 0
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/arch-x86_32.h Thu Jan 12 12:20:04 2006
@@ -135,6 +135,7 @@
unsigned long max_pfn; /* max pfn that appears in table */
/* Frame containing list of mfns containing list of mfns containing p2m. */
unsigned long pfn_to_mfn_frame_list_list;
+ unsigned long nmi_reason;
} arch_shared_info_t;
typedef struct {
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/arch-x86_64.h Thu Jan 12 12:20:04 2006
@@ -88,11 +88,20 @@
#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
/*
- * int HYPERVISOR_switch_to_user(void)
+ * int HYPERVISOR_iret(void)
* All arguments are on the kernel stack, in the following format.
* Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ * RING0 -> RING3 kernel mode.
+ * RING1 -> RING3 kernel mode.
+ * RING2 -> RING3 kernel mode.
+ * RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to itself
+ * directly with
+ * orb $3,1*8(%rsp)
+ * iretq
* If flags contains VGCF_IN_SYSCALL:
- * Restore RAX, RIP, RFLAGS, RSP.
+ * Restore RAX, RIP, RFLAGS, RSP.
* Discard R11, RCX, CS, SS.
* Otherwise:
* Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
@@ -100,10 +109,19 @@
*/
/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
#define VGCF_IN_SYSCALL (1<<8)
+struct iret_context {
+ /* Top of stack (%rsp at point of hypercall). */
+ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+ /* Bottom of iret stack frame. */
+};
+/*
+ * For compatibility with HYPERVISOR_switch_to_user which is the old
+ * name for HYPERVISOR_iret.
+ */
struct switch_to_user {
/* Top of stack (%rsp at point of hypercall). */
uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
- /* Bottom of switch_to_user stack frame. */
+ /* Bottom of iret stack frame. */
};
/*
@@ -202,6 +220,7 @@
unsigned long max_pfn; /* max pfn that appears in table */
/* Frame containing list of mfns containing list of mfns containing p2m. */
unsigned long pfn_to_mfn_frame_list_list;
+ unsigned long nmi_reason;
} arch_shared_info_t;
typedef struct {
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/xen.h
--- a/xen/include/public/xen.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/xen.h Thu Jan 12 12:20:04 2006
@@ -53,12 +53,14 @@
#define __HYPERVISOR_grant_table_op 20
#define __HYPERVISOR_vm_assist 21
#define __HYPERVISOR_update_va_mapping_otherdomain 22
-#define __HYPERVISOR_switch_vm86 23 /* x86/32 only */
-#define __HYPERVISOR_switch_to_user 23 /* x86/64 only */
+#define __HYPERVISOR_iret 23 /* x86 only */
+#define __HYPERVISOR_switch_vm86 23 /* x86/32 only (obsolete name) */
+#define __HYPERVISOR_switch_to_user 23 /* x86/64 only (obsolete name) */
#define __HYPERVISOR_vcpu_op 24
#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
#define __HYPERVISOR_mmuext_op 26
#define __HYPERVISOR_acm_op 27
+#define __HYPERVISOR_nmi_op 28
/*
* VIRTUAL INTERRUPTS
@@ -69,10 +71,7 @@
#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */
#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
-#define VIRQ_PARITY_ERR 4 /* (DOM0) NMI parity error (port 0x61, bit 7). */
-#define VIRQ_IO_ERR 5 /* (DOM0) NMI I/O error (port 0x61, bit 6). */
#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
-#define VIRQ_NMI 7 /* (DOM0) Unknown NMI (not from ISA port 0x61).*/
#define NR_VIRQS 8
/*
diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/domain_page.h
--- a/xen/include/xen/domain_page.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/domain_page.h Thu Jan 12 12:20:04 2006
@@ -10,24 +10,19 @@
#include <xen/config.h>
#include <xen/mm.h>
-#define map_domain_page(pfn) map_domain_pages(pfn,0)
-#define unmap_domain_page(va) unmap_domain_pages(va,0)
-
#ifdef CONFIG_DOMAIN_PAGE
/*
- * Maps a given range of page frames, returning the mapped virtual address. The
- * pages are now accessible within the current VCPU until a corresponding
- * call to unmap_domain_page().
+ * Map a given page frame, returning the mapped virtual address. The page is
+ * then accessible within the current VCPU until a corresponding unmap call.
*/
-extern void *map_domain_pages(unsigned long pfn, unsigned int order);
+extern void *map_domain_page(unsigned long pfn);
/*
- * Pass a VA within the first page of a range previously mapped in the context
- * of the currently-executing VCPU via a call to map_domain_pages(). Those
- * pages will then be removed from the mapping lists.
+ * Pass a VA within a page previously mapped in the context of the
+ * currently-executing VCPU via a call to map_domain_page().
*/
-extern void unmap_domain_pages(void *va, unsigned int order);
+extern void unmap_domain_page(void *va);
/*
* Similar to the above calls, except the mapping is accessible in all
@@ -97,8 +92,8 @@
#else /* !CONFIG_DOMAIN_PAGE */
-#define map_domain_pages(pfn,order) phys_to_virt((pfn)<<PAGE_SHIFT)
-#define unmap_domain_pages(va,order) ((void)((void)(va),(void)(order)))
+#define map_domain_page(pfn) phys_to_virt((pfn)<<PAGE_SHIFT)
+#define unmap_domain_page(va) ((void)(va))
#define map_domain_page_global(pfn) phys_to_virt((pfn)<<PAGE_SHIFT)
#define unmap_domain_page_global(va) ((void)(va))
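
[Typical paired use of the narrowed single-page interface — an illustrative caller, assumed to run in VCPU context and outside IRQ:]

static void zero_frame(unsigned long pfn)
{
    void *va = map_domain_page(pfn);   /* per-VCPU mapping */
    memset(va, 0, PAGE_SIZE);
    unmap_domain_page(va);             /* must pair with the map call */
}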
diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/sched.h Thu Jan 12 12:20:04 2006
@@ -80,6 +80,8 @@
/* Bitmask of CPUs on which this VCPU may run. */
cpumask_t cpu_affinity;
+
+ unsigned long nmi_addr; /* NMI callback address. */
/* Bitmask of CPUs which are holding onto this VCPU's state. */
cpumask_t vcpu_dirty_cpumask;
@@ -361,6 +363,12 @@
/* VCPU is not-runnable */
#define _VCPUF_down 5
#define VCPUF_down (1UL<<_VCPUF_down)
+ /* NMI callback pending for this VCPU? */
+#define _VCPUF_nmi_pending 8
+#define VCPUF_nmi_pending (1UL<<_VCPUF_nmi_pending)
+ /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
+#define _VCPUF_nmi_masked 9
+#define VCPUF_nmi_masked (1UL<<_VCPUF_nmi_masked)
/*
* Per-domain flags (domain_flags).
diff -r 642b26779c4e -r 4b8919585039 xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Thu Jan 12 12:13:34 2006
+++ b/xen/include/xen/softirq.h Thu Jan 12 12:20:04 2006
@@ -6,7 +6,7 @@
#define SCHEDULE_SOFTIRQ 1
#define NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ 2
#define KEYPRESS_SOFTIRQ 3
-#define NMI_DOM0_SOFTIRQ 4
+#define NMI_SOFTIRQ 4
#define PAGE_SCRUB_SOFTIRQ 5
#define DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ 6
#define NR_SOFTIRQS 7
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h
--- /dev/null Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h Thu Jan 12 12:20:04 2006
@@ -0,0 +1,33 @@
+/*
+ * include/asm-xen/asm-i386/mach-xen/mach_traps.h
+ *
+ * Machine specific NMI handling for Xen
+ */
+#ifndef _MACH_TRAPS_H
+#define _MACH_TRAPS_H
+
+#include <linux/bitops.h>
+#include <asm-xen/xen-public/nmi.h>
+
+static inline void clear_mem_error(unsigned char reason) {}
+static inline void clear_io_check_error(unsigned char reason) {}
+
+static inline unsigned char get_nmi_reason(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+ unsigned char reason = 0;
+
+ /* construct a value which looks like it came from
+ * port 0x61.
+ */
+ if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
+ reason |= 0x40;
+ if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
+ reason |= 0x80;
+
+ return reason;
+}
+
+static inline void reassert_nmi(void) {}
+
+#endif /* !_MACH_TRAPS_H */
diff -r 642b26779c4e -r 4b8919585039 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h
--- /dev/null Thu Jan 12 12:13:34 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h Thu Jan 12 12:20:04 2006
@@ -0,0 +1,75 @@
+/*
+ * linux/include/asm-i386/nmi.h
+ */
+#ifndef ASM_NMI_H
+#define ASM_NMI_H
+
+#include <linux/pm.h>
+
+#include <asm-xen/xen-public/nmi.h>
+
+struct pt_regs;
+
+typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
+
+/**
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be
+ * set. Return 1 if the NMI was handled.
+ */
+void set_nmi_callback(nmi_callback_t callback);
+
+/**
+ * unset_nmi_callback
+ *
+ * Remove the handler previously set.
+ */
+void unset_nmi_callback(void);
+
+#ifdef CONFIG_PM
+
+/** Replace the PM callback routine for NMI. */
+struct pm_dev * set_nmi_pm_callback(pm_callback callback);
+
+/** Unset the PM callback routine back to the default. */
+void unset_nmi_pm_callback(struct pm_dev * dev);
+
+#else
+
+static inline struct pm_dev * set_nmi_pm_callback(pm_callback callback)
+{
+ return 0;
+}
+
+static inline void unset_nmi_pm_callback(struct pm_dev * dev)
+{
+}
+
+#endif /* CONFIG_PM */
+
+extern void default_do_nmi(struct pt_regs *);
+extern void die_nmi(char *str, struct pt_regs *regs);
+
+static inline unsigned char get_nmi_reason(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+ unsigned char reason = 0;
+
+ /* construct a value which looks like it came from
+ * port 0x61.
+ */
+ if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
+ reason |= 0x40;
+ if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
+ reason |= 0x80;
+
+ return reason;
+}
+
+extern int panic_on_timeout;
+extern int unknown_nmi_panic;
+
+extern int check_nmi_watchdog(void);
+
+#endif /* ASM_NMI_H */
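
[A minimal consumer of the callback interface declared above; a hypothetical handler whose return convention follows the comment on set_nmi_callback:]

static int my_nmi_handler(struct pt_regs *regs, int cpu)
{
    /* ... inspect regs/cpu, decide whether this NMI is ours ... */
    return 1;                  /* non-zero: the NMI was handled */
}

/* set_nmi_callback(my_nmi_handler); ... unset_nmi_callback(); */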
diff -r 642b26779c4e -r 4b8919585039 patches/linux-2.6.12/i386-mach-io-check-nmi.patch
--- /dev/null Thu Jan 12 12:13:34 2006
+++ b/patches/linux-2.6.12/i386-mach-io-check-nmi.patch Thu Jan 12 12:20:04 2006
@@ -0,0 +1,43 @@
+--- ref-linux-2.6.12/arch/i386/kernel/traps.c 2005-12-19 09:23:44.000000000 +0000
++++ linux-2.6.12-xen0/arch/i386/kernel/traps.c 2006-01-05 15:51:52.000000000 +0000
+@@ -521,18 +521,11 @@
+
+ static void io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+- unsigned long i;
+-
+ printk("NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+- reason = (reason & 0xf) | 8;
+- outb(reason, 0x61);
+- i = 2000;
+- while (--i) udelay(1000);
+- reason &= ~8;
+- outb(reason, 0x61);
++ clear_io_check_error(reason);
+ }
+
+ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+--- ref-linux-2.6.12/include/asm-i386/mach-default/mach_traps.h 2005-06-17 20:48:29.000000000 +0100
++++ linux-2.6.12-xen0/include/asm-i386/mach-default/mach_traps.h 2006-01-05 15:52:33.000000000 +0000
+@@ -15,6 +15,18 @@
+ outb(reason, 0x61);
+ }
+
++static inline void clear_io_check_error(unsigned char reason)
++{
++ unsigned long i;
++
++ reason = (reason & 0xf) | 8;
++ outb(reason, 0x61);
++ i = 2000;
++ while (--i) udelay(1000);
++ reason &= ~8;
++ outb(reason, 0x61);
++}
++
+ static inline unsigned char get_nmi_reason(void)
+ {
+ return inb(0x61);
diff -r 642b26779c4e -r 4b8919585039 xen/include/public/nmi.h
--- /dev/null Thu Jan 12 12:13:34 2006
+++ b/xen/include/public/nmi.h Thu Jan 12 12:20:04 2006
@@ -0,0 +1,54 @@
+/******************************************************************************
+ * nmi.h
+ *
+ * NMI callback registration and reason codes.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xxxxxxxxxxxxx>
+ */
+
+#ifndef __XEN_PUBLIC_NMI_H__
+#define __XEN_PUBLIC_NMI_H__
+
+/*
+ * NMI reason codes:
+ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
+ */
+ /* I/O-check error reported via ISA port 0x61, bit 6. */
+#define _XEN_NMIREASON_io_error 0
+#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error)
+ /* Parity error reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_parity_error 1
+#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error)
+ /* Unknown hardware-generated NMI. */
+#define _XEN_NMIREASON_unknown 2
+#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown)
+
+/*
+ * long nmi_op(unsigned int cmd, void *arg)
+ * NB. All ops return zero on success, else a negative error code.
+ */
+
+/*
+ * Register NMI callback for this (calling) VCPU. Currently this only makes
+ * sense for domain 0, vcpu 0. All other callers receive -EINVAL.
+ * arg == address of callback function.
+ */
+#define XENNMI_register_callback 0
+
+/*
+ * Deregister NMI callback for this (calling) VCPU.
+ * arg == NULL.
+ */
+#define XENNMI_unregister_callback 1
+
+#endif /* __XEN_PUBLIC_NMI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
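
[Putting the two commands together, guest-side setup and teardown might look like this — a sketch assuming the HYPERVISOR_nmi_op wrapper added earlier in this changeset; only dom0/vcpu0 may register:]

extern void nmi(void);         /* guest NMI entry point (see entry.S) */

static int xen_nmi_init(void)
{
    /* Returns -EINVAL unless called from domain 0, vcpu 0. */
    return HYPERVISOR_nmi_op(XENNMI_register_callback,
                             (unsigned long)&nmi);
}

static void xen_nmi_exit(void)
{
    HYPERVISOR_nmi_op(XENNMI_unregister_callback, 0);
}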